# Source: bakery-ia/services/procurement/app/ml/supplier_insights_orchestrator.py
"""
Supplier Insights Orchestrator
Coordinates supplier performance analysis and insight posting
"""
import pandas as pd
from typing import Dict, List, Any, Optional
import structlog
from datetime import datetime
from uuid import UUID
import sys
import os

# Add the repo root to sys.path so the service can import the shared packages
# that live outside this service's package tree.
sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..'))

from shared.clients.ai_insights_client import AIInsightsClient
from shared.messaging import UnifiedEventPublisher

from app.ml.supplier_performance_predictor import SupplierPerformancePredictor

logger = structlog.get_logger()
class SupplierInsightsOrchestrator:
    """
    Orchestrates supplier performance analysis and insight generation workflow.

    Workflow:
        1. Analyze supplier performance from historical orders
        2. Generate insights for procurement risk management
        3. Post insights to AI Insights Service
        4. Publish recommendation events to RabbitMQ
        5. Provide supplier comparison and recommendations
        6. Track supplier reliability scores
    """

    def __init__(
        self,
        ai_insights_base_url: str = "http://ai-insights-service:8000",
        event_publisher: Optional["UnifiedEventPublisher"] = None
    ):
        """
        Args:
            ai_insights_base_url: Base URL of the AI Insights Service.
            event_publisher: Optional RabbitMQ publisher; when None, the
                event-publishing step is skipped with a warning.
        """
        # Performs the actual per-supplier statistical analysis.
        self.predictor = SupplierPerformancePredictor()
        # HTTP client used to post generated insights.
        self.ai_insights_client = AIInsightsClient(ai_insights_base_url)
        self.event_publisher = event_publisher

    async def analyze_and_post_supplier_insights(
        self,
        tenant_id: str,
        supplier_id: str,
        order_history: pd.DataFrame,
        min_orders: int = 10
    ) -> Dict[str, Any]:
        """
        Complete workflow: Analyze supplier and post insights.

        Args:
            tenant_id: Tenant identifier (must be a valid UUID string).
            supplier_id: Supplier identifier.
            order_history: Historical order data.
            min_orders: Minimum orders for analysis.

        Returns:
            Workflow results with analysis and posted insights.
        """
        logger.info(
            "Starting supplier performance analysis workflow",
            tenant_id=tenant_id,
            supplier_id=supplier_id,
            orders=len(order_history)
        )

        # Step 1: Analyze supplier performance
        analysis_results = await self.predictor.analyze_supplier_performance(
            tenant_id=tenant_id,
            supplier_id=supplier_id,
            order_history=order_history,
            min_orders=min_orders
        )
        logger.info(
            "Supplier analysis complete",
            supplier_id=supplier_id,
            reliability_score=analysis_results.get('reliability_score'),
            insights_generated=len(analysis_results.get('insights', []))
        )

        # Step 2: Enrich insights with tenant_id and supplier context
        enriched_insights = self._enrich_insights(
            analysis_results.get('insights', []),
            tenant_id,
            supplier_id
        )

        # Step 3: Post insights to AI Insights Service
        if enriched_insights:
            post_results = await self.ai_insights_client.create_insights_bulk(
                tenant_id=UUID(tenant_id),
                insights=enriched_insights
            )
            logger.info(
                "Supplier insights posted to AI Insights Service",
                supplier_id=supplier_id,
                total=post_results['total'],
                successful=post_results['successful'],
                failed=post_results['failed']
            )
        else:
            post_results = {'total': 0, 'successful': 0, 'failed': 0}
            logger.info("No insights to post for supplier", supplier_id=supplier_id)

        # Step 4: Publish insight events to RabbitMQ (only for insights the
        # AI Insights Service actually created).
        created_insights = post_results.get('created_insights', [])
        if created_insights:
            supplier_context = {'supplier_id': supplier_id}
            await self._publish_insight_events(
                tenant_id=tenant_id,
                insights=created_insights,
                supplier_context=supplier_context
            )

        # Step 5: Return comprehensive results
        return {
            'tenant_id': tenant_id,
            'supplier_id': supplier_id,
            'analyzed_at': analysis_results['analyzed_at'],
            'orders_analyzed': analysis_results['orders_analyzed'],
            'reliability_score': analysis_results.get('reliability_score'),
            'risk_assessment': analysis_results.get('risk_assessment', {}),
            'predictions': analysis_results.get('predictions', {}),
            'insights_generated': len(enriched_insights),
            'insights_posted': post_results['successful'],
            'insights_failed': post_results['failed'],
            'created_insights': post_results.get('created_insights', [])
        }

    def _enrich_insights(
        self,
        insights: List[Dict[str, Any]],
        tenant_id: str,
        supplier_id: str
    ) -> List[Dict[str, Any]]:
        """
        Enrich insights with required fields for AI Insights Service.

        Args:
            insights: Raw insights from predictor (not mutated).
            tenant_id: Tenant identifier.
            supplier_id: Supplier identifier.

        Returns:
            Enriched insights ready for posting.
        """
        enriched = []
        for insight in insights:
            # Add required tenant_id
            enriched_insight = insight.copy()
            enriched_insight['tenant_id'] = tenant_id
            # Copy metrics_json as well: insight.copy() is shallow, so writing
            # into the nested dict directly would mutate the caller's input.
            # Also tolerates metrics_json being absent or None.
            enriched_insight['metrics_json'] = dict(insight.get('metrics_json') or {})
            enriched_insight['metrics_json']['supplier_id'] = supplier_id
            # Add source metadata
            enriched_insight['source_service'] = 'procurement'
            enriched_insight['source_model'] = 'supplier_performance_predictor'
            enriched_insight['detected_at'] = datetime.utcnow().isoformat()
            enriched.append(enriched_insight)
        return enriched

    async def _publish_insight_events(
        self,
        tenant_id: str,
        insights: List[Dict[str, Any]],
        supplier_context: Optional[Dict[str, Any]] = None
    ) -> None:
        """
        Publish insight events to RabbitMQ for alert processing.

        Args:
            tenant_id: Tenant identifier.
            insights: List of created insights.
            supplier_context: Additional context about the supplier.
        """
        if not self.event_publisher:
            logger.warning("No event publisher available for supplier insights")
            return

        for insight in insights:
            # Determine severity based on confidence and priority
            confidence = insight.get('confidence', 0)
            priority = insight.get('priority', 'medium')
            # Map priority to severity, with confidence as tiebreaker
            if priority == 'critical' or (priority == 'high' and confidence >= 70):
                severity = 'high'
            elif priority == 'high' or (priority == 'medium' and confidence >= 80):
                severity = 'medium'
            else:
                severity = 'low'

            # Prepare the event data
            event_data = {
                'insight_id': insight.get('id'),
                'type': insight.get('type'),
                'title': insight.get('title'),
                'description': insight.get('description'),
                'category': insight.get('category'),
                'priority': insight.get('priority'),
                'confidence': confidence,
                'recommendation': insight.get('recommendation_actions', []),
                'impact_type': insight.get('impact_type'),
                'impact_value': insight.get('impact_value'),
                'supplier_id': supplier_context.get('supplier_id') if supplier_context else None,
                'timestamp': insight.get('detected_at', datetime.utcnow().isoformat()),
                'source_service': 'procurement',
                'source_model': 'supplier_performance_predictor'
            }

            # Best-effort publish: a broken broker must not fail the analysis
            # workflow, so failures are logged and the loop continues.
            try:
                await self.event_publisher.publish_recommendation(
                    event_type='ai_supplier_recommendation',
                    tenant_id=tenant_id,
                    severity=severity,
                    data=event_data
                )
                logger.info(
                    "Published supplier insight event",
                    tenant_id=tenant_id,
                    insight_id=insight.get('id'),
                    severity=severity
                )
            except Exception as e:
                logger.error(
                    "Failed to publish supplier insight event",
                    tenant_id=tenant_id,
                    insight_id=insight.get('id'),
                    error=str(e)
                )

    async def analyze_all_suppliers(
        self,
        tenant_id: str,
        suppliers_data: Dict[str, pd.DataFrame],
        min_orders: int = 10
    ) -> Dict[str, Any]:
        """
        Analyze all suppliers for a tenant and generate comparative insights.

        Args:
            tenant_id: Tenant identifier.
            suppliers_data: Dict of {supplier_id: order_history DataFrame}.
            min_orders: Minimum orders for analysis.

        Returns:
            Comprehensive analysis with supplier comparison.
        """
        logger.info(
            "Analyzing all suppliers for tenant",
            tenant_id=tenant_id,
            suppliers=len(suppliers_data)
        )

        all_results = []
        total_insights_posted = 0

        # Analyze each supplier; one failing supplier must not abort the rest.
        for supplier_id, order_history in suppliers_data.items():
            try:
                results = await self.analyze_and_post_supplier_insights(
                    tenant_id=tenant_id,
                    supplier_id=supplier_id,
                    order_history=order_history,
                    min_orders=min_orders
                )
                all_results.append(results)
                total_insights_posted += results['insights_posted']
            except Exception as e:
                logger.error(
                    "Error analyzing supplier",
                    supplier_id=supplier_id,
                    error=str(e)
                )

        # Compare suppliers (only those that produced a reliability score)
        comparison = self.predictor.compare_suppliers(
            [r for r in all_results if r.get('reliability_score') is not None]
        )

        # Generate comparative insights if needed
        comparative_insights = self._generate_comparative_insights(
            tenant_id, comparison
        )
        if comparative_insights:
            enriched_comparative = self._enrich_insights(
                comparative_insights, tenant_id, 'all_suppliers'
            )
            post_results = await self.ai_insights_client.create_insights_bulk(
                tenant_id=UUID(tenant_id),
                insights=enriched_comparative
            )
            total_insights_posted += post_results['successful']

        logger.info(
            "All suppliers analysis complete",
            tenant_id=tenant_id,
            suppliers_analyzed=len(all_results),
            total_insights_posted=total_insights_posted
        )
        return {
            'tenant_id': tenant_id,
            'analyzed_at': datetime.utcnow().isoformat(),
            'suppliers_analyzed': len(all_results),
            'supplier_results': all_results,
            'comparison': comparison,
            'total_insights_posted': total_insights_posted
        }

    def _generate_comparative_insights(
        self,
        tenant_id: str,
        comparison: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        Generate insights from supplier comparison.

        Args:
            tenant_id: Tenant identifier.
            comparison: Supplier comparison results.

        Returns:
            List of comparative insights (empty when there are no
            recommendations in the comparison).
        """
        insights = []
        if 'recommendations' in comparison and comparison['recommendations']:
            for rec in comparison['recommendations']:
                # Priority is keyed off markers in the recommendation text.
                if 'URGENT' in rec['recommendation']:
                    priority = 'critical'
                elif 'high-risk' in rec.get('reason', '').lower():
                    priority = 'high'
                else:
                    priority = 'medium'
                insights.append({
                    'type': 'recommendation',
                    'priority': priority,
                    'category': 'procurement',
                    'title': 'Supplier Comparison: Action Required',
                    'description': rec['recommendation'],
                    'impact_type': 'cost_optimization',
                    'impact_value': 0,
                    'impact_unit': 'recommendation',
                    'confidence': 85,
                    'metrics_json': {
                        'comparison_type': 'multi_supplier',
                        'suppliers_compared': comparison['suppliers_compared'],
                        'top_supplier': comparison.get('top_supplier'),
                        'top_score': comparison.get('top_supplier_score'),
                        'reason': rec.get('reason', '')
                    },
                    'actionable': True,
                    'recommendation_actions': [
                        {
                            'label': 'Review Supplier Portfolio',
                            'action': 'review_supplier_portfolio',
                            'params': {'tenant_id': tenant_id}
                        }
                    ],
                    'source_service': 'procurement',
                    'source_model': 'supplier_performance_predictor'
                })
        return insights

    async def get_supplier_risk_score(
        self,
        supplier_id: str
    ) -> Optional[int]:
        """
        Get cached reliability score for a supplier.

        Args:
            supplier_id: Supplier identifier.

        Returns:
            Reliability score (0-100) or None if not analyzed.
        """
        return self.predictor.get_supplier_reliability_score(supplier_id)

    async def close(self):
        """Close HTTP client connections."""
        await self.ai_insights_client.close()