Add AI insights feature

This commit is contained in:
Urtzi Alfaro
2025-12-15 21:14:22 +01:00
parent 5642b5a0c0
commit c566967bea
39 changed files with 17729 additions and 404 deletions

View File

@@ -14,6 +14,7 @@ import os
# Add shared clients to path
sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..'))
from shared.clients.ai_insights_client import AIInsightsClient
from shared.messaging import UnifiedEventPublisher
from app.ml.yield_predictor import YieldPredictor
@@ -28,15 +29,18 @@ class YieldInsightsOrchestrator:
1. Predict yield for upcoming production run or analyze historical performance
2. Generate insights for yield optimization opportunities
3. Post insights to AI Insights Service
4. Provide yield predictions for production planning
4. Publish recommendation events to RabbitMQ
5. Provide yield predictions for production planning
"""
def __init__(
    self,
    ai_insights_base_url: str = "http://ai-insights-service:8000",
    event_publisher: Optional[UnifiedEventPublisher] = None
):
    """
    Initialize the yield insights orchestrator.

    Args:
        ai_insights_base_url: Base URL of the AI Insights Service used to
            post generated insights.
        event_publisher: Optional RabbitMQ publisher for recommendation
            events. When None, event publication steps are skipped
            (see _publish_insight_events).
    """
    self.predictor = YieldPredictor()
    self.ai_insights_client = AIInsightsClient(ai_insights_base_url)
    self.event_publisher = event_publisher
async def predict_and_post_insights(
self,
@@ -54,7 +58,7 @@ class YieldInsightsOrchestrator:
recipe_id: Recipe identifier
production_history: Historical production runs
production_context: Upcoming production context:
- worker_id
- staff_assigned (list of staff IDs)
- planned_start_time
- batch_size
- planned_quantity
@@ -109,6 +113,17 @@ class YieldInsightsOrchestrator:
successful=post_results['successful'],
failed=post_results['failed']
)
# Step 4: Publish recommendation events to RabbitMQ
created_insights = post_results.get('created_insights', [])
if created_insights:
recipe_context = production_context.copy() if production_context else {}
recipe_context['recipe_id'] = recipe_id
await self._publish_insight_events(
tenant_id=tenant_id,
insights=created_insights,
recipe_context=recipe_context
)
else:
post_results = {'total': 0, 'successful': 0, 'failed': 0}
logger.info("No insights to post for recipe", recipe_id=recipe_id)
@@ -193,6 +208,15 @@ class YieldInsightsOrchestrator:
total=post_results['total'],
successful=post_results['successful']
)
# Step 4: Publish recommendation events to RabbitMQ
created_insights = post_results.get('created_insights', [])
if created_insights:
await self._publish_insight_events(
tenant_id=tenant_id,
insights=created_insights,
recipe_context={'recipe_id': recipe_id}
)
else:
post_results = {'total': 0, 'successful': 0, 'failed': 0}
@@ -248,6 +272,83 @@ class YieldInsightsOrchestrator:
return enriched
async def _publish_insight_events(
    self,
    tenant_id: str,
    insights: List[Dict[str, Any]],
    recipe_context: Optional[Dict[str, Any]] = None
) -> None:
    """
    Publish recommendation events to RabbitMQ for each insight.

    Best-effort by design: a failure to publish any single event is logged
    and swallowed so the surrounding insight workflow never fails on
    messaging errors. No-op when no event publisher was configured.

    Args:
        tenant_id: Tenant identifier
        insights: List of created insights (with insight_id from AI Insights Service)
        recipe_context: Optional recipe context (name, id, etc.); only
            'recipe_name' is read here — recipe_id comes from the
            insight's own metrics_json.
    """
    if not self.event_publisher:
        logger.warning("Event publisher not configured, skipping event publication")
        return
    for insight in insights:
        try:
            # Determine severity based on confidence and priority.
            # Priority wins at each tier; confidence thresholds are a
            # fallback (assumes confidence is on a 0-100 scale — TODO confirm).
            confidence = insight.get('confidence', 0)
            priority = insight.get('priority', 'medium')
            if priority == 'urgent' or confidence >= 90:
                severity = 'urgent'
            elif priority == 'high' or confidence >= 70:
                severity = 'high'
            elif priority == 'medium' or confidence >= 50:
                severity = 'medium'
            else:
                severity = 'low'
            # Build event metadata
            event_metadata = {
                'insight_id': insight.get('id'),  # From AI Insights Service response
                'insight_type': insight.get('insight_type'),
                'recipe_id': insight.get('metrics_json', {}).get('recipe_id'),
                'recipe_name': recipe_context.get('recipe_name') if recipe_context else None,
                'predicted_yield': insight.get('metrics_json', {}).get('predicted_yield'),
                'confidence': confidence,
                'recommendation': insight.get('recommendation'),
                'impact_type': insight.get('impact_type'),
                'impact_value': insight.get('impact_value'),
                'source_service': 'production',
                'source_model': 'yield_predictor'
            }
            # Remove None values so consumers only see populated fields
            event_metadata = {k: v for k, v in event_metadata.items() if v is not None}
            # Publish recommendation event
            await self.event_publisher.publish_recommendation(
                event_type='ai_yield_prediction',
                tenant_id=tenant_id,
                severity=severity,
                data=event_metadata
            )
            logger.info(
                "Published yield insight recommendation event",
                tenant_id=tenant_id,
                insight_id=insight.get('id'),
                insight_type=insight.get('insight_type'),
                severity=severity
            )
        except Exception as e:
            logger.error(
                "Failed to publish insight event",
                tenant_id=tenant_id,
                insight_id=insight.get('id'),
                error=str(e),
                exc_info=True
            )
            # Don't raise - we don't want to fail the whole workflow if event publishing fails
async def analyze_all_recipes(
self,
tenant_id: str,

View File

@@ -62,14 +62,14 @@ class YieldPredictor:
- planned_quantity
- actual_quantity
- yield_percentage
- worker_id
- staff_assigned (list of staff IDs)
- started_at
- completed_at
- batch_size
- equipment_id (optional)
- notes (optional)
production_context: Upcoming production context:
- worker_id
- staff_assigned (list of staff IDs)
- planned_start_time
- batch_size
- equipment_id (optional)
@@ -212,6 +212,9 @@ class YieldPredictor:
df['is_small_batch'] = (df['batch_size'] < df['batch_size'].quantile(0.25)).astype(int)
# Worker experience features (proxy: number of previous runs)
# Extract first worker from staff_assigned list
df['worker_id'] = df['staff_assigned'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else 'unknown')
df = df.sort_values('started_at')
df['worker_run_count'] = df.groupby('worker_id').cumcount() + 1
df['worker_experience_level'] = pd.cut(
@@ -232,6 +235,10 @@ class YieldPredictor:
factors = {}
# Worker impact
# Extract worker_id from staff_assigned for analysis
if 'worker_id' not in feature_df.columns:
feature_df['worker_id'] = feature_df['staff_assigned'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else 'unknown')
worker_yields = feature_df.groupby('worker_id')['yield_percentage'].agg(['mean', 'std', 'count'])
worker_yields = worker_yields[worker_yields['count'] >= 3] # Min 3 runs per worker
@@ -339,7 +346,10 @@ class YieldPredictor:
if 'duration_hours' in feature_df.columns:
feature_columns.append('duration_hours')
# Encode worker_id
# Encode worker_id (extracted from staff_assigned)
if 'worker_id' not in feature_df.columns:
feature_df['worker_id'] = feature_df['staff_assigned'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else 'unknown')
worker_encoding = {worker: idx for idx, worker in enumerate(feature_df['worker_id'].unique())}
feature_df['worker_encoded'] = feature_df['worker_id'].map(worker_encoding)
feature_columns.append('worker_encoded')
@@ -420,11 +430,15 @@ class YieldPredictor:
) -> Dict[str, Any]:
"""Predict yield for upcoming production run."""
# Extract context
worker_id = production_context.get('worker_id')
staff_assigned = production_context.get('staff_assigned', [])
worker_id = staff_assigned[0] if isinstance(staff_assigned, list) and len(staff_assigned) > 0 else 'unknown'
planned_start = pd.to_datetime(production_context.get('planned_start_time'))
batch_size = production_context.get('batch_size')
# Get worker experience
if 'worker_id' not in feature_df.columns:
feature_df['worker_id'] = feature_df['staff_assigned'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else 'unknown')
worker_runs = feature_df[feature_df['worker_id'] == worker_id]
worker_run_count = len(worker_runs) if len(worker_runs) > 0 else 1
@@ -578,7 +592,7 @@ class YieldPredictor:
'action': 'review_production_factors',
'params': {
'recipe_id': recipe_id,
'worker_id': production_context.get('worker_id')
'worker_id': worker_id
}
}]
})