Improve monitoring
This commit is contained in:
44
Tiltfile
44
Tiltfile
@@ -63,6 +63,35 @@ Monitoring:
|
||||
Applying security configurations...
|
||||
""")
|
||||
|
||||
# Create Docker Hub secret for image pulls (if credentials are available)
|
||||
# Tilt resource that provisions the `dockerhub-creds` image pull secret.
#
# Credential sources, in order:
#   1. DOCKERHUB_USERNAME / DOCKERHUB_PASSWORD environment variables
#   2. Docker CLI config at $HOME/.docker/config.json
#   3. none -> print instructions and carry on (fine for a local registry)
#
# When explicit credentials are supplied (case 1) a failure of the helper
# script still fails this resource. In case 2 the helper script exits non-zero
# whenever it cannot extract credentials (for example when Docker uses a
# credential store), so that call is guarded: a best-effort lookup must not
# fail this resource and block the resources that depend on it.
local_resource(
    'dockerhub-secret',
    cmd='''
        echo "🐳 Setting up Docker Hub image pull secret..."

        # Check if Docker Hub credentials are available
        if [ -n "$DOCKERHUB_USERNAME" ] && [ -n "$DOCKERHUB_PASSWORD" ]; then
            echo " Found DOCKERHUB_USERNAME and DOCKERHUB_PASSWORD environment variables"
            ./infrastructure/kubernetes/create-dockerhub-secret.sh
        elif [ -f "$HOME/.docker/config.json" ]; then
            echo " Attempting to use Docker CLI credentials..."
            # Best effort: fall through instead of failing the resource.
            ./infrastructure/kubernetes/create-dockerhub-secret.sh || echo " Continuing without Docker Hub authentication..."
        else
            echo " ⚠️ Docker Hub credentials not found"
            echo " To enable automatic Docker Hub authentication:"
            echo " 1. Run 'docker login', OR"
            echo " 2. Set environment variables:"
            echo " export DOCKERHUB_USERNAME='your-username'"
            echo " export DOCKERHUB_PASSWORD='your-password-or-token'"
            echo ""
            echo " Continuing without Docker Hub authentication..."
            echo " (This is OK for local development using local registry)"
        fi
    ''',
    labels=['00-security'],
    auto_init=True
)
|
||||
|
||||
# Apply security configurations before loading main manifests
|
||||
local_resource(
|
||||
'security-setup',
|
||||
@@ -75,6 +104,7 @@ local_resource(
|
||||
kubectl apply -f infrastructure/kubernetes/base/configmaps/postgres-logging-config.yaml
|
||||
echo "✅ Security configurations applied"
|
||||
''',
|
||||
resource_deps=['dockerhub-secret'],
|
||||
labels=['00-security'],
|
||||
auto_init=True
|
||||
)
|
||||
@@ -338,10 +368,20 @@ local_resource(
|
||||
echo "📊 Deploying SigNoz Monitoring Stack..."
|
||||
echo ""
|
||||
|
||||
# Ensure Docker Hub secret exists in bakery-ia namespace
|
||||
echo "🔐 Ensuring Docker Hub secret exists in bakery-ia namespace..."
|
||||
if ! kubectl get secret dockerhub-creds -n bakery-ia &>/dev/null; then
|
||||
echo " ⚠️ Docker Hub secret not found, attempting to create..."
|
||||
./infrastructure/kubernetes/create-dockerhub-secret.sh || echo " Continuing without Docker Hub authentication..."
|
||||
else
|
||||
echo " ✅ Docker Hub secret exists"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Check if SigNoz is already deployed
|
||||
if helm list -n signoz | grep -q signoz; then
|
||||
if helm list -n bakery-ia | grep -q signoz; then
|
||||
echo "✅ SigNoz already deployed, checking status..."
|
||||
helm status signoz -n signoz
|
||||
helm status signoz -n bakery-ia
|
||||
else
|
||||
echo "🚀 Installing SigNoz..."
|
||||
|
||||
|
||||
@@ -37,6 +37,14 @@ show_help() {
|
||||
$0 prod # Deploy to production
|
||||
$0 --upgrade prod # Upgrade production deployment
|
||||
$0 --remove dev # Remove development deployment"
|
||||
echo ""
|
||||
echo "Docker Hub Authentication:"
|
||||
echo " This script automatically creates a Docker Hub secret for image pulls."
|
||||
echo " Provide credentials via environment variables (recommended):"
|
||||
echo " export DOCKERHUB_USERNAME='your-username'"
|
||||
echo " export DOCKERHUB_PASSWORD='your-personal-access-token'"
|
||||
echo " Or ensure you're logged in with Docker CLI:"
|
||||
echo " docker login"
|
||||
}
|
||||
|
||||
# Parse command line arguments
|
||||
@@ -124,6 +132,82 @@ ensure_namespace() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to create Docker Hub secret for image pulls
|
||||
#######################################
# Print the "how to provide credentials" hint shared by every
# "no credentials available" path of create_dockerhub_secret.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - headline describing why credentials are missing
# Outputs:   hint text on stdout
#######################################
_dockerhub_credentials_hint() {
  echo "${YELLOW}$1${NC}"
  echo "${YELLOW}To enable automatic Docker Hub authentication:${NC}"
  echo " 1. Run 'docker login', OR"
  echo " 2. Set environment variables:"
  echo " export DOCKERHUB_USERNAME='your-username'"
  echo " export DOCKERHUB_PASSWORD='your-password-or-token'"
  echo "${YELLOW}Continuing without Docker Hub authentication...${NC}"
}

#######################################
# Create the `dockerhub-creds` image pull secret in $NAMESPACE.
#
# Credentials come from DOCKERHUB_USERNAME / DOCKERHUB_PASSWORD when both are
# set, otherwise from the base64 `auth` entry for Docker Hub in
# $HOME/.docker/config.json. Missing credentials are not an error: a hint is
# printed and the function returns successfully.
#
# Globals:   DRY_RUN, NAMESPACE, HOME, DOCKERHUB_USERNAME, DOCKERHUB_PASSWORD,
#            DOCKERHUB_EMAIL, BLUE, GREEN, YELLOW, NC (all read)
# Arguments: none
# Outputs:   progress messages on stdout, failure message on stderr
# Returns:   0 when the secret exists, was created, or was skipped;
#            1 when `kubectl create secret` fails
#######################################
create_dockerhub_secret() {
  echo "${BLUE}Setting up Docker Hub image pull secret...${NC}"

  if [[ "$DRY_RUN" == true ]]; then
    echo " (dry-run) Would create Docker Hub secret in namespace $NAMESPACE"
    return
  fi

  # Check if secret already exists
  if kubectl get secret dockerhub-creds -n "$NAMESPACE" &> /dev/null; then
    echo "${GREEN}Docker Hub secret already exists in namespace $NAMESPACE.${NC}"
    return
  fi

  local docker_config="$HOME/.docker/config.json"
  local username="" password="" auth="" decoded=""

  if [[ -n "${DOCKERHUB_USERNAME:-}" ]] && [[ -n "${DOCKERHUB_PASSWORD:-}" ]]; then
    echo "${BLUE}Found DOCKERHUB_USERNAME and DOCKERHUB_PASSWORD environment variables${NC}"
    username=$DOCKERHUB_USERNAME
    password=$DOCKERHUB_PASSWORD
  elif [[ -f "$docker_config" ]]; then
    echo "${BLUE}Attempting to use Docker CLI credentials...${NC}"

    # With a credential store the config file holds no usable auth entry.
    if grep -q "credsStore" "$docker_config"; then
      echo "${YELLOW}Docker is using a credential store. Please set environment variables:${NC}"
      echo " export DOCKERHUB_USERNAME='your-username'"
      echo " export DOCKERHUB_PASSWORD='your-password-or-token'"
      echo "${YELLOW}Continuing without Docker Hub authentication...${NC}"
      return
    fi

    # The auth entry is base64("username:password"). A missing jq, a missing
    # entry, or an undecodable value all count as "no credentials".
    auth=$(jq -r '.auths["https://index.docker.io/v1/"].auth // empty' \
      "$docker_config" 2>/dev/null) || auth=""
    if [[ -n "$auth" ]]; then
      decoded=$(printf '%s' "$auth" | base64 -d 2>/dev/null) || decoded=""
    fi

    if [[ "$decoded" == *:* ]]; then
      echo "${GREEN}Found Docker Hub credentials in Docker config${NC}"
      username=${decoded%%:*}
      password=${decoded#*:}   # the password itself may contain ':'
    else
      _dockerhub_credentials_hint "Could not find Docker Hub credentials"
      echo ""
      return
    fi
  else
    _dockerhub_credentials_hint "Docker Hub credentials not found"
    echo ""
    return
  fi

  # Only report success when kubectl actually created the secret.
  if ! kubectl create secret docker-registry dockerhub-creds \
    --docker-server=https://index.docker.io/v1/ \
    --docker-username="$username" \
    --docker-password="$password" \
    --docker-email="${DOCKERHUB_EMAIL:-noreply@bakery-ia.local}" \
    -n "$NAMESPACE"; then
    printf '%s\n' "Failed to create Docker Hub secret in namespace $NAMESPACE" >&2
    return 1
  fi

  echo "${GREEN}Docker Hub secret created successfully.${NC}"
  echo ""
}
|
||||
|
||||
# Function to deploy SigNoz
|
||||
deploy_signoz() {
|
||||
local values_file="infrastructure/helm/signoz-values-$ENVIRONMENT.yaml"
|
||||
@@ -278,12 +362,15 @@ main() {
|
||||
|
||||
# Ensure namespace
|
||||
ensure_namespace
|
||||
|
||||
|
||||
if [[ "$REMOVE" == true ]]; then
|
||||
remove_signoz
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
||||
# Create Docker Hub secret for image pulls
|
||||
create_dockerhub_secret
|
||||
|
||||
# Deploy SigNoz
|
||||
deploy_signoz
|
||||
|
||||
|
||||
@@ -7,74 +7,41 @@
|
||||
global:
|
||||
storageClass: "standard"
|
||||
domain: "monitoring.bakery-ia.local"
|
||||
# Docker Hub credentials for pulling images
|
||||
# Docker Hub credentials - applied to all sub-charts (including Zookeeper, ClickHouse, etc)
|
||||
imagePullSecrets:
|
||||
- name: dockerhub-creds
|
||||
- dockerhub-creds
|
||||
|
||||
# Frontend Configuration
|
||||
frontend:
|
||||
# Docker Hub credentials for pulling images (root level for SigNoz components)
|
||||
imagePullSecrets:
|
||||
- dockerhub-creds
|
||||
|
||||
# SignOz Main Component (includes frontend and query service)
|
||||
signoz:
|
||||
replicaCount: 1
|
||||
image:
|
||||
repository: signoz/frontend
|
||||
tag: 0.52.3
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
port: 3301
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: nginx
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/rewrite-target: /$2
|
||||
nginx.ingress.kubernetes.io/use-regex: "true"
|
||||
hosts:
|
||||
- host: monitoring.bakery-ia.local
|
||||
paths:
|
||||
- path: /signoz(/|$)(.*)
|
||||
pathType: ImplementationSpecific
|
||||
tls: []
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 25m # Reduced for local dev
|
||||
memory: 64Mi # Reduced for local dev
|
||||
limits:
|
||||
cpu: 200m
|
||||
memory: 256Mi
|
||||
|
||||
env:
|
||||
- name: FRONTEND_REFRESH_INTERVAL
|
||||
value: "30000"
|
||||
- name: BASE_URL
|
||||
value: "https://monitoring.bakery-ia.local/signoz"
|
||||
|
||||
# Query Service Configuration
|
||||
queryService:
|
||||
replicaCount: 1
|
||||
image:
|
||||
repository: signoz/query-service
|
||||
tag: 0.52.3
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
port: 8080
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: nginx
|
||||
annotations: {}
|
||||
hosts:
|
||||
- host: monitoring.bakery-ia.local
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
port: 8080
|
||||
tls: []
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 50m # Reduced for local dev
|
||||
memory: 128Mi # Reduced for local dev
|
||||
cpu: 100m # Combined frontend + query service
|
||||
memory: 256Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
|
||||
env:
|
||||
- name: DEPLOYMENT_TYPE
|
||||
value: "kubernetes-helm"
|
||||
- name: SIGNOZ_LOCAL_DB_PATH
|
||||
value: "/var/lib/signoz"
|
||||
cpu: 1000m
|
||||
memory: 1Gi
|
||||
|
||||
persistence:
|
||||
enabled: true
|
||||
@@ -135,6 +102,10 @@ clickhouse:
|
||||
cpu: 1000m
|
||||
memory: 1Gi
|
||||
|
||||
# Zookeeper Configuration (required by ClickHouse)
|
||||
zookeeper:
|
||||
enabled: true
|
||||
|
||||
# OpenTelemetry Collector - Data ingestion endpoint for all telemetry
|
||||
otelCollector:
|
||||
enabled: true
|
||||
@@ -262,8 +233,8 @@ otelCollector:
|
||||
timeout: 10s
|
||||
|
||||
# ClickHouse exporter for metrics
|
||||
clickhousemetricswrite:
|
||||
endpoint: tcp://signoz-clickhouse:9000/?database=signoz_metrics
|
||||
signozclickhousemetrics:
|
||||
dsn: "tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/signoz_metrics"
|
||||
timeout: 10s
|
||||
|
||||
# ClickHouse exporter for logs
|
||||
@@ -271,9 +242,9 @@ otelCollector:
|
||||
dsn: tcp://signoz-clickhouse:9000/?database=signoz_logs
|
||||
timeout: 10s
|
||||
|
||||
# Logging exporter for debugging (optional)
|
||||
logging:
|
||||
loglevel: info
|
||||
# Debug exporter for debugging (optional)
|
||||
debug:
|
||||
verbosity: detailed
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
@@ -287,7 +258,7 @@ otelCollector:
|
||||
metrics:
|
||||
receivers: [otlp, postgresql/auth, postgresql/inventory, postgresql/orders, redis, rabbitmq]
|
||||
processors: [memory_limiter, batch, resourcedetection]
|
||||
exporters: [clickhousemetricswrite]
|
||||
exporters: [signozclickhousemetrics]
|
||||
|
||||
# Logs pipeline
|
||||
logs:
|
||||
|
||||
@@ -7,6 +7,13 @@
|
||||
global:
|
||||
storageClass: "standard"
|
||||
domain: "monitoring.bakewise.ai"
|
||||
# Docker Hub credentials - applied to all sub-charts (including Zookeeper, ClickHouse, etc)
|
||||
imagePullSecrets:
|
||||
- dockerhub-creds
|
||||
|
||||
# Docker Hub credentials for pulling images (root level for SigNoz components)
|
||||
imagePullSecrets:
|
||||
- dockerhub-creds
|
||||
|
||||
# Frontend Configuration
|
||||
frontend:
|
||||
@@ -351,8 +358,8 @@ otelCollector:
|
||||
max_interval: 30s
|
||||
max_elapsed_time: 300s
|
||||
|
||||
clickhousemetricswrite:
|
||||
endpoint: tcp://clickhouse:9000/?database=signoz_metrics
|
||||
signozclickhousemetrics:
|
||||
endpoint: "tcp://clickhouse:9000/?database=signoz_metrics"
|
||||
timeout: 10s
|
||||
retry_on_failure:
|
||||
enabled: true
|
||||
@@ -369,9 +376,9 @@ otelCollector:
|
||||
max_interval: 30s
|
||||
max_elapsed_time: 300s
|
||||
|
||||
# Minimal logging for prod
|
||||
logging:
|
||||
loglevel: warn
|
||||
# Debug exporter for debugging (replaces deprecated logging exporter)
|
||||
debug:
|
||||
verbosity: detailed
|
||||
sampling_initial: 2
|
||||
sampling_thereafter: 500
|
||||
|
||||
@@ -381,17 +388,17 @@ otelCollector:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [memory_limiter, batch, resourcedetection, resource]
|
||||
exporters: [clickhousetraces, logging]
|
||||
exporters: [clickhousetraces, debug]
|
||||
|
||||
metrics:
|
||||
receivers: [otlp, prometheus]
|
||||
processors: [memory_limiter, batch, resourcedetection, resource]
|
||||
exporters: [clickhousemetricswrite]
|
||||
exporters: [signozclickhousemetrics]
|
||||
|
||||
logs:
|
||||
receivers: [otlp]
|
||||
processors: [memory_limiter, batch, resourcedetection, resource]
|
||||
exporters: [clickhouselogsexporter, logging]
|
||||
exporters: [clickhouselogsexporter, debug]
|
||||
|
||||
# OpenTelemetry Collector Deployment Mode
|
||||
otelCollectorDeployment:
|
||||
|
||||
@@ -1,125 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Script to add imagePullSecrets to all Kubernetes deployments, jobs, and cronjobs
|
||||
# This ensures all pods can pull images from Docker Hub using the dockerhub-creds secret
|
||||
|
||||
SECRET_NAME="dockerhub-creds"
|
||||
BASE_DIR="/Users/urtzialfaro/Documents/bakery-ia/infrastructure/kubernetes"
|
||||
|
||||
# ANSI color codes
|
||||
GREEN='\033[0;32m'
|
||||
BLUE='\033[0;34m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
echo -e "${BLUE}Adding imagePullSecrets to all Kubernetes resources...${NC}"
|
||||
echo "======================================================"
|
||||
echo ""
|
||||
|
||||
# Counter for files processed
|
||||
count=0
|
||||
|
||||
# Function to add imagePullSecrets to a file
|
||||
#######################################
# Insert an imagePullSecrets entry after the first matching "spec:" line of a
# manifest, unless the file already mentions imagePullSecrets.
#
# The manifest is rewritten in place (content is written back into the
# existing file so its permissions are kept) and `count` is incremented for
# every file that actually changed.
#
# NOTE(review): the leading whitespace inside the awk pattern and the printed
# lines must match the indentation of the pod spec in the target manifests —
# confirm against the manifests before relying on this.
#
# Globals:   count (written); SECRET_NAME, GREEN, YELLOW, NC (read)
# Arguments: $1 - path of the manifest to patch
# Outputs:   one status line per file on stdout
# Returns:   0 on success or skip, 1 if the temp file or awk step fails
#######################################
add_image_pull_secrets() {
  local file="$1"
  local name
  name=$(basename -- "$file")

  # Check if file already has imagePullSecrets
  if grep -q "imagePullSecrets:" "$file"; then
    echo -e "${YELLOW} ⊘ Skipping (already has imagePullSecrets): ${name}${NC}"
    return
  fi

  # Temporary file for processing
  local temp_file
  temp_file=$(mktemp) || return 1

  # Add the secret reference after the first matching "spec:" line only.
  if ! awk -v secret="${SECRET_NAME:-dockerhub-creds}" '
    /^ spec:$/ && !done {
      print $0
      print " imagePullSecrets:"
      print " - name: " secret
      done = 1
      next
    }
    { print }
  ' "$file" > "$temp_file"; then
    rm -f -- "$temp_file"
    return 1
  fi

  # Check if changes were made
  if ! cmp -s "$file" "$temp_file"; then
    # Write the content back instead of mv: mktemp files are mode 0600 and
    # mv would silently change the manifest's permissions.
    cat -- "$temp_file" > "$file"
    echo -e "${GREEN} ✓ Updated: ${name}${NC}"
    count=$((count + 1))
  else
    echo -e "${YELLOW} ⊘ No changes needed: ${name}${NC}"
  fi
  rm -f -- "$temp_file"
}
|
||||
|
||||
# Process all service deployments
|
||||
# Process all service deployments.
# The loop is fed through process substitution rather than `find | while`:
# a pipeline would run the loop body in a subshell and discard every `count`
# increment made by add_image_pull_secrets, so the final summary would
# under-report. NUL-delimited names keep paths with spaces or newlines intact.
echo -e "${BLUE}Processing service deployments...${NC}"
while IFS= read -r -d '' file; do
  if [ -f "$file" ]; then
    add_image_pull_secrets "$file"
  fi
done < <(find "$BASE_DIR/base/components" -name "*-service.yaml" -print0)
|
||||
echo ""
|
||||
|
||||
# Process all database deployments
|
||||
echo -e "${BLUE}Processing database deployments...${NC}"
|
||||
for file in $BASE_DIR/base/components/databases/*.yaml; do
|
||||
if [ -f "$file" ]; then
|
||||
add_image_pull_secrets "$file"
|
||||
fi
|
||||
done
|
||||
echo ""
|
||||
|
||||
# Process all migration jobs
|
||||
echo -e "${BLUE}Processing migration jobs...${NC}"
|
||||
for file in $BASE_DIR/base/migrations/*.yaml; do
|
||||
if [ -f "$file" ]; then
|
||||
add_image_pull_secrets "$file"
|
||||
fi
|
||||
done
|
||||
echo ""
|
||||
|
||||
# Process all cronjobs
|
||||
echo -e "${BLUE}Processing cronjobs...${NC}"
|
||||
for file in $BASE_DIR/base/cronjobs/*.yaml; do
|
||||
if [ -f "$file" ]; then
|
||||
add_image_pull_secrets "$file"
|
||||
fi
|
||||
done
|
||||
echo ""
|
||||
|
||||
# Process standalone jobs
|
||||
echo -e "${BLUE}Processing standalone jobs...${NC}"
|
||||
for file in $BASE_DIR/base/jobs/*.yaml; do
|
||||
if [ -f "$file" ]; then
|
||||
add_image_pull_secrets "$file"
|
||||
fi
|
||||
done
|
||||
echo ""
|
||||
|
||||
# Process deployments directory
|
||||
echo -e "${BLUE}Processing deployments...${NC}"
|
||||
for file in $BASE_DIR/base/deployments/*.yaml; do
|
||||
if [ -f "$file" ]; then
|
||||
add_image_pull_secrets "$file"
|
||||
fi
|
||||
done
|
||||
echo ""
|
||||
|
||||
# Process nominatim service
|
||||
if [ -f "$BASE_DIR/base/components/infrastructure/nominatim.yaml" ]; then
|
||||
echo -e "${BLUE}Processing nominatim service...${NC}"
|
||||
add_image_pull_secrets "$BASE_DIR/base/components/infrastructure/nominatim.yaml"
|
||||
echo ""
|
||||
fi
|
||||
|
||||
echo "======================================================"
|
||||
echo -e "${GREEN}Completed! Updated $count file(s)${NC}"
|
||||
echo ""
|
||||
echo "Next steps:"
|
||||
echo "1. Review the changes: git diff"
|
||||
echo "2. Apply to cluster: kubectl apply -k infrastructure/kubernetes/overlays/dev"
|
||||
echo "3. Verify pods are running: kubectl get pods -n bakery-ia"
|
||||
@@ -1,94 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Script to add OpenTelemetry monitoring configuration to all service deployments
|
||||
# This adds the necessary environment variables for SigNoz integration
|
||||
# Note: No Prometheus annotations needed - all metrics go via OTLP push
|
||||
|
||||
set -e
|
||||
|
||||
SERVICES=(
|
||||
"ai-insights"
|
||||
"distribution"
|
||||
"external"
|
||||
"forecasting"
|
||||
"inventory"
|
||||
"notification"
|
||||
"orchestrator"
|
||||
"orders"
|
||||
"pos"
|
||||
"procurement"
|
||||
"production"
|
||||
"recipes"
|
||||
"sales"
|
||||
"suppliers"
|
||||
"tenant"
|
||||
"training"
|
||||
"frontend"
|
||||
)
|
||||
|
||||
echo "Adding OpenTelemetry configuration to all services..."
|
||||
echo ""
|
||||
|
||||
for service in "${SERVICES[@]}"; do
|
||||
SERVICE_FILE="infrastructure/kubernetes/base/components/${service}/${service}-service.yaml"
|
||||
|
||||
if [ ! -f "$SERVICE_FILE" ]; then
|
||||
echo "⚠️ Skipping $service (file not found: $SERVICE_FILE)"
|
||||
continue
|
||||
fi
|
||||
|
||||
echo "📝 Processing $service-service..."
|
||||
|
||||
# Check if already has OTEL env vars
|
||||
if grep -q "OTEL_COLLECTOR_ENDPOINT" "$SERVICE_FILE"; then
|
||||
echo " ✓ Already has OpenTelemetry configuration"
|
||||
else
|
||||
echo " + Adding OpenTelemetry environment variables"
|
||||
# Create a YAML patch
|
||||
cat > "/tmp/${service}-otel-patch.yaml" << 'EOF'
|
||||
env:
|
||||
# OpenTelemetry Configuration
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "SERVICE_NAME_PLACEHOLDER"
|
||||
- name: ENABLE_TRACING
|
||||
value: "true"
|
||||
# Logging Configuration
|
||||
- name: OTEL_LOGS_EXPORTER
|
||||
value: "otlp"
|
||||
- name: OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED
|
||||
value: "true"
|
||||
# Metrics Configuration (all via OTLP, no Prometheus)
|
||||
- name: ENABLE_OTEL_METRICS
|
||||
value: "true"
|
||||
- name: ENABLE_SYSTEM_METRICS
|
||||
value: "true"
|
||||
EOF
|
||||
# Replace placeholder with actual service name
|
||||
sed -i.bak "s/SERVICE_NAME_PLACEHOLDER/${service}-service/g" "/tmp/${service}-otel-patch.yaml"
|
||||
|
||||
echo " ⚠️ Manual step required: Add env vars from /tmp/${service}-otel-patch.yaml"
|
||||
echo " Insert after 'ports:' section and before 'envFrom:' in $SERVICE_FILE"
|
||||
fi
|
||||
|
||||
echo " ✅ $service-service processed"
|
||||
echo ""
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "✅ Monitoring configuration prepared for all services!"
|
||||
echo ""
|
||||
echo "Next steps:"
|
||||
echo "1. Review the changes and manually add env vars from /tmp/*-otel-patch.yaml files"
|
||||
echo "2. Update SigNoz: helm upgrade signoz signoz/signoz -n signoz -f infrastructure/helm/signoz-values-dev.yaml"
|
||||
echo "3. Restart services: kubectl rollout restart deployment -n bakery-ia"
|
||||
echo "4. Check SigNoz UI at https://monitoring.bakery-ia.local for incoming data"
|
||||
echo ""
|
||||
echo "What metrics you'll see:"
|
||||
echo " - HTTP requests (method, endpoint, status code, duration)"
|
||||
echo " - System metrics (CPU, memory usage per process)"
|
||||
echo " - System-wide metrics (total CPU, memory, disk I/O, network I/O)"
|
||||
echo " - Custom business metrics (registrations, orders, etc.)"
|
||||
echo " - All pushed via OpenTelemetry OTLP (no Prometheus scraping)"
|
||||
@@ -1,162 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script to automatically add OpenTelemetry monitoring configuration to all service deployments.
|
||||
This adds environment variables for metrics, logs, and traces export to SigNoz.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Services to configure
|
||||
SERVICES = [
|
||||
"ai-insights",
|
||||
"distribution",
|
||||
"external",
|
||||
"forecasting",
|
||||
"inventory",
|
||||
"notification",
|
||||
"orchestrator",
|
||||
"orders",
|
||||
"pos",
|
||||
"procurement",
|
||||
"production",
|
||||
"recipes",
|
||||
"sales",
|
||||
"suppliers",
|
||||
"tenant",
|
||||
"training",
|
||||
]
|
||||
|
||||
OTEL_ENV_VARS_TEMPLATE = """ env:
|
||||
# OpenTelemetry Configuration
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "{service_name}"
|
||||
- name: ENABLE_TRACING
|
||||
value: "true"
|
||||
# Logging Configuration
|
||||
- name: OTEL_LOGS_EXPORTER
|
||||
value: "otlp"
|
||||
- name: OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED
|
||||
value: "true"
|
||||
# Metrics Configuration (all via OTLP, no Prometheus)
|
||||
- name: ENABLE_OTEL_METRICS
|
||||
value: "true"
|
||||
- name: ENABLE_SYSTEM_METRICS
|
||||
value: "true"
|
||||
"""
|
||||
|
||||
|
||||
def has_otel_config(content: str) -> bool:
    """Report whether the manifest text already mentions OTEL_COLLECTOR_ENDPOINT."""
    marker = "OTEL_COLLECTOR_ENDPOINT"
    return content.find(marker) != -1
|
||||
|
||||
|
||||
def add_otel_config(content: str, service_name: str) -> str:
|
||||
"""Add OTEL configuration to service deployment"""
|
||||
|
||||
# Prepare the env vars with the service name
|
||||
env_vars = OTEL_ENV_VARS_TEMPLATE.format(service_name=f"{service_name}-service")
|
||||
|
||||
# Find the container section and add env vars before envFrom
|
||||
# Pattern: find " containers:" then first " envFrom:" after it
|
||||
pattern = r'( containers:\n - name: [^\n]+\n image: [^\n]+\n(?: ports:\n(?: - [^\n]+\n)+)?)( envFrom:)'
|
||||
|
||||
replacement = r'\1' + env_vars + r'\2'
|
||||
|
||||
# Try to replace
|
||||
new_content = re.sub(pattern, replacement, content, count=1)
|
||||
|
||||
if new_content == content:
|
||||
print(f" ⚠️ Warning: Could not find insertion point automatically")
|
||||
return content
|
||||
|
||||
return new_content
|
||||
|
||||
|
||||
def process_service(service_name: str, base_path: Path) -> bool:
|
||||
"""Process a single service deployment file"""
|
||||
|
||||
service_file = base_path / "components" / service_name / f"{service_name}-service.yaml"
|
||||
|
||||
if not service_file.exists():
|
||||
print(f" ⚠️ File not found: {service_file}")
|
||||
return False
|
||||
|
||||
# Read file
|
||||
with open(service_file, 'r') as f:
|
||||
content = f.read()
|
||||
|
||||
# Check if already configured
|
||||
if has_otel_config(content):
|
||||
print(f" ✓ Already configured")
|
||||
return True
|
||||
|
||||
# Add configuration
|
||||
new_content = add_otel_config(content, service_name)
|
||||
|
||||
if new_content == content:
|
||||
return False
|
||||
|
||||
# Write back
|
||||
with open(service_file, 'w') as f:
|
||||
f.write(new_content)
|
||||
|
||||
print(f" ✅ Updated successfully")
|
||||
return True
|
||||
|
||||
|
||||
def main():
    """Add the OpenTelemetry env block to every service listed in SERVICES.

    Looks for a ``base`` directory next to this script, runs
    ``process_service`` for each service and prints a summary followed by the
    manual follow-up steps. Exits with status 1 when ``base`` is missing.
    """

    # Find base path
    script_dir = Path(__file__).parent
    base_path = script_dir / "base"

    if not base_path.exists():
        print(f"❌ Error: Base path not found: {base_path}")
        sys.exit(1)

    print("=" * 60)
    print("Adding OpenTelemetry Monitoring Configuration")
    print("=" * 60)
    print()

    success_count = 0
    fail_count = 0

    for service in SERVICES:
        print(f"📝 Processing {service}-service...")

        # process_service returns True only when the manifest already carried
        # the OTEL block or was just rewritten with it, so its result is
        # enough to classify the service; re-opening the file to check again
        # added nothing and left the file handle unclosed.
        if process_service(service, base_path):
            success_count += 1
        else:
            fail_count += 1

        print()

    print("=" * 60)
    print(f"✅ Successfully configured: {success_count}")
    if fail_count > 0:
        print(f"⚠️ Failed to configure: {fail_count}")
    print("=" * 60)
    print()

    print("Next steps:")
    print("1. Review the changes: git diff infrastructure/kubernetes/base/components/")
    print("2. Update SigNoz: helm upgrade signoz signoz/signoz -n signoz -f infrastructure/helm/signoz-values-dev.yaml")
    print("3. Apply changes: kubectl apply -k infrastructure/kubernetes/overlays/dev/")
    print("4. Verify: kubectl logs -n bakery-ia deployment/<service-name> | grep -i 'otel\\|metrics'")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -52,7 +52,7 @@ spec:
|
||||
name: whatsapp-secrets
|
||||
env:
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://otel-collector.monitoring.svc.cluster.local:4317"
|
||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4317"
|
||||
resources:
|
||||
requests:
|
||||
memory: "256Mi"
|
||||
|
||||
@@ -291,7 +291,7 @@ data:
|
||||
HEALTH_CHECK_INTERVAL: "30"
|
||||
|
||||
# Monitoring Configuration - SigNoz
|
||||
SIGNOZ_ROOT_URL: "http://localhost/signoz"
|
||||
SIGNOZ_ROOT_URL: "https://monitoring.bakery-ia.local"
|
||||
|
||||
# ================================================================
|
||||
# DATA COLLECTION SETTINGS
|
||||
@@ -390,9 +390,9 @@ data:
|
||||
OTEL_SERVICE_NAME: "bakery-ia"
|
||||
OTEL_RESOURCE_ATTRIBUTES: "deployment.environment=development"
|
||||
|
||||
# SigNoz Endpoints
|
||||
SIGNOZ_ENDPOINT: "http://signoz-query-service.signoz.svc.cluster.local:8080"
|
||||
SIGNOZ_FRONTEND_URL: "http://signoz-frontend.signoz.svc.cluster.local:3301"
|
||||
# SigNoz Endpoints (v0.106.0+ unified service)
|
||||
SIGNOZ_ENDPOINT: "http://signoz.signoz.svc.cluster.local:8080"
|
||||
SIGNOZ_FRONTEND_URL: "https://monitoring.bakery-ia.local"
|
||||
|
||||
# ================================================================
|
||||
# REPLENISHMENT PLANNING SETTINGS
|
||||
|
||||
@@ -63,22 +63,7 @@ spec:
|
||||
name: gateway-service
|
||||
port:
|
||||
number: 8000
|
||||
- host: monitoring.your-domain.com # To be overridden in overlays
|
||||
http:
|
||||
paths:
|
||||
# SigNoz Frontend UI and API (consolidated in newer versions)
|
||||
- path: /signoz(/|$)(.*)
|
||||
pathType: ImplementationSpecific
|
||||
backend:
|
||||
service:
|
||||
name: signoz
|
||||
port:
|
||||
number: 8080
|
||||
# SigNoz API endpoints
|
||||
- path: /signoz-api(/|$)(.*)
|
||||
pathType: ImplementationSpecific
|
||||
backend:
|
||||
service:
|
||||
name: signoz
|
||||
port:
|
||||
number: 8080
|
||||
# Note: SigNoz monitoring is deployed via Helm in the 'signoz' namespace
|
||||
# SigNoz creates its own Ingress via Helm chart configuration
|
||||
# Access at: https://monitoring.your-domain.com/ (configured in signoz-values.yaml)
|
||||
# SignOz ingress is managed separately - no need to configure here
|
||||
126
infrastructure/kubernetes/create-dockerhub-secret.sh
Executable file
126
infrastructure/kubernetes/create-dockerhub-secret.sh
Executable file
@@ -0,0 +1,126 @@
|
||||
#!/bin/bash
|
||||
|
||||
# =============================================================================
|
||||
# Create Docker Hub Image Pull Secret
|
||||
# =============================================================================
|
||||
# This script creates a Kubernetes secret for pulling images from Docker Hub.
|
||||
# The secret is used by both:
|
||||
# 1. bakery-ia namespace deployments (Tilt + Kustomize)
|
||||
# 2. Signoz Helm deployment
|
||||
#
|
||||
# Usage:
|
||||
# ./create-dockerhub-secret.sh
|
||||
#
|
||||
# Prerequisites:
|
||||
# - kubectl configured with access to the cluster
|
||||
# - DOCKERHUB_USERNAME and DOCKERHUB_PASSWORD environment variables set
|
||||
# - OR Docker CLI logged in (docker login)
|
||||
# =============================================================================
|
||||
|
||||
set -e
|
||||
|
||||
echo "🔐 Creating Docker Hub Image Pull Secret"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Check for required environment variables
|
||||
if [ -z "$DOCKERHUB_USERNAME" ] || [ -z "$DOCKERHUB_PASSWORD" ]; then
|
||||
echo "⚠️ DOCKERHUB_USERNAME and DOCKERHUB_PASSWORD environment variables not set"
|
||||
echo ""
|
||||
echo "Checking if Docker CLI is logged in..."
|
||||
|
||||
# Try to extract credentials from Docker config
|
||||
if [ -f "$HOME/.docker/config.json" ]; then
|
||||
# Check if using credential store
|
||||
if grep -q "credsStore" "$HOME/.docker/config.json"; then
|
||||
echo "⚠️ Docker is using a credential store. Please set environment variables manually:"
|
||||
echo ""
|
||||
echo " export DOCKERHUB_USERNAME='your-username'"
|
||||
echo " export DOCKERHUB_PASSWORD='your-password-or-token'"
|
||||
echo ""
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Try to extract from base64 encoded auth
|
||||
AUTH=$(cat "$HOME/.docker/config.json" | jq -r '.auths["https://index.docker.io/v1/"].auth // empty' 2>/dev/null)
|
||||
if [ -n "$AUTH" ]; then
|
||||
echo "✅ Found Docker Hub credentials in Docker config"
|
||||
DOCKERHUB_USERNAME=$(echo "$AUTH" | base64 -d | cut -d: -f1)
|
||||
DOCKERHUB_PASSWORD=$(echo "$AUTH" | base64 -d | cut -d: -f2-)
|
||||
else
|
||||
echo "❌ Could not find Docker Hub credentials"
|
||||
echo ""
|
||||
echo "Please either:"
|
||||
echo " 1. Run 'docker login' first, OR"
|
||||
echo " 2. Set environment variables:"
|
||||
echo " export DOCKERHUB_USERNAME='your-username'"
|
||||
echo " export DOCKERHUB_PASSWORD='your-password-or-token'"
|
||||
echo ""
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "❌ Docker config not found and environment variables not set"
|
||||
echo ""
|
||||
echo "Please set environment variables:"
|
||||
echo " export DOCKERHUB_USERNAME='your-username'"
|
||||
echo " export DOCKERHUB_PASSWORD='your-password-or-token'"
|
||||
echo ""
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "Using Docker Hub username: $DOCKERHUB_USERNAME"
|
||||
echo ""
|
||||
|
||||
# Function to create secret in a namespace
|
||||
#######################################
# Create or update the `dockerhub-creds` image pull secret in a namespace.
#
# The secret manifest is rendered client-side and then applied, so an
# existing secret is updated in place. Deleting and re-creating it would
# leave a window with no secret at all, and a failed create would leave the
# namespace without one.
#
# Globals:   DOCKERHUB_USERNAME, DOCKERHUB_PASSWORD, DOCKERHUB_EMAIL (read)
# Arguments: $1 - target namespace (created if it does not exist)
# Outputs:   progress messages on stdout, failure messages on stderr
# Returns:   0 on success, 1 if the namespace or the secret cannot be
#            created or applied
#######################################
create_secret_in_namespace() {
  local namespace=$1
  local manifest

  echo "📦 Creating secret in namespace: $namespace"

  # Create namespace if it doesn't exist
  if ! kubectl get namespace "$namespace" &>/dev/null; then
    echo " Creating namespace $namespace..."
    kubectl create namespace "$namespace" || return 1
  fi

  # Render the secret without touching the cluster ...
  if ! manifest=$(kubectl create secret docker-registry dockerhub-creds \
    --docker-server=https://index.docker.io/v1/ \
    --docker-username="$DOCKERHUB_USERNAME" \
    --docker-password="$DOCKERHUB_PASSWORD" \
    --docker-email="${DOCKERHUB_EMAIL:-noreply@bakery-ia.local}" \
    -n "$namespace" \
    --dry-run=client -o yaml); then
    printf '%s\n' " Failed to render secret for namespace $namespace" >&2
    return 1
  fi

  # ... then create-or-update it in a single step.
  if ! printf '%s\n' "$manifest" | kubectl apply -f -; then
    printf '%s\n' " Failed to apply secret in namespace $namespace" >&2
    return 1
  fi

  echo " ✅ Secret created successfully"
  echo ""
}
|
||||
|
||||
# Create secret in bakery-ia namespace (for Tilt deployments)
|
||||
create_secret_in_namespace "bakery-ia"
|
||||
|
||||
# Create secret in signoz namespace (for Signoz Helm deployment - if namespace exists)
|
||||
if kubectl get namespace signoz &>/dev/null; then
|
||||
create_secret_in_namespace "signoz"
|
||||
else
|
||||
echo "ℹ️ Signoz namespace not found, skipping (will be created on Helm install)"
|
||||
echo ""
|
||||
fi
|
||||
|
||||
echo "✅ Docker Hub secrets created successfully!"
|
||||
echo ""
|
||||
echo "The secret 'dockerhub-creds' is now available in:"
|
||||
echo " - bakery-ia namespace (for Tilt/Kustomize deployments)"
|
||||
if kubectl get namespace signoz &>/dev/null; then
|
||||
echo " - signoz namespace (for Signoz Helm deployment)"
|
||||
fi
|
||||
echo ""
|
||||
echo "All pods with imagePullSecrets: dockerhub-creds will now use these credentials"
|
||||
echo "to pull images from Docker Hub."
|
||||
echo ""
|
||||
@@ -73,14 +73,7 @@ spec:
|
||||
name: gateway-service
|
||||
port:
|
||||
number: 8000
|
||||
- host: monitoring.bakery-ia.local
|
||||
http:
|
||||
paths:
|
||||
# SigNoz Frontend UI
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: signoz
|
||||
port:
|
||||
number: 8080
|
||||
# Note: SigNoz monitoring is deployed via Helm in the 'signoz' namespace
|
||||
# SigNoz creates its own Ingress via Helm chart configuration (signoz-values-dev.yaml)
|
||||
# Access at: https://monitoring.bakery-ia.local/
|
||||
# SigNoz is served at the root of the monitoring subdomain
|
||||
@@ -73,13 +73,13 @@ patches:
|
||||
value: "deployment.environment=production,cluster.name=bakery-ia-prod"
|
||||
- op: add
|
||||
path: /data/SIGNOZ_ENDPOINT
|
||||
value: "http://signoz-query-service.signoz.svc.cluster.local:8080"
|
||||
value: "http://signoz.signoz.svc.cluster.local:8080"
|
||||
- op: add
|
||||
path: /data/SIGNOZ_FRONTEND_URL
|
||||
value: "https://monitoring.bakewise.ai/signoz"
|
||||
value: "https://monitoring.bakewise.ai"
|
||||
- op: add
|
||||
path: /data/SIGNOZ_ROOT_URL
|
||||
value: "https://monitoring.bakewise.ai/signoz"
|
||||
value: "https://monitoring.bakewise.ai"
|
||||
- op: add
|
||||
path: /data/RATE_LIMIT_ENABLED
|
||||
value: "true"
|
||||
@@ -119,12 +119,12 @@ patches:
|
||||
limits:
|
||||
memory: "4Gi"
|
||||
cpu: "1000m"
|
||||
# SigNoz Query Service production configuration
|
||||
# SigNoz Main Service production configuration (v0.106.0+ unified service)
|
||||
- target:
|
||||
group: apps
|
||||
version: v1
|
||||
kind: Deployment
|
||||
name: signoz-query-service
|
||||
kind: StatefulSet
|
||||
name: signoz
|
||||
namespace: signoz
|
||||
patch: |-
|
||||
- op: replace
|
||||
@@ -134,11 +134,11 @@ patches:
|
||||
path: /spec/template/spec/containers/0/resources
|
||||
value:
|
||||
requests:
|
||||
memory: "1Gi"
|
||||
cpu: "500m"
|
||||
limits:
|
||||
memory: "2Gi"
|
||||
cpu: "1000m"
|
||||
limits:
|
||||
memory: "4Gi"
|
||||
cpu: "2000m"
|
||||
# SigNoz AlertManager production configuration
|
||||
- target:
|
||||
group: apps
|
||||
@@ -159,26 +159,6 @@ patches:
|
||||
limits:
|
||||
memory: "1Gi"
|
||||
cpu: "500m"
|
||||
# SigNoz Frontend production configuration
|
||||
- target:
|
||||
group: apps
|
||||
version: v1
|
||||
kind: Deployment
|
||||
name: signoz-frontend
|
||||
namespace: signoz
|
||||
patch: |-
|
||||
- op: replace
|
||||
path: /spec/replicas
|
||||
value: 2
|
||||
- op: replace
|
||||
path: /spec/template/spec/containers/0/resources
|
||||
value:
|
||||
requests:
|
||||
memory: "512Mi"
|
||||
cpu: "250m"
|
||||
limits:
|
||||
memory: "1Gi"
|
||||
cpu: "500m"
|
||||
|
||||
images:
|
||||
- name: bakery/auth-service
|
||||
|
||||
@@ -28,10 +28,10 @@ data:
|
||||
OTEL_SERVICE_NAME: "bakery-ia"
|
||||
OTEL_RESOURCE_ATTRIBUTES: "deployment.environment=production,cluster.name=bakery-ia-prod"
|
||||
|
||||
# SigNoz Endpoints
|
||||
SIGNOZ_ENDPOINT: "http://signoz-query-service.signoz.svc.cluster.local:8080"
|
||||
SIGNOZ_FRONTEND_URL: "https://monitoring.bakewise.ai/signoz"
|
||||
SIGNOZ_ROOT_URL: "https://monitoring.bakewise.ai/signoz"
|
||||
# SigNoz Endpoints (v0.106.0+ unified service)
|
||||
SIGNOZ_ENDPOINT: "http://signoz.signoz.svc.cluster.local:8080"
|
||||
SIGNOZ_FRONTEND_URL: "https://monitoring.bakewise.ai"
|
||||
SIGNOZ_ROOT_URL: "https://monitoring.bakewise.ai"
|
||||
|
||||
# Rate Limiting (stricter in production)
|
||||
RATE_LIMIT_ENABLED: "true"
|
||||
|
||||
@@ -360,18 +360,6 @@ class DemoCleanupService:
|
||||
|
||||
logger.info("Demo session cleanup completed", stats=stats)
|
||||
|
||||
# Update Prometheus metrics
|
||||
duration_ms = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
|
||||
demo_session_cleanup_duration_seconds.labels(tier="all").observe(duration_ms / 1000)
|
||||
|
||||
# Update deleted sessions metrics by tier (we need to determine tiers from sessions)
|
||||
for session in all_sessions_to_cleanup:
|
||||
demo_sessions_deleted_total.labels(
|
||||
tier=session.demo_account_type,
|
||||
status="success"
|
||||
).inc()
|
||||
demo_sessions_active.labels(tier=session.demo_account_type).dec()
|
||||
|
||||
return stats
|
||||
|
||||
async def cleanup_old_destroyed_sessions(self, days: int = 7) -> int:
|
||||
|
||||
@@ -284,9 +284,7 @@ class CloneOrchestrator:
|
||||
)
|
||||
|
||||
start_time = datetime.now(timezone.utc)
|
||||
|
||||
# Update active sessions metric
|
||||
demo_sessions_active.labels(tier=demo_account_type).inc()
|
||||
|
||||
|
||||
# Filter services if specified
|
||||
services_to_clone = self.services
|
||||
@@ -383,29 +381,6 @@ class CloneOrchestrator:
|
||||
services_status=all_services,
|
||||
demo_account_type=demo_account_type
|
||||
)
|
||||
|
||||
# Update Prometheus metrics
|
||||
demo_session_creation_duration_seconds.labels(tier=demo_account_type).observe(duration_ms / 1000)
|
||||
demo_sessions_created_total.labels(tier=demo_account_type, status=overall_status).inc()
|
||||
|
||||
# Update alert and insight metrics if available
|
||||
if result.get("alert_generation"):
|
||||
alert_gen = result["alert_generation"]
|
||||
for alert_type, alerts in alert_gen.items():
|
||||
if isinstance(alerts, dict) and alerts.get("alerts_generated"):
|
||||
demo_alerts_generated_total.labels(
|
||||
tier=demo_account_type,
|
||||
alert_type=alert_type
|
||||
).inc(alerts["alerts_generated"])
|
||||
|
||||
if result.get("ai_insights_generation"):
|
||||
insights_gen = result["ai_insights_generation"]
|
||||
for insight_type, insights in insights_gen.items():
|
||||
if isinstance(insights, dict) and insights.get("insights_posted"):
|
||||
demo_ai_insights_generated_total.labels(
|
||||
tier=demo_account_type,
|
||||
insight_type=insight_type
|
||||
).inc(insights["insights_posted"])
|
||||
|
||||
return result
|
||||
|
||||
@@ -549,20 +524,6 @@ class CloneOrchestrator:
|
||||
duration_ms=duration_ms
|
||||
)
|
||||
|
||||
demo_cross_service_calls_total.labels(
|
||||
source_service="demo-session",
|
||||
target_service=service.name,
|
||||
status="success"
|
||||
).inc()
|
||||
demo_cross_service_call_duration_seconds.labels(
|
||||
source_service="demo-session",
|
||||
target_service=service.name
|
||||
).observe(duration_seconds)
|
||||
demo_service_clone_duration_seconds.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name
|
||||
).observe(duration_seconds)
|
||||
|
||||
if response.status_code == 200:
|
||||
result = response.json()
|
||||
logger.info(
|
||||
@@ -582,17 +543,6 @@ class CloneOrchestrator:
|
||||
response_text=response.text
|
||||
)
|
||||
|
||||
demo_cross_service_calls_total.labels(
|
||||
source_service="demo-session",
|
||||
target_service=service.name,
|
||||
status="failed"
|
||||
).inc()
|
||||
demo_cloning_errors_total.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name,
|
||||
error_type="http_error"
|
||||
).inc()
|
||||
|
||||
return {
|
||||
"service": service.name,
|
||||
"status": "failed",
|
||||
@@ -614,22 +564,6 @@ class CloneOrchestrator:
|
||||
url=service.url
|
||||
)
|
||||
|
||||
# Update error metrics
|
||||
demo_cross_service_calls_total.labels(
|
||||
source_service="demo-session",
|
||||
target_service=service.name,
|
||||
status="failed"
|
||||
).inc()
|
||||
demo_cloning_errors_total.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name,
|
||||
error_type="timeout"
|
||||
).inc()
|
||||
demo_service_clone_duration_seconds.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name
|
||||
).observe(duration_seconds)
|
||||
|
||||
return {
|
||||
"service": service.name,
|
||||
"status": "failed",
|
||||
@@ -650,22 +584,6 @@ class CloneOrchestrator:
|
||||
exc_info=True
|
||||
)
|
||||
|
||||
# Update error metrics
|
||||
demo_cross_service_calls_total.labels(
|
||||
source_service="demo-session",
|
||||
target_service=service.name,
|
||||
status="failed"
|
||||
).inc()
|
||||
demo_cloning_errors_total.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name,
|
||||
error_type="network_error"
|
||||
).inc()
|
||||
demo_service_clone_duration_seconds.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name
|
||||
).observe(duration_seconds)
|
||||
|
||||
return {
|
||||
"service": service.name,
|
||||
"status": "failed",
|
||||
@@ -686,22 +604,6 @@ class CloneOrchestrator:
|
||||
exc_info=True
|
||||
)
|
||||
|
||||
# Update error metrics
|
||||
demo_cross_service_calls_total.labels(
|
||||
source_service="demo-session",
|
||||
target_service=service.name,
|
||||
status="failed"
|
||||
).inc()
|
||||
demo_cloning_errors_total.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name,
|
||||
error_type="exception"
|
||||
).inc()
|
||||
demo_service_clone_duration_seconds.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name
|
||||
).observe(duration_seconds)
|
||||
|
||||
return {
|
||||
"service": service.name,
|
||||
"status": "failed",
|
||||
|
||||
Reference in New Issue
Block a user