Update monitoring packages to latest versions

- Updated all OpenTelemetry packages to latest versions:
  - opentelemetry-api: 1.27.0 → 1.39.1
  - opentelemetry-sdk: 1.27.0 → 1.39.1
  - opentelemetry-exporter-otlp-proto-grpc: 1.27.0 → 1.39.1
  - opentelemetry-exporter-otlp-proto-http: 1.27.0 → 1.39.1
  - opentelemetry-instrumentation-fastapi: 0.48b0 → 0.60b1
  - opentelemetry-instrumentation-httpx: 0.48b0 → 0.60b1
  - opentelemetry-instrumentation-redis: 0.48b0 → 0.60b1
  - opentelemetry-instrumentation-sqlalchemy: 0.48b0 → 0.60b1

- Removed prometheus-client==0.23.1 from all services
- Unified all services to use the same monitoring package versions

Generated by Mistral Vibe.
Co-Authored-By: Mistral Vibe <vibe@mistral.ai>
This commit is contained in:
Urtzi Alfaro
2026-01-08 19:25:52 +01:00
parent dfb7e4b237
commit 29d19087f1
129 changed files with 5718 additions and 1821 deletions

View File

@@ -0,0 +1,298 @@
#!/bin/bash
# ============================================================================
# SigNoz Deployment Script for Bakery IA
# ============================================================================
# This script deploys SigNoz monitoring stack using Helm
# Supports both development and production environments
# ============================================================================
set -e
# Color codes for output.
# Defined with ANSI-C quoting ($'...') so each variable holds the real ESC
# byte. The script prints them with plain `echo` (no -e), which does not
# interpret backslash escapes; single-quoted '\033[...' values would be
# printed literally instead of coloring the text.
RED=$'\033[0;31m'
GREEN=$'\033[0;32m'
YELLOW=$'\033[1;33m'
BLUE=$'\033[0;34m'
NC=$'\033[0m' # No Color
# Print the usage text (arguments, options, examples) to stdout.
# "$0" is expanded so the examples show the name the script was invoked with.
show_help() {
  cat <<EOF
Usage: $0 [OPTIONS] ENVIRONMENT

Deploy SigNoz monitoring stack for Bakery IA

Arguments:
ENVIRONMENT Environment to deploy to (dev|prod)

Options:
-h, --help Show this help message
-d, --dry-run Dry run - show what would be done without actually deploying
-u, --upgrade Upgrade existing deployment
-r, --remove Remove/Uninstall SigNoz deployment
-n, --namespace NAMESPACE Specify namespace (default: signoz)

Examples:
$0 dev # Deploy to development
$0 prod # Deploy to production
$0 --upgrade prod # Upgrade production deployment
$0 --remove dev # Remove development deployment
EOF
}
# Parse command line arguments.
# Flags and the positional ENVIRONMENT (dev|prod) may appear in any order.
# ENVIRONMENT is not initialised here, so a value inherited from the caller's
# environment is used when no dev|prod argument is given; the explicit
# dev/prod check after the loop guards that case.
DRY_RUN=false
# NOTE(review): UPGRADE is set by -u/--upgrade but is not read anywhere else
# in this script.
UPGRADE=false
REMOVE=false
NAMESPACE="signoz"
while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      show_help
      exit 0
      ;;
    -d|--dry-run)
      DRY_RUN=true
      shift
      ;;
    -u|--upgrade)
      UPGRADE=true
      shift
      ;;
    -r|--remove)
      REMOVE=true
      shift
      ;;
    -n|--namespace)
      # Without this guard a trailing "-n" makes `shift 2` fail, and under
      # `set -e` the script would exit without printing anything.
      if [[ $# -lt 2 || -z "$2" || "$2" == -* ]]; then
        echo "Error: $1 requires a NAMESPACE value."
        show_help
        exit 1
      fi
      NAMESPACE="$2"
      shift 2
      ;;
    dev|prod)
      ENVIRONMENT="$1"
      shift
      ;;
    *)
      echo "Unknown argument: $1"
      show_help
      exit 1
      ;;
  esac
done
# Validate environment
if [[ -z "$ENVIRONMENT" ]]; then
  echo "Error: Environment not specified. Use 'dev' or 'prod'."
  show_help
  exit 1
fi
if [[ "$ENVIRONMENT" != "dev" && "$ENVIRONMENT" != "prod" ]]; then
  echo "Error: Invalid environment. Use 'dev' or 'prod'."
  exit 1
fi
# Prerequisite check: succeed when a `helm` command can be resolved,
# otherwise print install guidance and terminate the script with status 1.
check_helm() {
  command -v helm > /dev/null 2>&1 && return 0

  echo "${RED}Error: Helm is not installed. Please install Helm first.${NC}"
  echo "Installation instructions: https://helm.sh/docs/intro/install/"
  exit 1
}
# Prerequisite check: succeed when `kubectl cluster-info` works, otherwise
# report the problem and terminate the script with status 1.
check_kubectl() {
  kubectl cluster-info > /dev/null 2>&1 && return 0

  echo "${RED}Error: kubectl is not configured or cannot connect to cluster.${NC}"
  echo "Please ensure you have access to a Kubernetes cluster."
  exit 1
}
# Make sure $NAMESPACE exists in the cluster.
# Reads:  NAMESPACE, DRY_RUN
# When the namespace is missing it is created, unless DRY_RUN is "true",
# in which case the intended action is only reported.
ensure_namespace() {
  if kubectl get namespace "$NAMESPACE" > /dev/null 2>&1; then
    echo "${BLUE}Namespace $NAMESPACE already exists.${NC}"
    return
  fi

  echo "${BLUE}Creating namespace $NAMESPACE...${NC}"
  if [[ "$DRY_RUN" != true ]]; then
    kubectl create namespace "$NAMESPACE"
    echo "${GREEN}Namespace $NAMESPACE created.${NC}"
  else
    echo " (dry-run) Would create namespace $NAMESPACE"
  fi
}
# Function to deploy SigNoz.
# Reads:   ENVIRONMENT, NAMESPACE, DRY_RUN
# Uses:    infrastructure/helm/signoz-values-$ENVIRONMENT.yaml, resolved
#          relative to the current working directory.
# Exits:   with status 1 when the values file does not exist.
# In dry-run mode only the Helm command that would run is printed; otherwise
# the release is installed/upgraded and wait_for_deployment is called.
deploy_signoz() {
  local values_file="infrastructure/helm/signoz-values-$ENVIRONMENT.yaml"
  # Single source of truth for the Helm invocation, so the dry-run output
  # always shows exactly the command the real run executes.
  local -a helm_args=(upgrade --install signoz signoz/signoz -n "$NAMESPACE" -f "$values_file")

  if [[ ! -f "$values_file" ]]; then
    echo "${RED}Error: Values file $values_file not found.${NC}"
    exit 1
  fi

  echo "${BLUE}Deploying SigNoz to $ENVIRONMENT environment...${NC}"
  echo " Using values file: $values_file"
  echo " Target namespace: $NAMESPACE"

  if [[ "$DRY_RUN" == true ]]; then
    echo " (dry-run) Would deploy SigNoz with:"
    echo " helm ${helm_args[*]}"
    return
  fi

  # Use upgrade --install to handle both new installations and upgrades
  echo "${BLUE}Installing/Upgrading SigNoz...${NC}"
  helm "${helm_args[@]}"
  echo "${GREEN}SigNoz deployment initiated.${NC}"
  echo "Waiting for pods to become ready..."

  # Wait for deployment to complete
  wait_for_deployment
}
# Function to remove SigNoz.
# Reads: NAMESPACE, DRY_RUN
# Uninstalls the Helm release named "signoz" from $NAMESPACE when it exists;
# in dry-run mode only reports what would happen.
remove_signoz() {
  echo "${BLUE}Removing SigNoz deployment from namespace $NAMESPACE...${NC}"

  if [[ "$DRY_RUN" == true ]]; then
    echo " (dry-run) Would remove SigNoz deployment"
    return
  fi

  # `helm list -q` prints release names only. Matching the whole line avoids
  # false positives from the chart column or from releases whose name merely
  # contains "signoz", which would make the uninstall below fail.
  if helm list -n "$NAMESPACE" -q | grep -Fxq signoz; then
    helm uninstall signoz -n "$NAMESPACE"
    echo "${GREEN}SigNoz deployment removed.${NC}"
  else
    echo "${YELLOW}No SigNoz deployment found in namespace $NAMESPACE.${NC}"
  fi
}
# Function to wait for deployment to complete.
# Arguments: $1 - timeout in seconds (optional, default 600)
#            $2 - poll interval in seconds (optional, default 10)
# Reads:     NAMESPACE
# Returns:   0 once every non-completed SigNoz pod is in phase Running,
#            1 when the timeout elapses first.
# The deployment status is printed in both cases.
wait_for_deployment() {
  local timeout="${1:-600}"
  local interval="${2:-10}"
  local selector="app.kubernetes.io/instance=signoz"
  local start_time current_time elapsed
  local ready_pods total_pods
  local status=0

  echo "${BLUE}Waiting for SigNoz pods to become ready...${NC}"

  start_time=$(date +%s)
  while true; do
    current_time=$(date +%s)
    elapsed=$((current_time - start_time))
    if [[ $elapsed -ge $timeout ]]; then
      echo "${RED}Timeout waiting for SigNoz pods to become ready.${NC}"
      # Report the failure to the caller instead of falling through to a
      # success exit code.
      status=1
      break
    fi

    # Check pod status. kubectl errors are deliberately silenced: a failed
    # query simply counts as zero pods and the loop retries.
    ready_pods=$(kubectl get pods -n "$NAMESPACE" -l "$selector" \
      --field-selector=status.phase=Running --no-headers 2>/dev/null \
      | wc -l | tr -d '[:space:]')
    # Pods in phase Succeeded (finished Job pods) are excluded from the
    # total; they never return to Running, so counting them would make
    # "running == total" unreachable.
    total_pods=$(kubectl get pods -n "$NAMESPACE" -l "$selector" \
      --field-selector='status.phase!=Succeeded' --no-headers 2>/dev/null \
      | wc -l | tr -d '[:space:]')

    if [[ $ready_pods -eq 0 ]]; then
      echo " Waiting for pods to start..."
    else
      echo " $ready_pods/$total_pods pods are running"
      if [[ $ready_pods -eq $total_pods && $total_pods -gt 0 ]]; then
        echo "${GREEN}All SigNoz pods are running!${NC}"
        break
      fi
    fi
    sleep "$interval"
  done

  # Show deployment status
  show_deployment_status
  return "$status"
}
# Print the SigNoz pods, services and ingresses found in $NAMESPACE
# (selected by the app.kubernetes.io/instance=signoz label), followed by the
# access information.
show_deployment_status() {
  local selector="app.kubernetes.io/instance=signoz"
  local section

  echo ""
  echo "${BLUE}=== SigNoz Deployment Status ===${NC}"
  echo ""

  # Each entry is "<heading> <kubectl resource kind>".
  for section in "Pods pods" "Services svc" "Ingress ingress"; do
    echo "${section% *}:"
    kubectl get "${section#* }" -n "$NAMESPACE" -l "$selector"
    echo ""
  done

  # Show access information
  show_access_info
}
# Print the UI/API URLs and OpenTelemetry collector endpoints for
# $ENVIRONMENT ("dev" gets the localhost set, anything else the
# monitoring.bakewise.ai set), followed by the login hint.
# NOTE(review): all URLs and the credentials below are hard-coded text, not
# read from the cluster - confirm they match the deployed configuration.
show_access_info() {
  echo "${BLUE}=== Access Information ===${NC}"

  case "$ENVIRONMENT" in
    dev)
      printf '%s\n' \
        "SigNoz UI: https://localhost/signoz" \
        "SigNoz API: https://localhost/signoz-api" \
        "" \
        "OpenTelemetry Collector Endpoints:" \
        " gRPC: localhost:4317" \
        " HTTP: localhost:4318" \
        " Metrics: localhost:8888"
      ;;
    *)
      printf '%s\n' \
        "SigNoz UI: https://monitoring.bakewise.ai/signoz" \
        "SigNoz API: https://monitoring.bakewise.ai/signoz-api" \
        "SigNoz Alerts: https://monitoring.bakewise.ai/signoz-alerts" \
        "" \
        "OpenTelemetry Collector Endpoints:" \
        " gRPC: monitoring.bakewise.ai:4317" \
        " HTTP: monitoring.bakewise.ai:4318"
      ;;
  esac

  printf '%s\n' \
    "" \
    "Default credentials:" \
    " Username: admin" \
    " Password: admin" \
    ""
}
# Main execution.
# Reads: REMOVE (set by the command-line parser at file scope).
# Flow:  banner -> prerequisite checks -> either remove the release and exit,
#        or ensure the namespace exists and deploy.
main() {
  echo "${BLUE}"
  echo "=========================================="
  echo "🚀 SigNoz Deployment for Bakery IA"
  echo "=========================================="
  echo "${NC}"

  # Check prerequisites
  check_helm
  check_kubectl

  # Removal is handled before ensure_namespace so that `--remove` never
  # creates a namespace that does not exist yet.
  if [[ "$REMOVE" == true ]]; then
    remove_signoz
    exit 0
  fi

  # Ensure namespace
  ensure_namespace

  # Deploy SigNoz
  deploy_signoz

  echo "${GREEN}"
  echo "=========================================="
  echo "✅ SigNoz deployment completed!"
  echo "=========================================="
  echo "${NC}"
}
# Run main function.
# No arguments are forwarded: the command line has already been consumed by
# the option-parsing loop at file scope, which sets the globals main reads.
main

View File

@@ -6,7 +6,10 @@
global:
storageClass: "standard"
domain: "localhost"
domain: "monitoring.bakery-ia.local"
# Docker Hub credentials for pulling images
imagePullSecrets:
- name: dockerhub-creds
# Frontend Configuration
frontend:
@@ -27,7 +30,7 @@ frontend:
nginx.ingress.kubernetes.io/rewrite-target: /$2
nginx.ingress.kubernetes.io/use-regex: "true"
hosts:
- host: localhost
- host: monitoring.bakery-ia.local
paths:
- path: /signoz(/|$)(.*)
pathType: ImplementationSpecific
@@ -35,8 +38,8 @@ frontend:
resources:
requests:
cpu: 50m
memory: 128Mi
cpu: 25m # Reduced for local dev
memory: 64Mi # Reduced for local dev
limits:
cpu: 200m
memory: 256Mi
@@ -44,6 +47,8 @@ frontend:
env:
- name: FRONTEND_REFRESH_INTERVAL
value: "30000"
- name: BASE_URL
value: "https://monitoring.bakery-ia.local/signoz"
# Query Service Configuration
queryService:
@@ -59,8 +64,8 @@ queryService:
resources:
requests:
cpu: 100m
memory: 256Mi
cpu: 50m # Reduced for local dev
memory: 128Mi # Reduced for local dev
limits:
cpu: 500m
memory: 512Mi
@@ -90,8 +95,8 @@ alertmanager:
resources:
requests:
cpu: 50m
memory: 128Mi
cpu: 25m # Reduced for local dev
memory: 64Mi # Reduced for local dev
limits:
cpu: 200m
memory: 256Mi
@@ -115,76 +120,59 @@ alertmanager:
# Add email, slack, webhook configs here
# ClickHouse Configuration - Time Series Database
# Minimal resources for local development on constrained Kind cluster
clickhouse:
replicaCount: 1
image:
repository: clickhouse/clickhouse-server
tag: 24.1.2-alpine
pullPolicy: IfNotPresent
enabled: true
installCustomStorageClass: false
service:
type: ClusterIP
httpPort: 8123
tcpPort: 9000
# Reduce ClickHouse resource requests for local dev
clickhouse:
resources:
requests:
cpu: 200m # Reduced from default 500m
memory: 512Mi
limits:
cpu: 1000m
memory: 1Gi
resources:
requests:
cpu: 500m
memory: 512Mi
limits:
cpu: 1000m
memory: 1Gi
persistence:
enabled: true
size: 10Gi
storageClass: "standard"
# ClickHouse configuration
config:
logger:
level: information
max_connections: 1024
max_concurrent_queries: 100
# Data retention (7 days for dev)
merge_tree:
parts_to_delay_insert: 150
parts_to_throw_insert: 300
# OpenTelemetry Collector - Integrated with SigNoz
# OpenTelemetry Collector - Data ingestion endpoint for all telemetry
otelCollector:
enabled: true
replicaCount: 1
image:
repository: signoz/signoz-otel-collector
tag: 0.102.8
pullPolicy: IfNotPresent
# Service configuration - expose both gRPC and HTTP endpoints
service:
type: ClusterIP
ports:
otlpGrpc: 4317
otlpHttp: 4318
metrics: 8888
healthCheck: 13133
# gRPC receivers
- name: otlp-grpc
port: 4317
targetPort: 4317
protocol: TCP
# HTTP receivers
- name: otlp-http
port: 4318
targetPort: 4318
protocol: TCP
# Prometheus remote write
- name: prometheus
port: 8889
targetPort: 8889
protocol: TCP
resources:
requests:
cpu: 100m
memory: 256Mi
cpu: 50m # Reduced from 100m
memory: 128Mi # Reduced from 256Mi
limits:
cpu: 500m
memory: 512Mi
# Full OTEL Collector Configuration
# OpenTelemetry Collector configuration
config:
extensions:
health_check:
endpoint: 0.0.0.0:13133
zpages:
endpoint: 0.0.0.0:55679
receivers:
# OTLP receivers for traces, metrics, and logs from applications
# All application telemetry is pushed via OTLP protocol
otlp:
protocols:
grpc:
@@ -193,105 +181,119 @@ otelCollector:
endpoint: 0.0.0.0:4318
cors:
allowed_origins:
- "http://localhost"
- "https://localhost"
- "*"
# Prometheus receiver for scraping metrics
prometheus:
config:
scrape_configs:
- job_name: 'otel-collector'
scrape_interval: 30s
static_configs:
- targets: ['localhost:8888']
# PostgreSQL receivers for database metrics
# Collects metrics directly from PostgreSQL databases
postgresql/auth:
endpoint: auth-db-service.bakery-ia:5432
username: ${POSTGRES_MONITOR_USER}
password: ${POSTGRES_MONITOR_PASSWORD}
databases:
- auth_db
collection_interval: 60s
tls:
insecure: false
postgresql/inventory:
endpoint: inventory-db-service.bakery-ia:5432
username: ${POSTGRES_MONITOR_USER}
password: ${POSTGRES_MONITOR_PASSWORD}
databases:
- inventory_db
collection_interval: 60s
tls:
insecure: false
postgresql/orders:
endpoint: orders-db-service.bakery-ia:5432
username: ${POSTGRES_MONITOR_USER}
password: ${POSTGRES_MONITOR_PASSWORD}
databases:
- orders_db
collection_interval: 60s
tls:
insecure: false
# Add more PostgreSQL databases as needed
# postgresql/SERVICE:
# endpoint: SERVICE-db-service.bakery-ia:5432
# ...
# Redis receiver for cache metrics
redis:
endpoint: redis-service.bakery-ia:6379
password: ${REDIS_PASSWORD}
collection_interval: 60s
tls:
insecure: false
cert_file: /etc/redis-tls/redis-cert.pem
key_file: /etc/redis-tls/redis-key.pem
ca_file: /etc/redis-tls/ca-cert.pem
# RabbitMQ receiver via management API
rabbitmq:
endpoint: http://rabbitmq-service.bakery-ia:15672
username: ${RABBITMQ_USER}
password: ${RABBITMQ_PASSWORD}
collection_interval: 60s
processors:
# Batch processor for better performance
batch:
timeout: 10s
send_batch_size: 1024
# Memory limiter to prevent OOM
memory_limiter:
check_interval: 1s
limit_mib: 400
spike_limit_mib: 100
# Resource detection for K8s
# Resource detection
resourcedetection:
detectors: [env, system, docker]
detectors: [env, system]
timeout: 5s
# Add resource attributes
resource:
attributes:
- key: deployment.environment
value: development
action: upsert
exporters:
# Export to SigNoz ClickHouse
# ClickHouse exporter for traces
clickhousetraces:
datasource: tcp://clickhouse:9000/?database=signoz_traces
datasource: tcp://signoz-clickhouse:9000/?database=signoz_traces
timeout: 10s
# ClickHouse exporter for metrics
clickhousemetricswrite:
endpoint: tcp://clickhouse:9000/?database=signoz_metrics
endpoint: tcp://signoz-clickhouse:9000/?database=signoz_metrics
timeout: 10s
# ClickHouse exporter for logs
clickhouselogsexporter:
dsn: tcp://clickhouse:9000/?database=signoz_logs
dsn: tcp://signoz-clickhouse:9000/?database=signoz_logs
timeout: 10s
# Debug logging
# Logging exporter for debugging (optional)
logging:
loglevel: info
sampling_initial: 5
sampling_thereafter: 200
service:
extensions: [health_check, zpages]
pipelines:
# Traces pipeline
traces:
receivers: [otlp]
processors: [memory_limiter, batch, resourcedetection, resource]
exporters: [clickhousetraces, logging]
processors: [memory_limiter, batch, resourcedetection]
exporters: [clickhousetraces]
# Metrics pipeline
metrics:
receivers: [otlp, prometheus]
processors: [memory_limiter, batch, resourcedetection, resource]
receivers: [otlp, postgresql/auth, postgresql/inventory, postgresql/orders, redis, rabbitmq]
processors: [memory_limiter, batch, resourcedetection]
exporters: [clickhousemetricswrite]
# Logs pipeline
logs:
receivers: [otlp]
processors: [memory_limiter, batch, resourcedetection, resource]
exporters: [clickhouselogsexporter, logging]
# OpenTelemetry Collector Deployment Mode
otelCollectorDeployment:
enabled: true
mode: deployment
# Node Exporter for infrastructure metrics (optional)
nodeExporter:
enabled: true
service:
type: ClusterIP
port: 9100
resources:
requests:
cpu: 50m
memory: 64Mi
limits:
cpu: 100m
memory: 128Mi
# Schemamanager - Manages ClickHouse schema
schemamanager:
enabled: true
image:
repository: signoz/signoz-schema-migrator
tag: 0.52.3
pullPolicy: IfNotPresent
processors: [memory_limiter, batch, resourcedetection]
exporters: [clickhouselogsexporter]
# Additional Configuration
serviceAccount:

View File

@@ -0,0 +1,394 @@
#!/bin/bash
# ============================================================================
# SigNoz Verification Script for Bakery IA
# ============================================================================
# This script verifies that SigNoz is properly deployed and functioning
# ============================================================================
set -e
# Color codes for output.
# Defined with ANSI-C quoting ($'...') so each variable holds the real ESC
# byte. The script prints them with plain `echo` (no -e), which does not
# interpret backslash escapes; single-quoted '\033[...' values would be
# printed literally instead of coloring the text.
RED=$'\033[0;31m'
GREEN=$'\033[0;32m'
YELLOW=$'\033[1;33m'
BLUE=$'\033[0;34m'
NC=$'\033[0m' # No Color
# Print the usage text (arguments, options, examples) to stdout.
# "$0" is expanded so the examples show the name the script was invoked with.
show_help() {
  cat <<EOF
Usage: $0 [OPTIONS] ENVIRONMENT

Verify SigNoz deployment for Bakery IA

Arguments:
ENVIRONMENT Environment to verify (dev|prod)

Options:
-h, --help Show this help message
-n, --namespace NAMESPACE Specify namespace (default: signoz)

Examples:
$0 dev # Verify development deployment
$0 prod # Verify production deployment
$0 --namespace monitoring dev # Verify with custom namespace
EOF
}
# Parse command line arguments.
# Flags and the positional ENVIRONMENT (dev|prod) may appear in any order.
# ENVIRONMENT is not initialised here, so a value inherited from the caller's
# environment is used when no dev|prod argument is given; the explicit
# dev/prod check after the loop guards that case.
NAMESPACE="signoz"
while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      show_help
      exit 0
      ;;
    -n|--namespace)
      # Without this guard a trailing "-n" makes `shift 2` fail, and under
      # `set -e` the script would exit without printing anything.
      if [[ $# -lt 2 || -z "$2" || "$2" == -* ]]; then
        echo "Error: $1 requires a NAMESPACE value."
        show_help
        exit 1
      fi
      NAMESPACE="$2"
      shift 2
      ;;
    dev|prod)
      ENVIRONMENT="$1"
      shift
      ;;
    *)
      echo "Unknown argument: $1"
      show_help
      exit 1
      ;;
  esac
done
# Validate environment
if [[ -z "$ENVIRONMENT" ]]; then
  echo "Error: Environment not specified. Use 'dev' or 'prod'."
  show_help
  exit 1
fi
if [[ "$ENVIRONMENT" != "dev" && "$ENVIRONMENT" != "prod" ]]; then
  echo "Error: Invalid environment. Use 'dev' or 'prod'."
  exit 1
fi
# Prerequisite check: succeed when `kubectl cluster-info` works, otherwise
# report the problem and terminate the script with status 1.
check_kubectl() {
  kubectl cluster-info > /dev/null 2>&1 && return 0

  echo "${RED}Error: kubectl is not configured or cannot connect to cluster.${NC}"
  echo "Please ensure you have access to a Kubernetes cluster."
  exit 1
}
# Prerequisite check: succeed when $NAMESPACE exists in the cluster,
# otherwise point at the deploy script and terminate with status 1.
check_namespace() {
  kubectl get namespace "$NAMESPACE" > /dev/null 2>&1 && return 0

  echo "${RED}Error: Namespace $NAMESPACE does not exist.${NC}"
  echo "Please deploy SigNoz first using: ./deploy-signoz.sh $ENVIRONMENT"
  exit 1
}
# Print how many resources of kind $1 carry the SigNoz instance label in
# $NAMESPACE (0 when the query fails or nothing matches).
_signoz_resource_count() {
  kubectl get "$1" -n "$NAMESPACE" -l app.kubernetes.io/instance=signoz \
    --no-headers 2>/dev/null | wc -l | tr -d '[:space:]'
}

# Function to verify SigNoz deployment.
# Reads: ENVIRONMENT, NAMESPACE
# Runs seven checks (Helm release, pods, services, ingress, PVCs, resource
# usage, log errors), then the environment-specific checks and the access
# information.
# Exits with status 1 when the Helm release or all pods are missing; every
# other finding is reported as a warning only.
verify_deployment() {
  local selector="app.kubernetes.io/instance=signoz"
  local pod_table total_pods running_pods ready_pods
  local service_count ingress_count pvc_count
  local pod pod_errors
  local error_found=false

  echo "${BLUE}"
  echo "=========================================="
  echo "🔍 Verifying SigNoz Deployment"
  echo "=========================================="
  echo "Environment: $ENVIRONMENT"
  echo "Namespace: $NAMESPACE"
  echo "${NC}"
  echo ""

  # Check if SigNoz helm release exists.
  # `helm list -q` prints release names only; the whole-line match avoids
  # false positives from the chart column or similarly named releases.
  echo "${BLUE}1. Checking Helm release...${NC}"
  if helm list -n "$NAMESPACE" -q | grep -Fxq signoz; then
    echo "${GREEN}✅ SigNoz Helm release found${NC}"
  else
    echo "${RED}❌ SigNoz Helm release not found${NC}"
    echo "Please deploy SigNoz first using: ./deploy-signoz.sh $ENVIRONMENT"
    exit 1
  fi
  echo ""

  # Check pod status.
  # The pod table is fetched once and counted with awk; columns are
  # NAME READY STATUS ..., so $2 is "ready/total" containers and $3 the status.
  echo "${BLUE}2. Checking pod status...${NC}"
  pod_table=$(kubectl get pods -n "$NAMESPACE" -l "$selector" --no-headers 2>/dev/null || true)
  total_pods=$(awk 'NF { n++ } END { print n + 0 }' <<<"$pod_table")
  # `print n + 0` always yields exactly one number, so the comparisons below
  # cannot break on a multi-line value when nothing is Running.
  running_pods=$(awk '$3 == "Running" { n++ } END { print n + 0 }' <<<"$pod_table")
  # A pod is ready when all of its containers are (1/1, 2/2, ...), not only
  # single-container pods.
  ready_pods=$(awk '$3 == "Running" { split($2, r, "/"); if (r[1] == r[2]) n++ } END { print n + 0 }' <<<"$pod_table")
  echo "Total pods: $total_pods"
  echo "Running pods: $running_pods"
  echo "Ready pods: $ready_pods"
  if [[ $total_pods -eq 0 ]]; then
    echo "${RED}❌ No SigNoz pods found${NC}"
    exit 1
  fi
  if [[ $running_pods -eq $total_pods ]]; then
    echo "${GREEN}✅ All pods are running${NC}"
  else
    echo "${YELLOW}⚠️ Some pods are not running${NC}"
  fi
  if [[ $ready_pods -eq $total_pods ]]; then
    echo "${GREEN}✅ All pods are ready${NC}"
  else
    echo "${YELLOW}⚠️ Some pods are not ready${NC}"
  fi
  echo ""

  # Show pod details
  echo "${BLUE}Pod Details:${NC}"
  kubectl get pods -n "$NAMESPACE" -l "$selector"
  echo ""

  # Check services
  echo "${BLUE}3. Checking services...${NC}"
  service_count=$(_signoz_resource_count svc)
  if [[ $service_count -gt 0 ]]; then
    echo "${GREEN}✅ Services found ($service_count services)${NC}"
    kubectl get svc -n "$NAMESPACE" -l "$selector"
  else
    echo "${RED}❌ No services found${NC}"
  fi
  echo ""

  # Check ingress
  echo "${BLUE}4. Checking ingress...${NC}"
  ingress_count=$(_signoz_resource_count ingress)
  if [[ $ingress_count -gt 0 ]]; then
    echo "${GREEN}✅ Ingress found ($ingress_count ingress resources)${NC}"
    kubectl get ingress -n "$NAMESPACE" -l "$selector"
  else
    echo "${YELLOW}⚠️ No ingress found (may be configured in main namespace)${NC}"
  fi
  echo ""

  # Check PVCs
  echo "${BLUE}5. Checking persistent volume claims...${NC}"
  pvc_count=$(_signoz_resource_count pvc)
  if [[ $pvc_count -gt 0 ]]; then
    echo "${GREEN}✅ PVCs found ($pvc_count PVCs)${NC}"
    kubectl get pvc -n "$NAMESPACE" -l "$selector"
  else
    echo "${YELLOW}⚠️ No PVCs found (may not be required for all components)${NC}"
  fi
  echo ""

  # Check resource usage
  echo "${BLUE}6. Checking resource usage...${NC}"
  if kubectl top pods -n "$NAMESPACE" &> /dev/null; then
    echo "${GREEN}✅ Resource usage:${NC}"
    kubectl top pods -n "$NAMESPACE" -l "$selector"
  else
    echo "${YELLOW}⚠️ Metrics server not available or no resource usage data${NC}"
  fi
  echo ""

  # Check logs for errors
  echo "${BLUE}7. Checking for errors in logs...${NC}"
  # Check each pod for errors
  while IFS= read -r pod; do
    if [[ -n "$pod" ]]; then
      # grep -c exits 1 when it counts zero lines; `|| true` keeps that from
      # being treated as a failure while its "0" output is still captured.
      pod_errors=$(kubectl logs -n "$NAMESPACE" "$pod" 2>/dev/null \
        | grep -ciE 'error|exception|fail|crash' || true)
      if [[ $pod_errors -gt 0 ]]; then
        echo "${RED}❌ Errors found in pod $pod ($pod_errors errors)${NC}"
        error_found=true
      fi
    fi
  done < <(kubectl get pods -n "$NAMESPACE" -l "$selector" -o name | sed 's|pod/||')
  if [[ "$error_found" == false ]]; then
    echo "${GREEN}✅ No errors found in logs${NC}"
  fi
  echo ""

  # Environment-specific checks
  if [[ "$ENVIRONMENT" == "dev" ]]; then
    verify_dev_specific
  else
    verify_prod_specific
  fi

  # Show access information
  show_access_info
}
# Development-only checks (step 8): looks for "localhost" in the namespace's
# ingress listing and compares the query-service memory limit with 512Mi.
# Both results are informational; nothing here terminates the script.
verify_dev_specific() {
  local mem_limit

  echo "${BLUE}8. Development-specific checks...${NC}"

  # Check if localhost ingress is configured
  if ! kubectl get ingress -n "$NAMESPACE" | grep -q "localhost"; then
    echo "${YELLOW}⚠️ Localhost ingress not found${NC}"
  else
    echo "${GREEN}✅ Localhost ingress configured${NC}"
  fi

  # Check resource limits (should be lower for dev); a failed lookup yields
  # an empty value, which takes the warning branch.
  mem_limit=$(kubectl get deployment -n "$NAMESPACE" signoz-query-service -o jsonpath='{.spec.template.spec.containers[0].resources.limits.memory}' 2>/dev/null || echo "")
  if [[ "$mem_limit" != "512Mi" ]]; then
    echo "${YELLOW}⚠️ Resource limits may not be optimized for development${NC}"
  else
    echo "${GREEN}✅ Development resource limits applied${NC}"
  fi
  echo ""
}
# Production-only checks (step 8): looks for "signoz-tls-cert" in the
# namespace's ingress listing, reports whether signoz-query-service has more
# than one replica, and compares its memory limit with 2Gi.
# All results are informational; nothing here terminates the script.
verify_prod_specific() {
  local replica_count mem_limit

  echo "${BLUE}8. Production-specific checks...${NC}"

  # Check if TLS is configured
  if ! kubectl get ingress -n "$NAMESPACE" | grep -q "signoz-tls-cert"; then
    echo "${YELLOW}⚠️ TLS certificate not found${NC}"
  else
    echo "${GREEN}✅ TLS certificate configured${NC}"
  fi

  # Check if multiple replicas are running; a failed lookup falls back to "1".
  replica_count=$(kubectl get deployment -n "$NAMESPACE" signoz-query-service -o jsonpath='{.spec.replicas}' 2>/dev/null || echo "1")
  if [[ $replica_count -gt 1 ]]; then
    echo "${GREEN}✅ High availability configured ($replica_count replicas)${NC}"
  else
    echo "${YELLOW}⚠️ Single replica detected (not highly available)${NC}"
  fi

  # Check resource limits (should be higher for prod); a failed lookup yields
  # an empty value, which takes the warning branch.
  mem_limit=$(kubectl get deployment -n "$NAMESPACE" signoz-query-service -o jsonpath='{.spec.template.spec.containers[0].resources.limits.memory}' 2>/dev/null || echo "")
  if [[ "$mem_limit" != "2Gi" ]]; then
    echo "${YELLOW}⚠️ Resource limits may not be optimized for production${NC}"
  else
    echo "${GREEN}✅ Production resource limits applied${NC}"
  fi
  echo ""
}
# Print a banner, the UI/API URLs and collector endpoints for $ENVIRONMENT
# ("dev" gets the localhost set, anything else the monitoring.bakewise.ai
# set), the login hint, and ready-to-copy curl test commands.
# NOTE(review): all URLs and the credentials below are hard-coded text, not
# read from the cluster - confirm they match the deployed configuration.
show_access_info() {
  local rule="=========================================="

  printf '%s\n' "${BLUE}" "$rule" "📋 Access Information" "$rule" "${NC}"

  if [[ "$ENVIRONMENT" != "dev" ]]; then
    printf '%s\n' \
      "SigNoz UI: https://monitoring.bakewise.ai/signoz" \
      "SigNoz API: https://monitoring.bakewise.ai/signoz-api" \
      "SigNoz Alerts: https://monitoring.bakewise.ai/signoz-alerts" \
      "" \
      "OpenTelemetry Collector:" \
      " gRPC: monitoring.bakewise.ai:4317" \
      " HTTP: monitoring.bakewise.ai:4318"
  else
    printf '%s\n' \
      "SigNoz UI: https://localhost/signoz" \
      "SigNoz API: https://localhost/signoz-api" \
      "" \
      "OpenTelemetry Collector:" \
      " gRPC: localhost:4317" \
      " HTTP: localhost:4318" \
      " Metrics: localhost:8888"
  fi

  printf '%s\n' \
    "" \
    "Default Credentials:" \
    " Username: admin" \
    " Password: admin" \
    ""

  # Show connection test commands
  echo "Connection Test Commands:"
  if [[ "$ENVIRONMENT" != "dev" ]]; then
    printf '%s\n' \
      " curl https://monitoring.bakewise.ai/signoz" \
      " curl https://monitoring.bakewise.ai/signoz-api/health"
  else
    printf '%s\n' \
      " curl -k https://localhost/signoz" \
      " curl -k https://localhost/signoz-api/health"
  fi
  echo ""
}
# Print the HTTP status code curl reports for a URL.
# Arguments: $1 - URL to probe; any further arguments are passed to curl
#            as extra options (e.g. -k).
# The probe is time-limited so an unreachable endpoint cannot stall the
# verification run, and a failed transfer never aborts the script.
_signoz_http_code() {
  local url=$1
  shift
  curl "$@" -s -o /dev/null -w "%{http_code}" \
    --connect-timeout 5 --max-time 10 "$url" || true
}

# Function to run connectivity tests.
# Reads: ENVIRONMENT
# For "dev", probes the frontend, API health and collector metrics endpoints
# on localhost and reports each result; for any other environment only a
# reminder is printed. Never fails the script.
run_connectivity_tests() {
  local code

  echo "${BLUE}"
  echo "=========================================="
  echo "🔗 Running Connectivity Tests"
  echo "=========================================="
  echo "${NC}"

  if [[ "$ENVIRONMENT" == "dev" ]]; then
    # Test frontend (a redirect also counts as reachable)
    echo "Testing SigNoz frontend..."
    code=$(_signoz_http_code https://localhost/signoz -k)
    if [[ "$code" == "200" || "$code" == "302" ]]; then
      echo "${GREEN}✅ Frontend accessible${NC}"
    else
      echo "${RED}❌ Frontend not accessible${NC}"
    fi

    # Test API
    echo "Testing SigNoz API..."
    code=$(_signoz_http_code https://localhost/signoz-api/health -k)
    if [[ "$code" == "200" ]]; then
      echo "${GREEN}✅ API accessible${NC}"
    else
      echo "${RED}❌ API not accessible${NC}"
    fi

    # Test OTEL collector
    echo "Testing OpenTelemetry collector..."
    code=$(_signoz_http_code http://localhost:8888/metrics)
    if [[ "$code" == "200" ]]; then
      echo "${GREEN}✅ OTEL collector accessible${NC}"
    else
      echo "${YELLOW}⚠️ OTEL collector not accessible (may not be exposed)${NC}"
    fi
  else
    echo "${YELLOW}⚠️ Production connectivity tests require valid DNS and TLS${NC}"
    echo " Please ensure monitoring.bakewise.ai resolves to your cluster"
  fi
  echo ""
}
# Entry point of the verification run: banner, prerequisite checks, the
# deployment verification, the connectivity tests, then a closing summary
# with suggested next steps.
# Reads: ENVIRONMENT, NAMESPACE
main() {
  local rule="=========================================="

  printf '%s\n' "${BLUE}" "$rule" "🔍 SigNoz Verification for Bakery IA" "$rule" "${NC}"

  # Check prerequisites
  check_kubectl
  check_namespace

  # Verify deployment
  verify_deployment

  # Run connectivity tests
  run_connectivity_tests

  printf '%s\n' "${GREEN}" "$rule" "✅ Verification Complete" "$rule" "${NC}"

  printf '%s\n' \
    "Summary:" \
    " Environment: $ENVIRONMENT" \
    " Namespace: $NAMESPACE" \
    ""
  printf '%s\n' \
    "Next Steps:" \
    " 1. Access SigNoz UI and verify dashboards" \
    " 2. Configure alert rules for your services" \
    " 3. Instrument your applications with OpenTelemetry" \
    " 4. Set up custom dashboards for key metrics" \
    ""
}
# Run main function.
# No arguments are forwarded: the command line has already been consumed by
# the option-parsing loop at file scope, which sets the globals main reads.
main