Improve monitoring 2
This commit is contained in:
6
Tiltfile
6
Tiltfile
@@ -402,7 +402,7 @@ local_resource(
|
|||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "📈 SigNoz Access Information:"
|
echo "📈 SigNoz Access Information:"
|
||||||
echo " URL: https://monitoring.bakery-ia.local/signoz"
|
echo " URL: https://monitoring.bakery-ia.local"
|
||||||
echo " Username: admin"
|
echo " Username: admin"
|
||||||
echo " Password: admin"
|
echo " Password: admin"
|
||||||
echo ""
|
echo ""
|
||||||
@@ -445,7 +445,7 @@ local_resource(
|
|||||||
if [ "$READY_PODS" -eq "$TOTAL_PODS" ]; then
|
if [ "$READY_PODS" -eq "$TOTAL_PODS" ]; then
|
||||||
echo "✅ All SigNoz pods are running!"
|
echo "✅ All SigNoz pods are running!"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Access SigNoz at: https://monitoring.bakery-ia.local/signoz"
|
echo "Access SigNoz at: https://monitoring.bakery-ia.local"
|
||||||
echo "Credentials: admin / admin"
|
echo "Credentials: admin / admin"
|
||||||
else
|
else
|
||||||
echo "⏳ Waiting for pods to become ready..."
|
echo "⏳ Waiting for pods to become ready..."
|
||||||
@@ -687,7 +687,7 @@ Access your application:
|
|||||||
SigNoz (Unified Observability):
|
SigNoz (Unified Observability):
|
||||||
Deploy via Tilt: Trigger 'signoz-deployment' resource
|
Deploy via Tilt: Trigger 'signoz-deployment' resource
|
||||||
Manual deploy: ./infrastructure/helm/deploy-signoz.sh dev
|
Manual deploy: ./infrastructure/helm/deploy-signoz.sh dev
|
||||||
Access (if deployed): https://monitoring.bakery-ia.local/signoz
|
Access (if deployed): https://monitoring.bakery-ia.local
|
||||||
Username: admin
|
Username: admin
|
||||||
Password: admin
|
Password: admin
|
||||||
|
|
||||||
|
|||||||
@@ -162,7 +162,7 @@ data:
|
|||||||
exporters:
|
exporters:
|
||||||
# Send to SigNoz
|
# Send to SigNoz
|
||||||
otlphttp:
|
otlphttp:
|
||||||
endpoint: http://signoz-otel-collector.signoz.svc.cluster.local:4318
|
endpoint: http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318
|
||||||
tls:
|
tls:
|
||||||
insecure: true
|
insecure: true
|
||||||
|
|
||||||
@@ -374,7 +374,7 @@ processors:
|
|||||||
|
|
||||||
exporters:
|
exporters:
|
||||||
otlphttp/logs:
|
otlphttp/logs:
|
||||||
endpoint: http://signoz-otel-collector.signoz.svc.cluster.local:4318/v1/logs
|
endpoint: http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318/v1/logs
|
||||||
|
|
||||||
service:
|
service:
|
||||||
pipelines:
|
pipelines:
|
||||||
|
|||||||
@@ -316,8 +316,8 @@ spec:
|
|||||||
#### Issue: No Metrics Appearing in SigNoz
|
#### Issue: No Metrics Appearing in SigNoz
|
||||||
|
|
||||||
**Checklist:**
|
**Checklist:**
|
||||||
- ✅ OpenTelemetry Collector running? `kubectl get pods -n signoz`
|
- ✅ OpenTelemetry Collector running? `kubectl get pods -n bakery-ia -l app.kubernetes.io/instance=signoz`
|
||||||
- ✅ Service can reach collector? `telnet signoz-otel-collector.signoz 4318`
|
- ✅ Service can reach collector? `telnet signoz-otel-collector.bakery-ia 4318`
|
||||||
- ✅ OTLP endpoint configured correctly? Check `OTEL_EXPORTER_OTLP_ENDPOINT`
|
- ✅ OTLP endpoint configured correctly? Check `OTEL_EXPORTER_OTLP_ENDPOINT`
|
||||||
- ✅ Service logs show OTLP export? Look for "Exporting metrics"
|
- ✅ Service logs show OTLP export? Look for "Exporting metrics"
|
||||||
- ✅ No network policies blocking? Check Kubernetes network policies
|
- ✅ No network policies blocking? Check Kubernetes network policies
|
||||||
@@ -325,13 +325,13 @@ spec:
|
|||||||
**Debugging:**
|
**Debugging:**
|
||||||
```bash
|
```bash
|
||||||
# Check OpenTelemetry Collector logs
|
# Check OpenTelemetry Collector logs
|
||||||
kubectl logs -n signoz -l app=otel-collector
|
kubectl logs -n bakery-ia -l app=otel-collector
|
||||||
|
|
||||||
# Check service logs for OTLP errors
|
# Check service logs for OTLP errors
|
||||||
kubectl logs -l app=auth-service | grep -i otel
|
kubectl logs -l app=auth-service | grep -i otel
|
||||||
|
|
||||||
# Test OTLP connectivity from service pod
|
# Test OTLP connectivity from service pod
|
||||||
kubectl exec -it auth-service-pod -- curl -v http://signoz-otel-collector.signoz:4318
|
kubectl exec -it auth-service-pod -- curl -v http://signoz-otel-collector.bakery-ia:4318
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Issue: High Latency in Specific Service
|
#### Issue: High Latency in Specific Service
|
||||||
@@ -442,7 +442,7 @@ class MyService(StandardFastAPIService):
|
|||||||
|
|
||||||
```env
|
```env
|
||||||
# OpenTelemetry Collector endpoint
|
# OpenTelemetry Collector endpoint
|
||||||
OTEL_EXPORTER_OTLP_ENDPOINT=http://signoz-otel-collector.signoz:4318
|
OTEL_EXPORTER_OTLP_ENDPOINT=http://signoz-otel-collector.bakery-ia:4318
|
||||||
|
|
||||||
# Service-specific configuration
|
# Service-specific configuration
|
||||||
OTEL_SERVICE_NAME=auth-service
|
OTEL_SERVICE_NAME=auth-service
|
||||||
@@ -473,7 +473,7 @@ spec:
|
|||||||
image: auth-service:latest
|
image: auth-service:latest
|
||||||
env:
|
env:
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz:4318"
|
value: "http://signoz-otel-collector.bakery-ia:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "auth-service"
|
value: "auth-service"
|
||||||
- name: ENVIRONMENT
|
- name: ENVIRONMENT
|
||||||
|
|||||||
@@ -48,7 +48,7 @@ def setup_tracing(service_name: str = "gateway"):
|
|||||||
|
|
||||||
# Configure OTLP exporter (sends to OpenTelemetry Collector)
|
# Configure OTLP exporter (sends to OpenTelemetry Collector)
|
||||||
otlp_exporter = OTLPSpanExporter(
|
otlp_exporter = OTLPSpanExporter(
|
||||||
endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://otel-collector.monitoring.svc.cluster.local:4317"),
|
endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"),
|
||||||
insecure=True # Use insecure connection for internal cluster communication
|
insecure=True # Use insecure connection for internal cluster communication
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
554
infrastructure/helm/README.md
Normal file
554
infrastructure/helm/README.md
Normal file
@@ -0,0 +1,554 @@
|
|||||||
|
# SigNoz Helm Deployment for Bakery IA
|
||||||
|
|
||||||
|
This directory contains Helm configurations and deployment scripts for SigNoz observability platform.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
SigNoz is deployed using the official Helm chart with environment-specific configurations optimized for:
|
||||||
|
- **Development**: Colima + Kind (Kubernetes in Docker) with Tilt
|
||||||
|
- **Production**: VPS on clouding.io with MicroK8s
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
### Required Tools
|
||||||
|
- **kubectl** 1.22+
|
||||||
|
- **Helm** 3.8+
|
||||||
|
- **Docker** (for development)
|
||||||
|
- **Kind/MicroK8s** (environment-specific)
|
||||||
|
|
||||||
|
### Docker Hub Authentication
|
||||||
|
|
||||||
|
SigNoz uses images from Docker Hub. Set up authentication to avoid rate limits:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Option 1: Environment variables (recommended)
|
||||||
|
export DOCKERHUB_USERNAME='your-username'
|
||||||
|
export DOCKERHUB_PASSWORD='your-personal-access-token'
|
||||||
|
|
||||||
|
# Option 2: Docker login
|
||||||
|
docker login
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
### Development Deployment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Deploy SigNoz to development environment
|
||||||
|
./deploy-signoz.sh dev
|
||||||
|
|
||||||
|
# Verify deployment
|
||||||
|
./verify-signoz.sh dev
|
||||||
|
|
||||||
|
# Access SigNoz UI
|
||||||
|
# Via ingress: http://monitoring.bakery-ia.local
|
||||||
|
# Or port-forward:
|
||||||
|
kubectl port-forward -n bakery-ia svc/signoz 8080:8080
|
||||||
|
# Then open: http://localhost:8080
|
||||||
|
```
|
||||||
|
|
||||||
|
### Production Deployment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Deploy SigNoz to production environment
|
||||||
|
./deploy-signoz.sh prod
|
||||||
|
|
||||||
|
# Verify deployment
|
||||||
|
./verify-signoz.sh prod
|
||||||
|
|
||||||
|
# Access SigNoz UI
|
||||||
|
# https://monitoring.bakewise.ai
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration Files
|
||||||
|
|
||||||
|
### signoz-values-dev.yaml
|
||||||
|
|
||||||
|
Development environment configuration with:
|
||||||
|
- Single replica for most components
|
||||||
|
- Reduced resource requests (optimized for local Kind cluster)
|
||||||
|
- 7-day data retention
|
||||||
|
- Batch size: 10,000 events
|
||||||
|
- ClickHouse 25.5.6, OTel Collector v0.129.12
|
||||||
|
- PostgreSQL, Redis, and RabbitMQ receivers configured
|
||||||
|
|
||||||
|
### signoz-values-prod.yaml
|
||||||
|
|
||||||
|
Production environment configuration with:
|
||||||
|
- High availability: 2+ replicas for critical components
|
||||||
|
- 3 Zookeeper replicas (required for production)
|
||||||
|
- 30-day data retention
|
||||||
|
- Batch size: 50,000 events (high-performance)
|
||||||
|
- Cold storage enabled with 30-day TTL
|
||||||
|
- Horizontal Pod Autoscaler (HPA) enabled
|
||||||
|
- TLS/SSL with cert-manager
|
||||||
|
- Enhanced security with pod anti-affinity rules
|
||||||
|
|
||||||
|
## Key Configuration Changes (v0.89.0+)
|
||||||
|
|
||||||
|
⚠️ **BREAKING CHANGE**: SigNoz Helm chart v0.89.0+ uses a unified component structure.
|
||||||
|
|
||||||
|
**Old Structure (deprecated):**
|
||||||
|
```yaml
|
||||||
|
frontend:
|
||||||
|
replicaCount: 2
|
||||||
|
queryService:
|
||||||
|
replicaCount: 2
|
||||||
|
```
|
||||||
|
|
||||||
|
**New Structure (current):**
|
||||||
|
```yaml
|
||||||
|
signoz:
|
||||||
|
replicaCount: 2
|
||||||
|
# Combines frontend + query service
|
||||||
|
```
|
||||||
|
|
||||||
|
## Component Architecture
|
||||||
|
|
||||||
|
### Core Components
|
||||||
|
|
||||||
|
1. **SigNoz** (unified component)
|
||||||
|
- Frontend UI + Query Service
|
||||||
|
- Port 8080 (HTTP/API), 8085 (internal gRPC)
|
||||||
|
- Dev: 1 replica, Prod: 2+ replicas with HPA
|
||||||
|
|
||||||
|
2. **ClickHouse** (Time-series database)
|
||||||
|
- Version: 25.5.6
|
||||||
|
- Stores traces, metrics, and logs
|
||||||
|
- Dev: 1 replica, Prod: 2 replicas with cold storage
|
||||||
|
|
||||||
|
3. **Zookeeper** (ClickHouse coordination)
|
||||||
|
- Version: 3.7.1
|
||||||
|
- Dev: 1 replica, Prod: 3 replicas (critical for HA)
|
||||||
|
|
||||||
|
4. **OpenTelemetry Collector** (Data ingestion)
|
||||||
|
- Version: v0.129.12
|
||||||
|
- Ports: 4317 (gRPC), 4318 (HTTP), 8888 (metrics)
|
||||||
|
- Dev: 1 replica, Prod: 2+ replicas with HPA
|
||||||
|
|
||||||
|
5. **Alertmanager** (Alert management)
|
||||||
|
- Version: 0.23.5
|
||||||
|
- Email and Slack integrations configured
|
||||||
|
- Port: 9093
|
||||||
|
|
||||||
|
## Performance Optimizations
|
||||||
|
|
||||||
|
### Batch Processing
|
||||||
|
- **Development**: 10,000 events per batch
|
||||||
|
- **Production**: 50,000 events per batch (official recommendation)
|
||||||
|
- Timeout: 1 second for faster processing
|
||||||
|
|
||||||
|
### Memory Management
|
||||||
|
- Memory limiter processor prevents OOM
|
||||||
|
- Dev: 400 MiB limit, Prod: 1500 MiB limit
|
||||||
|
- Spike limits configured
|
||||||
|
|
||||||
|
### Span Metrics Processor
|
||||||
|
Automatically generates RED metrics (Rate, Errors, Duration):
|
||||||
|
- Latency histogram buckets optimized for microservices
|
||||||
|
- Cache size: 10K (dev), 100K (prod)
|
||||||
|
|
||||||
|
### Cold Storage (Production Only)
|
||||||
|
- Enabled with 30-day TTL
|
||||||
|
- Automatically moves old data to cold storage
|
||||||
|
- Keeps 10GB free on primary storage
|
||||||
|
|
||||||
|
## OpenTelemetry Endpoints
|
||||||
|
|
||||||
|
### From Within Kubernetes Cluster
|
||||||
|
|
||||||
|
**Development:**
|
||||||
|
```
|
||||||
|
OTLP gRPC: signoz-otel-collector.bakery-ia.svc.cluster.local:4317
|
||||||
|
OTLP HTTP: signoz-otel-collector.bakery-ia.svc.cluster.local:4318
|
||||||
|
```
|
||||||
|
|
||||||
|
**Production:**
|
||||||
|
```
|
||||||
|
OTLP gRPC: signoz-otel-collector.bakery-ia.svc.cluster.local:4317
|
||||||
|
OTLP HTTP: signoz-otel-collector.bakery-ia.svc.cluster.local:4318
|
||||||
|
```
|
||||||
|
|
||||||
|
### Application Configuration Example
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Python with OpenTelemetry
|
||||||
|
OTEL_EXPORTER_OTLP_ENDPOINT: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
|
OTEL_EXPORTER_OTLP_PROTOCOL: "http/protobuf"
|
||||||
|
```
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Node.js with OpenTelemetry
|
||||||
|
const exporter = new OTLPTraceExporter({
|
||||||
|
url: 'http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318/v1/traces',
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deployment Scripts
|
||||||
|
|
||||||
|
### deploy-signoz.sh
|
||||||
|
|
||||||
|
Comprehensive deployment script with features:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Usage
|
||||||
|
./deploy-signoz.sh [OPTIONS] ENVIRONMENT
|
||||||
|
|
||||||
|
# Options
|
||||||
|
-h, --help Show help message
|
||||||
|
-d, --dry-run Show what would be deployed
|
||||||
|
-u, --upgrade Upgrade existing deployment
|
||||||
|
-r, --remove Remove deployment
|
||||||
|
-n, --namespace NS Custom namespace (default: bakery-ia)
|
||||||
|
|
||||||
|
# Examples
|
||||||
|
./deploy-signoz.sh dev # Deploy to dev
|
||||||
|
./deploy-signoz.sh --upgrade prod # Upgrade prod
|
||||||
|
./deploy-signoz.sh --dry-run prod # Preview changes
|
||||||
|
./deploy-signoz.sh --remove dev # Remove dev deployment
|
||||||
|
```
|
||||||
|
|
||||||
|
**Features:**
|
||||||
|
- Automatic Helm repository setup
|
||||||
|
- Docker Hub secret creation
|
||||||
|
- Namespace management
|
||||||
|
- Deployment verification
|
||||||
|
- 15-minute timeout with `--wait` flag
|
||||||
|
|
||||||
|
### verify-signoz.sh
|
||||||
|
|
||||||
|
Verification script to check deployment health:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Usage
|
||||||
|
./verify-signoz.sh [OPTIONS] ENVIRONMENT
|
||||||
|
|
||||||
|
# Examples
|
||||||
|
./verify-signoz.sh dev # Verify dev deployment
|
||||||
|
./verify-signoz.sh prod # Verify prod deployment
|
||||||
|
```
|
||||||
|
|
||||||
|
**Checks performed:**
|
||||||
|
1. ✅ Helm release status
|
||||||
|
2. ✅ Pod health and readiness
|
||||||
|
3. ✅ Service availability
|
||||||
|
4. ✅ Ingress configuration
|
||||||
|
5. ✅ PVC status
|
||||||
|
6. ✅ Resource usage (if metrics-server available)
|
||||||
|
7. ✅ Log errors
|
||||||
|
8. ✅ Environment-specific validations
|
||||||
|
- Dev: Single replica, resource limits
|
||||||
|
- Prod: HA config, TLS, Zookeeper replicas, HPA
|
||||||
|
|
||||||
|
## Storage Configuration
|
||||||
|
|
||||||
|
### Development (Kind)
|
||||||
|
```yaml
|
||||||
|
global:
|
||||||
|
storageClass: "standard" # Kind's default provisioner
|
||||||
|
```
|
||||||
|
|
||||||
|
### Production (MicroK8s)
|
||||||
|
```yaml
|
||||||
|
global:
|
||||||
|
storageClass: "microk8s-hostpath" # Or custom storage class
|
||||||
|
```
|
||||||
|
|
||||||
|
**Storage Requirements:**
|
||||||
|
- **Development**: ~35 GiB total
|
||||||
|
- SigNoz: 5 GiB
|
||||||
|
- ClickHouse: 20 GiB
|
||||||
|
- Zookeeper: 5 GiB
|
||||||
|
- Alertmanager: 2 GiB
|
||||||
|
|
||||||
|
- **Production**: ~135 GiB total
|
||||||
|
- SigNoz: 20 GiB
|
||||||
|
- ClickHouse: 100 GiB
|
||||||
|
- Zookeeper: 10 GiB
|
||||||
|
- Alertmanager: 5 GiB
|
||||||
|
|
||||||
|
## Resource Requirements
|
||||||
|
|
||||||
|
### Development Environment
|
||||||
|
**Minimum:**
|
||||||
|
- CPU: 550m (0.55 cores)
|
||||||
|
- Memory: 1.6 GiB
|
||||||
|
- Storage: 35 GiB
|
||||||
|
|
||||||
|
**Recommended:**
|
||||||
|
- CPU: 3 cores
|
||||||
|
- Memory: 3 GiB
|
||||||
|
- Storage: 50 GiB
|
||||||
|
|
||||||
|
### Production Environment
|
||||||
|
**Minimum:**
|
||||||
|
- CPU: 3.5 cores
|
||||||
|
- Memory: 8 GiB
|
||||||
|
- Storage: 135 GiB
|
||||||
|
|
||||||
|
**Recommended:**
|
||||||
|
- CPU: 12 cores
|
||||||
|
- Memory: 20 GiB
|
||||||
|
- Storage: 200 GiB
|
||||||
|
|
||||||
|
## Data Retention
|
||||||
|
|
||||||
|
### Development
|
||||||
|
- Traces: 7 days (168 hours)
|
||||||
|
- Metrics: 7 days (168 hours)
|
||||||
|
- Logs: 7 days (168 hours)
|
||||||
|
|
||||||
|
### Production
|
||||||
|
- Traces: 30 days (720 hours)
|
||||||
|
- Metrics: 30 days (720 hours)
|
||||||
|
- Logs: 30 days (720 hours)
|
||||||
|
- Cold storage after 30 days
|
||||||
|
|
||||||
|
To modify retention, update the environment variables:
|
||||||
|
```yaml
|
||||||
|
signoz:
|
||||||
|
env:
|
||||||
|
signoz_traces_ttl_duration_hrs: "720" # 30 days
|
||||||
|
signoz_metrics_ttl_duration_hrs: "720" # 30 days
|
||||||
|
signoz_logs_ttl_duration_hrs: "168" # 7 days
|
||||||
|
```
|
||||||
|
|
||||||
|
## High Availability (Production)
|
||||||
|
|
||||||
|
### Replication Strategy
|
||||||
|
```yaml
|
||||||
|
signoz: 2 replicas + HPA (min: 2, max: 5)
|
||||||
|
clickhouse: 2 replicas
|
||||||
|
zookeeper: 3 replicas (critical!)
|
||||||
|
otelCollector: 2 replicas + HPA (min: 2, max: 10)
|
||||||
|
alertmanager: 2 replicas
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pod Anti-Affinity
|
||||||
|
Ensures pods are distributed across different nodes:
|
||||||
|
```yaml
|
||||||
|
affinity:
|
||||||
|
podAntiAffinity:
|
||||||
|
preferredDuringSchedulingIgnoredDuringExecution:
|
||||||
|
- weight: 100
|
||||||
|
podAffinityTerm:
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/component: query-service
|
||||||
|
topologyKey: kubernetes.io/hostname
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pod Disruption Budgets
|
||||||
|
Configured for all critical components:
|
||||||
|
```yaml
|
||||||
|
podDisruptionBudget:
|
||||||
|
enabled: true
|
||||||
|
minAvailable: 1
|
||||||
|
```
|
||||||
|
|
||||||
|
## Monitoring and Alerting
|
||||||
|
|
||||||
|
### Email Alerts (Production)
|
||||||
|
Configure SMTP in production values:
|
||||||
|
```yaml
|
||||||
|
signoz:
|
||||||
|
env:
|
||||||
|
signoz_smtp_enabled: "true"
|
||||||
|
signoz_smtp_host: "smtp.gmail.com"
|
||||||
|
signoz_smtp_port: "587"
|
||||||
|
signoz_smtp_from: "alerts@bakewise.ai"
|
||||||
|
signoz_smtp_username: "alerts@bakewise.ai"
|
||||||
|
# Set via secret: signoz_smtp_password
|
||||||
|
```
|
||||||
|
|
||||||
|
### Slack Alerts (Production)
|
||||||
|
Configure webhook in Alertmanager:
|
||||||
|
```yaml
|
||||||
|
alertmanager:
|
||||||
|
config:
|
||||||
|
receivers:
|
||||||
|
- name: 'critical-alerts'
|
||||||
|
slack_configs:
|
||||||
|
- api_url: '${SLACK_WEBHOOK_URL}'
|
||||||
|
channel: '#alerts-critical'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Self-Monitoring
|
||||||
|
SigNoz monitors itself:
|
||||||
|
```yaml
|
||||||
|
selfMonitoring:
|
||||||
|
enabled: true
|
||||||
|
serviceMonitor:
|
||||||
|
enabled: true # Prod only
|
||||||
|
interval: 30s
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Common Issues
|
||||||
|
|
||||||
|
**1. Pods not starting**
|
||||||
|
```bash
|
||||||
|
# Check pod status
|
||||||
|
kubectl get pods -n bakery-ia -l app.kubernetes.io/instance=signoz
|
||||||
|
|
||||||
|
# Check pod logs
|
||||||
|
kubectl logs -n bakery-ia <pod-name>
|
||||||
|
|
||||||
|
# Describe pod for events
|
||||||
|
kubectl describe pod -n bakery-ia <pod-name>
|
||||||
|
```
|
||||||
|
|
||||||
|
**2. Docker Hub rate limits**
|
||||||
|
```bash
|
||||||
|
# Verify secret exists
|
||||||
|
kubectl get secret dockerhub-creds -n bakery-ia
|
||||||
|
|
||||||
|
# Recreate secret
|
||||||
|
kubectl delete secret dockerhub-creds -n bakery-ia
|
||||||
|
export DOCKERHUB_USERNAME='your-username'
|
||||||
|
export DOCKERHUB_PASSWORD='your-token'
|
||||||
|
./deploy-signoz.sh dev
|
||||||
|
```
|
||||||
|
|
||||||
|
**3. ClickHouse connection issues**
|
||||||
|
```bash
|
||||||
|
# Check ClickHouse pod
|
||||||
|
kubectl logs -n bakery-ia -l app.kubernetes.io/component=clickhouse
|
||||||
|
|
||||||
|
# Check Zookeeper (required by ClickHouse)
|
||||||
|
kubectl logs -n bakery-ia -l app.kubernetes.io/component=zookeeper
|
||||||
|
```
|
||||||
|
|
||||||
|
**4. OTel Collector not receiving data**
|
||||||
|
```bash
|
||||||
|
# Check OTel Collector logs
|
||||||
|
kubectl logs -n bakery-ia -l app.kubernetes.io/component=otel-collector
|
||||||
|
|
||||||
|
# Test connectivity
|
||||||
|
kubectl port-forward -n bakery-ia svc/signoz-otel-collector 4318:4318
|
||||||
|
curl -v http://localhost:4318/v1/traces
|
||||||
|
```
|
||||||
|
|
||||||
|
**5. Insufficient storage**
|
||||||
|
```bash
|
||||||
|
# Check PVC status
|
||||||
|
kubectl get pvc -n bakery-ia -l app.kubernetes.io/instance=signoz
|
||||||
|
|
||||||
|
# Check storage usage (if metrics-server available)
|
||||||
|
kubectl top pods -n bakery-ia -l app.kubernetes.io/instance=signoz
|
||||||
|
```
|
||||||
|
|
||||||
|
### Debug Mode
|
||||||
|
|
||||||
|
Enable debug exporter in OTel Collector:
|
||||||
|
```yaml
|
||||||
|
otelCollector:
|
||||||
|
config:
|
||||||
|
exporters:
|
||||||
|
debug:
|
||||||
|
verbosity: detailed
|
||||||
|
sampling_initial: 5
|
||||||
|
sampling_thereafter: 200
|
||||||
|
service:
|
||||||
|
pipelines:
|
||||||
|
traces:
|
||||||
|
exporters: [clickhousetraces, debug] # Add debug
|
||||||
|
```
|
||||||
|
|
||||||
|
### Upgrade from Old Version
|
||||||
|
|
||||||
|
If upgrading from pre-v0.89.0:
|
||||||
|
```bash
|
||||||
|
# 1. Backup data (recommended)
|
||||||
|
kubectl get all -n bakery-ia -l app.kubernetes.io/instance=signoz -o yaml > signoz-backup.yaml
|
||||||
|
|
||||||
|
# 2. Remove old deployment
|
||||||
|
./deploy-signoz.sh --remove prod
|
||||||
|
|
||||||
|
# 3. Deploy new version
|
||||||
|
./deploy-signoz.sh prod
|
||||||
|
|
||||||
|
# 4. Verify
|
||||||
|
./verify-signoz.sh prod
|
||||||
|
```
|
||||||
|
|
||||||
|
## Security Best Practices
|
||||||
|
|
||||||
|
1. **Change default password** immediately after first login
|
||||||
|
2. **Use TLS/SSL** in production (configured with cert-manager)
|
||||||
|
3. **Network policies** enabled in production
|
||||||
|
4. **Run as non-root** (configured in securityContext)
|
||||||
|
5. **RBAC** with dedicated service account
|
||||||
|
6. **Secrets management** for sensitive data (SMTP, Slack webhooks)
|
||||||
|
7. **Image pull secrets** to avoid exposing Docker Hub credentials
|
||||||
|
|
||||||
|
## Backup and Recovery
|
||||||
|
|
||||||
|
### Backup ClickHouse Data
|
||||||
|
```bash
|
||||||
|
# Export ClickHouse data
|
||||||
|
kubectl exec -n bakery-ia <clickhouse-pod> -- clickhouse-client \
|
||||||
|
--query="BACKUP DATABASE signoz_traces TO Disk('backups', 'traces_backup.zip')"
|
||||||
|
|
||||||
|
# Copy backup out
|
||||||
|
kubectl cp bakery-ia/<clickhouse-pod>:/var/lib/clickhouse/backups/ ./backups/
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restore from Backup
|
||||||
|
```bash
|
||||||
|
# Copy backup in
|
||||||
|
kubectl cp ./backups/ bakery-ia/<clickhouse-pod>:/var/lib/clickhouse/backups/
|
||||||
|
|
||||||
|
# Restore
|
||||||
|
kubectl exec -n bakery-ia <clickhouse-pod> -- clickhouse-client \
|
||||||
|
--query="RESTORE DATABASE signoz_traces FROM Disk('backups', 'traces_backup.zip')"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Updating Configuration
|
||||||
|
|
||||||
|
To update SigNoz configuration:
|
||||||
|
|
||||||
|
1. Edit values file: `signoz-values-{env}.yaml`
|
||||||
|
2. Apply changes:
|
||||||
|
```bash
|
||||||
|
./deploy-signoz.sh --upgrade {env}
|
||||||
|
```
|
||||||
|
3. Verify:
|
||||||
|
```bash
|
||||||
|
./verify-signoz.sh {env}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Uninstallation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Remove SigNoz deployment
|
||||||
|
./deploy-signoz.sh --remove {env}
|
||||||
|
|
||||||
|
# Optionally delete PVCs (WARNING: deletes all data)
|
||||||
|
kubectl delete pvc -n bakery-ia -l app.kubernetes.io/instance=signoz
|
||||||
|
|
||||||
|
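# Optionally delete namespace (ONLY if SigNoz was deployed to a dedicated namespace via --namespace; never delete the shared bakery-ia namespace, which also hosts the application)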
|
||||||
|
kubectl delete namespace signoz
|
||||||
|
```
|
||||||
|
|
||||||
|
## References
|
||||||
|
|
||||||
|
- [SigNoz Official Documentation](https://signoz.io/docs/)
|
||||||
|
- [SigNoz Helm Charts Repository](https://github.com/SigNoz/charts)
|
||||||
|
- [OpenTelemetry Documentation](https://opentelemetry.io/docs/)
|
||||||
|
- [ClickHouse Documentation](https://clickhouse.com/docs/)
|
||||||
|
|
||||||
|
## Support
|
||||||
|
|
||||||
|
For issues or questions:
|
||||||
|
1. Check [SigNoz GitHub Issues](https://github.com/SigNoz/signoz/issues)
|
||||||
|
2. Review deployment logs: `kubectl logs -n bakery-ia <pod-name>`
|
||||||
|
3. Run verification script: `./verify-signoz.sh {env}`
|
||||||
|
4. Check [SigNoz Community Slack](https://signoz.io/slack)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Last Updated**: 2026-01-09
|
||||||
|
**SigNoz Helm Chart Version**: Latest (v0.129.12 components)
|
||||||
|
**Maintained by**: Bakery IA Team
|
||||||
@@ -30,7 +30,7 @@ show_help() {
|
|||||||
-d, --dry-run Dry run - show what would be done without actually deploying
|
-d, --dry-run Dry run - show what would be done without actually deploying
|
||||||
-u, --upgrade Upgrade existing deployment
|
-u, --upgrade Upgrade existing deployment
|
||||||
-r, --remove Remove/Uninstall SigNoz deployment
|
-r, --remove Remove/Uninstall SigNoz deployment
|
||||||
-n, --namespace NAMESPACE Specify namespace (default: signoz)"
|
-n, --namespace NAMESPACE Specify namespace (default: bakery-ia)"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Examples:
|
echo "Examples:
|
||||||
$0 dev # Deploy to development
|
$0 dev # Deploy to development
|
||||||
@@ -51,7 +51,7 @@ show_help() {
|
|||||||
DRY_RUN=false
|
DRY_RUN=false
|
||||||
UPGRADE=false
|
UPGRADE=false
|
||||||
REMOVE=false
|
REMOVE=false
|
||||||
NAMESPACE="signoz"
|
NAMESPACE="bakery-ia"
|
||||||
|
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
case $1 in
|
case $1 in
|
||||||
@@ -208,6 +208,29 @@ create_dockerhub_secret() {
|
|||||||
echo ""
|
echo ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Function to add and update Helm repository
|
||||||
|
setup_helm_repo() {
|
||||||
|
echo "${BLUE}Setting up SigNoz Helm repository...${NC}"
|
||||||
|
|
||||||
|
if [[ "$DRY_RUN" == true ]]; then
|
||||||
|
echo " (dry-run) Would add SigNoz Helm repository"
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Add SigNoz Helm repository
|
||||||
|
if helm repo list | grep -q "^signoz"; then
|
||||||
|
echo "${BLUE}SigNoz repository already added, updating...${NC}"
|
||||||
|
helm repo update signoz
|
||||||
|
else
|
||||||
|
echo "${BLUE}Adding SigNoz Helm repository...${NC}"
|
||||||
|
helm repo add signoz https://charts.signoz.io
|
||||||
|
helm repo update
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "${GREEN}Helm repository ready.${NC}"
|
||||||
|
echo ""
|
||||||
|
}
|
||||||
|
|
||||||
# Function to deploy SigNoz
|
# Function to deploy SigNoz
|
||||||
deploy_signoz() {
|
deploy_signoz() {
|
||||||
local values_file="infrastructure/helm/signoz-values-$ENVIRONMENT.yaml"
|
local values_file="infrastructure/helm/signoz-values-$ENVIRONMENT.yaml"
|
||||||
@@ -220,22 +243,30 @@ deploy_signoz() {
|
|||||||
echo "${BLUE}Deploying SigNoz to $ENVIRONMENT environment...${NC}"
|
echo "${BLUE}Deploying SigNoz to $ENVIRONMENT environment...${NC}"
|
||||||
echo " Using values file: $values_file"
|
echo " Using values file: $values_file"
|
||||||
echo " Target namespace: $NAMESPACE"
|
echo " Target namespace: $NAMESPACE"
|
||||||
|
echo " Chart version: Latest from signoz/signoz"
|
||||||
|
|
||||||
if [[ "$DRY_RUN" == true ]]; then
|
if [[ "$DRY_RUN" == true ]]; then
|
||||||
echo " (dry-run) Would deploy SigNoz with:"
|
echo " (dry-run) Would deploy SigNoz with:"
|
||||||
echo " helm install signoz signoz/signoz -n $NAMESPACE -f $values_file"
|
echo " helm upgrade --install signoz signoz/signoz -n $NAMESPACE -f $values_file --wait --timeout 15m"
|
||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Use upgrade --install to handle both new installations and upgrades
|
# Use upgrade --install to handle both new installations and upgrades
|
||||||
echo "${BLUE}Installing/Upgrading SigNoz...${NC}"
|
echo "${BLUE}Installing/Upgrading SigNoz...${NC}"
|
||||||
helm upgrade --install signoz signoz/signoz -n "$NAMESPACE" -f "$values_file"
|
echo "This may take 10-15 minutes..."
|
||||||
|
|
||||||
echo "${GREEN}SigNoz deployment initiated.${NC}"
|
helm upgrade --install signoz signoz/signoz \
|
||||||
echo "Waiting for pods to become ready..."
|
-n "$NAMESPACE" \
|
||||||
|
-f "$values_file" \
|
||||||
|
--wait \
|
||||||
|
--timeout 15m \
|
||||||
|
--create-namespace
|
||||||
|
|
||||||
# Wait for deployment to complete
|
echo "${GREEN}SigNoz deployment completed.${NC}"
|
||||||
wait_for_deployment
|
echo ""
|
||||||
|
|
||||||
|
# Show deployment status
|
||||||
|
show_deployment_status
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to remove SigNoz
|
# Function to remove SigNoz
|
||||||
@@ -248,52 +279,19 @@ remove_signoz() {
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if helm list -n "$NAMESPACE" | grep -q signoz; then
|
if helm list -n "$NAMESPACE" | grep -q signoz; then
|
||||||
helm uninstall signoz -n "$NAMESPACE"
|
helm uninstall signoz -n "$NAMESPACE" --wait
|
||||||
echo "${GREEN}SigNoz deployment removed.${NC}"
|
echo "${GREEN}SigNoz deployment removed.${NC}"
|
||||||
|
|
||||||
|
# Optionally remove PVCs (commented out by default for safety)
|
||||||
|
echo ""
|
||||||
|
echo "${YELLOW}Note: Persistent Volume Claims (PVCs) were NOT deleted.${NC}"
|
||||||
|
echo "To delete PVCs and all data, run:"
|
||||||
|
echo " kubectl delete pvc -n $NAMESPACE -l app.kubernetes.io/instance=signoz"
|
||||||
else
|
else
|
||||||
echo "${YELLOW}No SigNoz deployment found in namespace $NAMESPACE.${NC}"
|
echo "${YELLOW}No SigNoz deployment found in namespace $NAMESPACE.${NC}"
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to wait for deployment to complete
|
|
||||||
wait_for_deployment() {
|
|
||||||
echo "${BLUE}Waiting for SigNoz pods to become ready...${NC}"
|
|
||||||
|
|
||||||
# Wait for pods to be ready
|
|
||||||
local timeout=600 # 10 minutes
|
|
||||||
local start_time=$(date +%s)
|
|
||||||
|
|
||||||
while true; do
|
|
||||||
local current_time=$(date +%s)
|
|
||||||
local elapsed=$((current_time - start_time))
|
|
||||||
|
|
||||||
if [[ $elapsed -ge $timeout ]]; then
|
|
||||||
echo "${RED}Timeout waiting for SigNoz pods to become ready.${NC}"
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Check pod status
|
|
||||||
local ready_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/instance=signoz --field-selector=status.phase=Running 2>/dev/null | grep -c "Running" | tr -d '[:space:]' || echo "0")
|
|
||||||
local total_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/instance=signoz 2>/dev/null | grep -v "NAME" | wc -l | tr -d '[:space:]' || echo "0")
|
|
||||||
|
|
||||||
if [[ $ready_pods -eq 0 ]]; then
|
|
||||||
echo " Waiting for pods to start..."
|
|
||||||
else
|
|
||||||
echo " $ready_pods/$total_pods pods are running"
|
|
||||||
|
|
||||||
if [[ $ready_pods -eq $total_pods && $total_pods -gt 0 ]]; then
|
|
||||||
echo "${GREEN}All SigNoz pods are running!${NC}"
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
sleep 10
|
|
||||||
done
|
|
||||||
|
|
||||||
# Show deployment status
|
|
||||||
show_deployment_status
|
|
||||||
}
|
|
||||||
|
|
||||||
# Function to show deployment status
|
# Function to show deployment status
|
||||||
show_deployment_status() {
|
show_deployment_status() {
|
||||||
echo ""
|
echo ""
|
||||||
@@ -324,28 +322,34 @@ show_access_info() {
|
|||||||
echo "${BLUE}=== Access Information ===${NC}"
|
echo "${BLUE}=== Access Information ===${NC}"
|
||||||
|
|
||||||
if [[ "$ENVIRONMENT" == "dev" ]]; then
|
if [[ "$ENVIRONMENT" == "dev" ]]; then
|
||||||
echo "SigNoz UI: https://localhost/signoz"
|
echo "SigNoz UI: http://monitoring.bakery-ia.local"
|
||||||
echo "SigNoz API: https://localhost/signoz-api"
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "OpenTelemetry Collector Endpoints:"
|
echo "OpenTelemetry Collector Endpoints (from within cluster):"
|
||||||
echo " gRPC: localhost:4317"
|
echo " gRPC: signoz-otel-collector.$NAMESPACE.svc.cluster.local:4317"
|
||||||
echo " HTTP: localhost:4318"
|
echo " HTTP: signoz-otel-collector.$NAMESPACE.svc.cluster.local:4318"
|
||||||
echo " Metrics: localhost:8888"
|
echo ""
|
||||||
|
echo "Port-forward for local access:"
|
||||||
|
echo " kubectl port-forward -n $NAMESPACE svc/signoz 8080:8080"
|
||||||
|
echo " kubectl port-forward -n $NAMESPACE svc/signoz-otel-collector 4317:4317"
|
||||||
|
echo " kubectl port-forward -n $NAMESPACE svc/signoz-otel-collector 4318:4318"
|
||||||
else
|
else
|
||||||
echo "SigNoz UI: https://monitoring.bakewise.ai/signoz"
|
echo "SigNoz UI: https://monitoring.bakewise.ai"
|
||||||
echo "SigNoz API: https://monitoring.bakewise.ai/signoz-api"
|
|
||||||
echo "SigNoz Alerts: https://monitoring.bakewise.ai/signoz-alerts"
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "OpenTelemetry Collector Endpoints:"
|
echo "OpenTelemetry Collector Endpoints (from within cluster):"
|
||||||
echo " gRPC: monitoring.bakewise.ai:4317"
|
echo " gRPC: signoz-otel-collector.$NAMESPACE.svc.cluster.local:4317"
|
||||||
echo " HTTP: monitoring.bakewise.ai:4318"
|
echo " HTTP: signoz-otel-collector.$NAMESPACE.svc.cluster.local:4318"
|
||||||
|
echo ""
|
||||||
|
echo "External endpoints (if exposed):"
|
||||||
|
echo " Check ingress configuration for external OTLP endpoints"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "Default credentials:"
|
echo "Default credentials:"
|
||||||
echo " Username: admin"
|
echo " Username: admin@example.com"
|
||||||
echo " Password: admin"
|
echo " Password: admin"
|
||||||
echo ""
|
echo ""
|
||||||
|
echo "Note: Change default password after first login!"
|
||||||
|
echo ""
|
||||||
}
|
}
|
||||||
|
|
||||||
# Main execution
|
# Main execution
|
||||||
@@ -368,6 +372,9 @@ main() {
|
|||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Setup Helm repository
|
||||||
|
setup_helm_repo
|
||||||
|
|
||||||
# Create Docker Hub secret for image pulls
|
# Create Docker Hub secret for image pulls
|
||||||
create_dockerhub_secret
|
create_dockerhub_secret
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
# SigNoz Helm Chart Values - Development Environment
|
# SigNoz Helm Chart Values - Development Environment
|
||||||
# Optimized for local development with minimal resource usage
|
# Optimized for local development with minimal resource usage
|
||||||
|
# DEPLOYED IN bakery-ia NAMESPACE - Ingress managed by bakery-ingress
|
||||||
#
|
#
|
||||||
# Official Chart: https://github.com/SigNoz/charts
|
# Official Chart: https://github.com/SigNoz/charts
|
||||||
# Install Command: helm install signoz signoz/signoz -n signoz --create-namespace -f signoz-values-dev.yaml
|
# Install Command: helm install signoz signoz/signoz -n bakery-ia -f signoz-values-dev.yaml
|
||||||
|
|
||||||
global:
|
global:
|
||||||
storageClass: "standard"
|
storageClass: "standard"
|
||||||
|
clusterName: "bakery-ia-dev"
|
||||||
domain: "monitoring.bakery-ia.local"
|
domain: "monitoring.bakery-ia.local"
|
||||||
# Docker Hub credentials - applied to all sub-charts (including Zookeeper, ClickHouse, etc)
|
# Docker Hub credentials - applied to all sub-charts (including Zookeeper, ClickHouse, etc)
|
||||||
imagePullSecrets:
|
imagePullSecrets:
|
||||||
@@ -23,17 +25,10 @@ signoz:
|
|||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
port: 8080
|
port: 8080
|
||||||
|
|
||||||
|
# DISABLE built-in ingress - using unified bakery-ingress instead
|
||||||
|
# Route configured in infrastructure/kubernetes/overlays/dev/dev-ingress.yaml
|
||||||
ingress:
|
ingress:
|
||||||
enabled: true
|
enabled: false
|
||||||
className: nginx
|
|
||||||
annotations: {}
|
|
||||||
hosts:
|
|
||||||
- host: monitoring.bakery-ia.local
|
|
||||||
paths:
|
|
||||||
- path: /
|
|
||||||
pathType: Prefix
|
|
||||||
port: 8080
|
|
||||||
tls: []
|
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
@@ -43,6 +38,17 @@ signoz:
|
|||||||
cpu: 1000m
|
cpu: 1000m
|
||||||
memory: 1Gi
|
memory: 1Gi
|
||||||
|
|
||||||
|
# Environment variables (new format - replaces configVars)
|
||||||
|
env:
|
||||||
|
signoz_telemetrystore_provider: "clickhouse"
|
||||||
|
dot_metrics_enabled: "true"
|
||||||
|
signoz_emailing_enabled: "false"
|
||||||
|
signoz_alertmanager_provider: "signoz"
|
||||||
|
# Retention for dev (7 days)
|
||||||
|
signoz_traces_ttl_duration_hrs: "168"
|
||||||
|
signoz_metrics_ttl_duration_hrs: "168"
|
||||||
|
signoz_logs_ttl_duration_hrs: "168"
|
||||||
|
|
||||||
persistence:
|
persistence:
|
||||||
enabled: true
|
enabled: true
|
||||||
size: 5Gi
|
size: 5Gi
|
||||||
@@ -92,6 +98,11 @@ clickhouse:
|
|||||||
enabled: true
|
enabled: true
|
||||||
installCustomStorageClass: false
|
installCustomStorageClass: false
|
||||||
|
|
||||||
|
image:
|
||||||
|
registry: docker.io
|
||||||
|
repository: clickhouse/clickhouse-server
|
||||||
|
tag: 25.5.6 # Official recommended version
|
||||||
|
|
||||||
# Reduce ClickHouse resource requests for local dev
|
# Reduce ClickHouse resource requests for local dev
|
||||||
clickhouse:
|
clickhouse:
|
||||||
resources:
|
resources:
|
||||||
@@ -102,15 +113,39 @@ clickhouse:
|
|||||||
cpu: 1000m
|
cpu: 1000m
|
||||||
memory: 1Gi
|
memory: 1Gi
|
||||||
|
|
||||||
|
persistence:
|
||||||
|
enabled: true
|
||||||
|
size: 20Gi
|
||||||
|
|
||||||
# Zookeeper Configuration (required by ClickHouse)
|
# Zookeeper Configuration (required by ClickHouse)
|
||||||
zookeeper:
|
zookeeper:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
replicaCount: 1 # Single replica for dev
|
||||||
|
|
||||||
|
image:
|
||||||
|
tag: 3.7.1 # Official recommended version
|
||||||
|
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 256Mi
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 512Mi
|
||||||
|
|
||||||
|
persistence:
|
||||||
|
enabled: true
|
||||||
|
size: 5Gi
|
||||||
|
|
||||||
# OpenTelemetry Collector - Data ingestion endpoint for all telemetry
|
# OpenTelemetry Collector - Data ingestion endpoint for all telemetry
|
||||||
otelCollector:
|
otelCollector:
|
||||||
enabled: true
|
enabled: true
|
||||||
replicaCount: 1
|
replicaCount: 1
|
||||||
|
|
||||||
|
image:
|
||||||
|
repository: signoz/signoz-otel-collector
|
||||||
|
tag: v0.129.12 # Latest recommended version
|
||||||
|
|
||||||
# Service configuration - expose both gRPC and HTTP endpoints
|
# Service configuration - expose both gRPC and HTTP endpoints
|
||||||
service:
|
service:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
@@ -130,6 +165,11 @@ otelCollector:
|
|||||||
port: 8889
|
port: 8889
|
||||||
targetPort: 8889
|
targetPort: 8889
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
|
# Metrics
|
||||||
|
- name: metrics
|
||||||
|
port: 8888
|
||||||
|
targetPort: 8888
|
||||||
|
protocol: TCP
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
@@ -210,10 +250,11 @@ otelCollector:
|
|||||||
collection_interval: 60s
|
collection_interval: 60s
|
||||||
|
|
||||||
processors:
|
processors:
|
||||||
# Batch processor for better performance
|
# Batch processor for better performance (optimized for high throughput)
|
||||||
batch:
|
batch:
|
||||||
timeout: 10s
|
timeout: 1s
|
||||||
send_batch_size: 1024
|
send_batch_size: 10000 # Increased from 1024 for better performance
|
||||||
|
send_batch_max_size: 10000
|
||||||
|
|
||||||
# Memory limiter to prevent OOM
|
# Memory limiter to prevent OOM
|
||||||
memory_limiter:
|
memory_limiter:
|
||||||
@@ -223,35 +264,57 @@ otelCollector:
|
|||||||
|
|
||||||
# Resource detection
|
# Resource detection
|
||||||
resourcedetection:
|
resourcedetection:
|
||||||
detectors: [env, system]
|
detectors: [env, system, docker]
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
|
|
||||||
|
# Span metrics processor for automatic service metrics
|
||||||
|
spanmetrics:
|
||||||
|
metrics_exporter: signozclickhousemetrics
|
||||||
|
latency_histogram_buckets: [2ms, 4ms, 6ms, 8ms, 10ms, 50ms, 100ms, 200ms, 400ms, 800ms, 1s, 1400ms, 2s, 5s, 10s, 15s]
|
||||||
|
dimensions_cache_size: 10000
|
||||||
|
|
||||||
exporters:
|
exporters:
|
||||||
# ClickHouse exporter for traces
|
# ClickHouse exporter for traces
|
||||||
clickhousetraces:
|
clickhousetraces:
|
||||||
datasource: tcp://signoz-clickhouse:9000/?database=signoz_traces
|
datasource: tcp://signoz-clickhouse:9000/?database=signoz_traces
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
|
retry_on_failure:
|
||||||
|
enabled: true
|
||||||
|
initial_interval: 5s
|
||||||
|
max_interval: 30s
|
||||||
|
max_elapsed_time: 300s
|
||||||
|
|
||||||
# ClickHouse exporter for metrics
|
# ClickHouse exporter for metrics
|
||||||
signozclickhousemetrics:
|
signozclickhousemetrics:
|
||||||
dsn: "tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/signoz_metrics"
|
dsn: "tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/signoz_metrics"
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
|
retry_on_failure:
|
||||||
|
enabled: true
|
||||||
|
initial_interval: 5s
|
||||||
|
max_interval: 30s
|
||||||
|
max_elapsed_time: 300s
|
||||||
|
|
||||||
# ClickHouse exporter for logs
|
# ClickHouse exporter for logs
|
||||||
clickhouselogsexporter:
|
clickhouselogsexporter:
|
||||||
dsn: tcp://signoz-clickhouse:9000/?database=signoz_logs
|
dsn: tcp://signoz-clickhouse:9000/?database=signoz_logs
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
|
retry_on_failure:
|
||||||
|
enabled: true
|
||||||
|
initial_interval: 5s
|
||||||
|
max_interval: 30s
|
||||||
|
|
||||||
# Debug exporter for debugging (optional)
|
# Debug exporter for debugging (optional)
|
||||||
debug:
|
debug:
|
||||||
verbosity: detailed
|
verbosity: detailed
|
||||||
|
sampling_initial: 5
|
||||||
|
sampling_thereafter: 200
|
||||||
|
|
||||||
service:
|
service:
|
||||||
pipelines:
|
pipelines:
|
||||||
# Traces pipeline
|
# Traces pipeline
|
||||||
traces:
|
traces:
|
||||||
receivers: [otlp]
|
receivers: [otlp]
|
||||||
processors: [memory_limiter, batch, resourcedetection]
|
processors: [memory_limiter, batch, spanmetrics, resourcedetection]
|
||||||
exporters: [clickhousetraces]
|
exporters: [clickhousetraces]
|
||||||
|
|
||||||
# Metrics pipeline
|
# Metrics pipeline
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
# SigNoz Helm Chart Values - Production Environment
|
# SigNoz Helm Chart Values - Production Environment
|
||||||
# High-availability configuration with resource optimization
|
# High-availability configuration with resource optimization
|
||||||
|
# DEPLOYED IN bakery-ia NAMESPACE - Ingress managed by bakery-ingress-prod
|
||||||
#
|
#
|
||||||
# Official Chart: https://github.com/SigNoz/charts
|
# Official Chart: https://github.com/SigNoz/charts
|
||||||
# Install Command: helm install signoz signoz/signoz -n signoz --create-namespace -f signoz-values-prod.yaml
|
# Install Command: helm install signoz signoz/signoz -n bakery-ia -f signoz-values-prod.yaml
|
||||||
|
|
||||||
global:
|
global:
|
||||||
storageClass: "standard"
|
storageClass: "standard" # For MicroK8s, use "microk8s-hostpath" or custom storage class
|
||||||
|
clusterName: "bakery-ia-prod"
|
||||||
domain: "monitoring.bakewise.ai"
|
domain: "monitoring.bakewise.ai"
|
||||||
# Docker Hub credentials - applied to all sub-charts (including Zookeeper, ClickHouse, etc)
|
# Docker Hub credentials - applied to all sub-charts (including Zookeeper, ClickHouse, etc)
|
||||||
imagePullSecrets:
|
imagePullSecrets:
|
||||||
@@ -15,43 +17,33 @@ global:
|
|||||||
imagePullSecrets:
|
imagePullSecrets:
|
||||||
- dockerhub-creds
|
- dockerhub-creds
|
||||||
|
|
||||||
# Frontend Configuration
|
# SigNoz Main Component (unified frontend + query service)
|
||||||
frontend:
|
# BREAKING CHANGE: v0.89.0+ uses unified component instead of separate frontend/queryService
|
||||||
|
signoz:
|
||||||
replicaCount: 2
|
replicaCount: 2
|
||||||
|
|
||||||
image:
|
image:
|
||||||
repository: signoz/frontend
|
repository: signoz/signoz
|
||||||
tag: 0.52.3
|
tag: v0.106.0 # Latest stable version
|
||||||
pullPolicy: IfNotPresent
|
pullPolicy: IfNotPresent
|
||||||
|
|
||||||
service:
|
service:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
port: 3301
|
port: 8080 # HTTP/API port
|
||||||
|
internalPort: 8085 # Internal gRPC port
|
||||||
|
|
||||||
|
# DISABLE built-in ingress - using unified bakery-ingress-prod instead
|
||||||
|
# Route configured in infrastructure/kubernetes/overlays/prod/prod-ingress.yaml
|
||||||
ingress:
|
ingress:
|
||||||
enabled: true
|
enabled: false
|
||||||
className: nginx
|
|
||||||
annotations:
|
|
||||||
nginx.ingress.kubernetes.io/rewrite-target: /$2
|
|
||||||
nginx.ingress.kubernetes.io/use-regex: "true"
|
|
||||||
cert-manager.io/cluster-issuer: "letsencrypt-prod"
|
|
||||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
|
||||||
hosts:
|
|
||||||
- host: monitoring.bakewise.ai
|
|
||||||
paths:
|
|
||||||
- path: /signoz(/|$)(.*)
|
|
||||||
pathType: ImplementationSpecific
|
|
||||||
tls:
|
|
||||||
- secretName: signoz-tls
|
|
||||||
hosts:
|
|
||||||
- monitoring.bakewise.ai
|
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 250m
|
|
||||||
memory: 512Mi
|
|
||||||
limits:
|
|
||||||
cpu: 500m
|
cpu: 500m
|
||||||
memory: 1Gi
|
memory: 1Gi
|
||||||
|
limits:
|
||||||
|
cpu: 2000m
|
||||||
|
memory: 4Gi
|
||||||
|
|
||||||
# Pod Anti-affinity for HA
|
# Pod Anti-affinity for HA
|
||||||
affinity:
|
affinity:
|
||||||
@@ -60,58 +52,27 @@ frontend:
|
|||||||
- weight: 100
|
- weight: 100
|
||||||
podAffinityTerm:
|
podAffinityTerm:
|
||||||
labelSelector:
|
labelSelector:
|
||||||
matchExpressions:
|
matchLabels:
|
||||||
- key: app
|
app.kubernetes.io/component: query-service
|
||||||
operator: In
|
|
||||||
values:
|
|
||||||
- signoz-frontend
|
|
||||||
topologyKey: kubernetes.io/hostname
|
topologyKey: kubernetes.io/hostname
|
||||||
|
|
||||||
|
# Environment variables (new format - replaces configVars)
|
||||||
env:
|
env:
|
||||||
- name: FRONTEND_REFRESH_INTERVAL
|
signoz_telemetrystore_provider: "clickhouse"
|
||||||
value: "30000"
|
dot_metrics_enabled: "true"
|
||||||
|
signoz_emailing_enabled: "true"
|
||||||
# Query Service Configuration
|
signoz_alertmanager_provider: "signoz"
|
||||||
queryService:
|
# Retention configuration (30 days for prod)
|
||||||
replicaCount: 2
|
signoz_traces_ttl_duration_hrs: "720"
|
||||||
image:
|
signoz_metrics_ttl_duration_hrs: "720"
|
||||||
repository: signoz/query-service
|
signoz_logs_ttl_duration_hrs: "720"
|
||||||
tag: 0.52.3
|
# SMTP configuration for email alerts
|
||||||
pullPolicy: IfNotPresent
|
signoz_smtp_enabled: "true"
|
||||||
|
signoz_smtp_host: "smtp.gmail.com"
|
||||||
service:
|
signoz_smtp_port: "587"
|
||||||
type: ClusterIP
|
signoz_smtp_from: "alerts@bakewise.ai"
|
||||||
port: 8080
|
signoz_smtp_username: "alerts@bakewise.ai"
|
||||||
|
# Password should be set via secret: signoz_smtp_password
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
cpu: 500m
|
|
||||||
memory: 1Gi
|
|
||||||
limits:
|
|
||||||
cpu: 1000m
|
|
||||||
memory: 2Gi
|
|
||||||
|
|
||||||
# Pod Anti-affinity for HA
|
|
||||||
affinity:
|
|
||||||
podAntiAffinity:
|
|
||||||
preferredDuringSchedulingIgnoredDuringExecution:
|
|
||||||
- weight: 100
|
|
||||||
podAffinityTerm:
|
|
||||||
labelSelector:
|
|
||||||
matchExpressions:
|
|
||||||
- key: app
|
|
||||||
operator: In
|
|
||||||
values:
|
|
||||||
- signoz-query-service
|
|
||||||
topologyKey: kubernetes.io/hostname
|
|
||||||
|
|
||||||
env:
|
|
||||||
- name: DEPLOYMENT_TYPE
|
|
||||||
value: "kubernetes-helm"
|
|
||||||
- name: SIGNOZ_LOCAL_DB_PATH
|
|
||||||
value: "/var/lib/signoz"
|
|
||||||
- name: RETENTION_DAYS
|
|
||||||
value: "30"
|
|
||||||
|
|
||||||
persistence:
|
persistence:
|
||||||
enabled: true
|
enabled: true
|
||||||
@@ -128,7 +89,9 @@ queryService:
|
|||||||
|
|
||||||
# AlertManager Configuration
|
# AlertManager Configuration
|
||||||
alertmanager:
|
alertmanager:
|
||||||
|
enabled: true
|
||||||
replicaCount: 2
|
replicaCount: 2
|
||||||
|
|
||||||
image:
|
image:
|
||||||
repository: signoz/alertmanager
|
repository: signoz/alertmanager
|
||||||
tag: 0.23.5
|
tag: 0.23.5
|
||||||
@@ -140,11 +103,11 @@ alertmanager:
|
|||||||
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 250m
|
cpu: 100m
|
||||||
memory: 512Mi
|
memory: 128Mi
|
||||||
limits:
|
limits:
|
||||||
cpu: 500m
|
cpu: 500m
|
||||||
memory: 1Gi
|
memory: 512Mi
|
||||||
|
|
||||||
# Pod Anti-affinity for HA
|
# Pod Anti-affinity for HA
|
||||||
affinity:
|
affinity:
|
||||||
@@ -210,24 +173,24 @@ alertmanager:
|
|||||||
|
|
||||||
# ClickHouse Configuration - Time Series Database
|
# ClickHouse Configuration - Time Series Database
|
||||||
clickhouse:
|
clickhouse:
|
||||||
replicaCount: 2
|
enabled: true
|
||||||
|
installCustomStorageClass: false
|
||||||
|
|
||||||
image:
|
image:
|
||||||
|
registry: docker.io
|
||||||
repository: clickhouse/clickhouse-server
|
repository: clickhouse/clickhouse-server
|
||||||
tag: 24.1.2-alpine
|
tag: 25.5.6 # Updated to official recommended version
|
||||||
pullPolicy: IfNotPresent
|
pullPolicy: IfNotPresent
|
||||||
|
|
||||||
service:
|
# ClickHouse resources (nested config)
|
||||||
type: ClusterIP
|
clickhouse:
|
||||||
httpPort: 8123
|
|
||||||
tcpPort: 9000
|
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 1000m
|
cpu: 1000m
|
||||||
memory: 2Gi
|
memory: 2Gi
|
||||||
limits:
|
limits:
|
||||||
cpu: 2000m
|
cpu: 4000m
|
||||||
memory: 4Gi
|
memory: 8Gi
|
||||||
|
|
||||||
# Pod Anti-affinity for HA
|
# Pod Anti-affinity for HA
|
||||||
affinity:
|
affinity:
|
||||||
@@ -246,50 +209,63 @@ clickhouse:
|
|||||||
size: 100Gi
|
size: 100Gi
|
||||||
storageClass: "standard"
|
storageClass: "standard"
|
||||||
|
|
||||||
# ClickHouse configuration
|
# Cold storage configuration for better disk space management
|
||||||
config:
|
coldStorage:
|
||||||
logger:
|
|
||||||
level: information
|
|
||||||
max_connections: 4096
|
|
||||||
max_concurrent_queries: 500
|
|
||||||
# Data retention (30 days for prod)
|
|
||||||
merge_tree:
|
|
||||||
parts_to_delay_insert: 150
|
|
||||||
parts_to_throw_insert: 300
|
|
||||||
# Performance tuning
|
|
||||||
max_memory_usage: 10000000000
|
|
||||||
max_bytes_before_external_group_by: 20000000000
|
|
||||||
|
|
||||||
# Backup configuration
|
|
||||||
backup:
|
|
||||||
enabled: true
|
enabled: true
|
||||||
schedule: "0 2 * * *"
|
defaultKeepFreeSpaceBytes: 10737418240 # Keep 10GB free
|
||||||
retention: 7
|
ttl:
|
||||||
|
deleteTTLDays: 30 # Move old data to cold storage after 30 days
|
||||||
|
|
||||||
|
# Zookeeper Configuration (required by ClickHouse for coordination)
|
||||||
|
zookeeper:
|
||||||
|
enabled: true
|
||||||
|
replicaCount: 3 # CRITICAL: Always use 3 replicas for production HA
|
||||||
|
|
||||||
|
image:
|
||||||
|
tag: 3.7.1 # Official recommended version
|
||||||
|
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 256Mi
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 512Mi
|
||||||
|
|
||||||
|
persistence:
|
||||||
|
enabled: true
|
||||||
|
size: 10Gi
|
||||||
|
storageClass: "standard"
|
||||||
|
|
||||||
# OpenTelemetry Collector - Integrated with SigNoz
|
# OpenTelemetry Collector - Integrated with SigNoz
|
||||||
otelCollector:
|
otelCollector:
|
||||||
enabled: true
|
enabled: true
|
||||||
replicaCount: 2
|
replicaCount: 2
|
||||||
|
|
||||||
image:
|
image:
|
||||||
repository: signoz/signoz-otel-collector
|
repository: signoz/signoz-otel-collector
|
||||||
tag: 0.102.8
|
tag: v0.129.12 # Updated to latest recommended version
|
||||||
pullPolicy: IfNotPresent
|
pullPolicy: IfNotPresent
|
||||||
|
|
||||||
service:
|
service:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
ports:
|
ports:
|
||||||
otlpGrpc: 4317
|
- name: otlp-grpc
|
||||||
otlpHttp: 4318
|
port: 4317
|
||||||
metrics: 8888
|
- name: otlp-http
|
||||||
healthCheck: 13133
|
port: 4318
|
||||||
|
- name: metrics
|
||||||
|
port: 8888
|
||||||
|
- name: healthcheck
|
||||||
|
port: 13133
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 500m
|
cpu: 500m
|
||||||
memory: 512Mi
|
memory: 512Mi
|
||||||
limits:
|
limits:
|
||||||
cpu: 1000m
|
cpu: 2000m
|
||||||
memory: 1Gi
|
memory: 2Gi
|
||||||
|
|
||||||
# Full OTEL Collector Configuration
|
# Full OTEL Collector Configuration
|
||||||
config:
|
config:
|
||||||
@@ -304,7 +280,7 @@ otelCollector:
|
|||||||
protocols:
|
protocols:
|
||||||
grpc:
|
grpc:
|
||||||
endpoint: 0.0.0.0:4317
|
endpoint: 0.0.0.0:4317
|
||||||
max_recv_msg_size_mib: 16
|
max_recv_msg_size_mib: 32 # Increased for larger payloads
|
||||||
http:
|
http:
|
||||||
endpoint: 0.0.0.0:4318
|
endpoint: 0.0.0.0:4318
|
||||||
cors:
|
cors:
|
||||||
@@ -322,19 +298,20 @@ otelCollector:
|
|||||||
- targets: ['localhost:8888']
|
- targets: ['localhost:8888']
|
||||||
|
|
||||||
processors:
|
processors:
|
||||||
|
# High-performance batch processing (official recommendation)
|
||||||
batch:
|
batch:
|
||||||
timeout: 10s
|
timeout: 1s # Reduced from 10s for faster processing
|
||||||
send_batch_size: 2048
|
send_batch_size: 50000 # Increased from 2048 (official recommendation for traces)
|
||||||
send_batch_max_size: 4096
|
send_batch_max_size: 50000
|
||||||
|
|
||||||
memory_limiter:
|
memory_limiter:
|
||||||
check_interval: 1s
|
check_interval: 1s
|
||||||
limit_mib: 800
|
limit_mib: 1500 # 75% of container memory (2Gi = ~2048Mi)
|
||||||
spike_limit_mib: 200
|
spike_limit_mib: 300
|
||||||
|
|
||||||
# Resource detection for K8s
|
# Resource detection for K8s
|
||||||
resourcedetection:
|
resourcedetection:
|
||||||
detectors: [env, system, docker]
|
detectors: [env, system, docker, kubernetes]
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
|
|
||||||
# Add resource attributes
|
# Add resource attributes
|
||||||
@@ -347,6 +324,12 @@ otelCollector:
|
|||||||
value: bakery-ia-prod
|
value: bakery-ia-prod
|
||||||
action: upsert
|
action: upsert
|
||||||
|
|
||||||
|
# Span metrics processor for automatic service performance metrics
|
||||||
|
spanmetrics:
|
||||||
|
metrics_exporter: signozclickhousemetrics
|
||||||
|
latency_histogram_buckets: [2ms, 4ms, 6ms, 8ms, 10ms, 50ms, 100ms, 200ms, 400ms, 800ms, 1s, 1400ms, 2s, 5s, 10s, 15s]
|
||||||
|
dimensions_cache_size: 100000
|
||||||
|
|
||||||
exporters:
|
exporters:
|
||||||
# Export to SigNoz ClickHouse
|
# Export to SigNoz ClickHouse
|
||||||
clickhousetraces:
|
clickhousetraces:
|
||||||
@@ -387,8 +370,8 @@ otelCollector:
|
|||||||
pipelines:
|
pipelines:
|
||||||
traces:
|
traces:
|
||||||
receivers: [otlp]
|
receivers: [otlp]
|
||||||
processors: [memory_limiter, batch, resourcedetection, resource]
|
processors: [memory_limiter, batch, spanmetrics, resourcedetection, resource]
|
||||||
exporters: [clickhousetraces, debug]
|
exporters: [clickhousetraces]
|
||||||
|
|
||||||
metrics:
|
metrics:
|
||||||
receivers: [otlp, prometheus]
|
receivers: [otlp, prometheus]
|
||||||
@@ -398,12 +381,7 @@ otelCollector:
|
|||||||
logs:
|
logs:
|
||||||
receivers: [otlp]
|
receivers: [otlp]
|
||||||
processors: [memory_limiter, batch, resourcedetection, resource]
|
processors: [memory_limiter, batch, resourcedetection, resource]
|
||||||
exporters: [clickhouselogsexporter, debug]
|
exporters: [clickhouselogsexporter]
|
||||||
|
|
||||||
# OpenTelemetry Collector Deployment Mode
|
|
||||||
otelCollectorDeployment:
|
|
||||||
enabled: true
|
|
||||||
mode: deployment
|
|
||||||
|
|
||||||
# HPA for OTEL Collector
|
# HPA for OTEL Collector
|
||||||
autoscaling:
|
autoscaling:
|
||||||
@@ -413,29 +391,18 @@ otelCollectorDeployment:
|
|||||||
targetCPUUtilizationPercentage: 70
|
targetCPUUtilizationPercentage: 70
|
||||||
targetMemoryUtilizationPercentage: 80
|
targetMemoryUtilizationPercentage: 80
|
||||||
|
|
||||||
# Node Exporter for infrastructure metrics
|
# Schema Migrator - Manages ClickHouse schema migrations
|
||||||
nodeExporter:
|
schemaMigrator:
|
||||||
enabled: true
|
enabled: true
|
||||||
service:
|
|
||||||
type: ClusterIP
|
|
||||||
port: 9100
|
|
||||||
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
cpu: 100m
|
|
||||||
memory: 128Mi
|
|
||||||
limits:
|
|
||||||
cpu: 200m
|
|
||||||
memory: 256Mi
|
|
||||||
|
|
||||||
# Schemamanager - Manages ClickHouse schema
|
|
||||||
schemamanager:
|
|
||||||
enabled: true
|
|
||||||
image:
|
image:
|
||||||
repository: signoz/signoz-schema-migrator
|
repository: signoz/signoz-schema-migrator
|
||||||
tag: 0.52.3
|
tag: v0.129.12 # Updated to latest version
|
||||||
pullPolicy: IfNotPresent
|
pullPolicy: IfNotPresent
|
||||||
|
|
||||||
|
# Enable Helm hooks for proper upgrade handling
|
||||||
|
upgradeHelmHooks: true
|
||||||
|
|
||||||
# Additional Configuration
|
# Additional Configuration
|
||||||
serviceAccount:
|
serviceAccount:
|
||||||
create: true
|
create: true
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ show_help() {
|
|||||||
echo ""
|
echo ""
|
||||||
echo "Options:
|
echo "Options:
|
||||||
-h, --help Show this help message
|
-h, --help Show this help message
|
||||||
-n, --namespace NAMESPACE Specify namespace (default: signoz)"
|
-n, --namespace NAMESPACE Specify namespace (default: bakery-ia)"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Examples:
|
echo "Examples:
|
||||||
$0 dev # Verify development deployment
|
$0 dev # Verify development deployment
|
||||||
@@ -35,7 +35,7 @@ show_help() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Parse command line arguments
|
# Parse command line arguments
|
||||||
NAMESPACE="signoz"
|
NAMESPACE="bakery-ia"
|
||||||
|
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
case $1 in
|
case $1 in
|
||||||
@@ -225,19 +225,27 @@ verify_deployment() {
|
|||||||
verify_dev_specific() {
|
verify_dev_specific() {
|
||||||
echo "${BLUE}8. Development-specific checks...${NC}"
|
echo "${BLUE}8. Development-specific checks...${NC}"
|
||||||
|
|
||||||
# Check if localhost ingress is configured
|
# Check if ingress is configured
|
||||||
if kubectl get ingress -n "$NAMESPACE" | grep -q "localhost"; then
|
if kubectl get ingress -n "$NAMESPACE" 2>/dev/null | grep -q "monitoring.bakery-ia.local"; then
|
||||||
echo "${GREEN}✅ Localhost ingress configured${NC}"
|
echo "${GREEN}✅ Development ingress configured${NC}"
|
||||||
else
|
else
|
||||||
echo "${YELLOW}⚠️ Localhost ingress not found${NC}"
|
echo "${YELLOW}⚠️ Development ingress not found${NC}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check resource limits (should be lower for dev)
|
# Check unified signoz component resource limits (should be lower for dev)
|
||||||
local query_service=$(kubectl get deployment -n "$NAMESPACE" signoz-query-service -o jsonpath='{.spec.template.spec.containers[0].resources.limits.memory}' 2>/dev/null || echo "")
|
local signoz_mem=$(kubectl get deployment -n "$NAMESPACE" -l app.kubernetes.io/component=query-service -o jsonpath='{.items[0].spec.template.spec.containers[0].resources.limits.memory}' 2>/dev/null || echo "")
|
||||||
if [[ -n "$query_service" && "$query_service" == "512Mi" ]]; then
|
if [[ -n "$signoz_mem" ]]; then
|
||||||
echo "${GREEN}✅ Development resource limits applied${NC}"
|
echo "${GREEN}✅ SigNoz component found (memory limit: $signoz_mem)${NC}"
|
||||||
else
|
else
|
||||||
echo "${YELLOW}⚠️ Resource limits may not be optimized for development${NC}"
|
echo "${YELLOW}⚠️ Could not verify SigNoz component resources${NC}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check single replica setup for dev
|
||||||
|
local replicas=$(kubectl get deployment -n "$NAMESPACE" -l app.kubernetes.io/component=query-service -o jsonpath='{.items[0].spec.replicas}' 2>/dev/null || echo "0")
|
||||||
|
if [[ $replicas -eq 1 ]]; then
|
||||||
|
echo "${GREEN}✅ Single replica configuration (appropriate for dev)${NC}"
|
||||||
|
else
|
||||||
|
echo "${YELLOW}⚠️ Multiple replicas detected (replicas: $replicas)${NC}"
|
||||||
fi
|
fi
|
||||||
echo ""
|
echo ""
|
||||||
}
|
}
|
||||||
@@ -247,26 +255,52 @@ verify_prod_specific() {
|
|||||||
echo "${BLUE}8. Production-specific checks...${NC}"
|
echo "${BLUE}8. Production-specific checks...${NC}"
|
||||||
|
|
||||||
# Check if TLS is configured
|
# Check if TLS is configured
|
||||||
if kubectl get ingress -n "$NAMESPACE" | grep -q "signoz-tls-cert"; then
|
if kubectl get ingress -n "$NAMESPACE" 2>/dev/null | grep -q "signoz-tls"; then
|
||||||
echo "${GREEN}✅ TLS certificate configured${NC}"
|
echo "${GREEN}✅ TLS certificate configured${NC}"
|
||||||
else
|
else
|
||||||
echo "${YELLOW}⚠️ TLS certificate not found${NC}"
|
echo "${YELLOW}⚠️ TLS certificate not found${NC}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check if multiple replicas are running
|
# Check if multiple replicas are running for HA
|
||||||
local query_replicas=$(kubectl get deployment -n "$NAMESPACE" signoz-query-service -o jsonpath='{.spec.replicas}' 2>/dev/null || echo "1")
|
local signoz_replicas=$(kubectl get deployment -n "$NAMESPACE" -l app.kubernetes.io/component=query-service -o jsonpath='{.items[0].spec.replicas}' 2>/dev/null || echo "1")
|
||||||
if [[ $query_replicas -gt 1 ]]; then
|
if [[ $signoz_replicas -gt 1 ]]; then
|
||||||
echo "${GREEN}✅ High availability configured ($query_replicas replicas)${NC}"
|
echo "${GREEN}✅ High availability configured ($signoz_replicas SigNoz replicas)${NC}"
|
||||||
else
|
else
|
||||||
echo "${YELLOW}⚠️ Single replica detected (not highly available)${NC}"
|
echo "${YELLOW}⚠️ Single SigNoz replica detected (not highly available)${NC}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check Zookeeper replicas (critical for production)
|
||||||
|
local zk_replicas=$(kubectl get statefulset -n "$NAMESPACE" -l app.kubernetes.io/component=zookeeper -o jsonpath='{.items[0].spec.replicas}' 2>/dev/null || echo "0")
|
||||||
|
if [[ $zk_replicas -eq 3 ]]; then
|
||||||
|
echo "${GREEN}✅ Zookeeper properly configured with 3 replicas${NC}"
|
||||||
|
elif [[ $zk_replicas -gt 0 ]]; then
|
||||||
|
echo "${YELLOW}⚠️ Zookeeper has $zk_replicas replicas (recommend 3 for production)${NC}"
|
||||||
|
else
|
||||||
|
echo "${RED}❌ Zookeeper not found${NC}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check OTel Collector replicas
|
||||||
|
local otel_replicas=$(kubectl get deployment -n "$NAMESPACE" -l app.kubernetes.io/component=otel-collector -o jsonpath='{.items[0].spec.replicas}' 2>/dev/null || echo "1")
|
||||||
|
if [[ $otel_replicas -gt 1 ]]; then
|
||||||
|
echo "${GREEN}✅ OTel Collector HA configured ($otel_replicas replicas)${NC}"
|
||||||
|
else
|
||||||
|
echo "${YELLOW}⚠️ Single OTel Collector replica${NC}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check resource limits (should be higher for prod)
|
# Check resource limits (should be higher for prod)
|
||||||
local query_service=$(kubectl get deployment -n "$NAMESPACE" signoz-query-service -o jsonpath='{.spec.template.spec.containers[0].resources.limits.memory}' 2>/dev/null || echo "")
|
local signoz_mem=$(kubectl get deployment -n "$NAMESPACE" -l app.kubernetes.io/component=query-service -o jsonpath='{.items[0].spec.template.spec.containers[0].resources.limits.memory}' 2>/dev/null || echo "")
|
||||||
if [[ -n "$query_service" && "$query_service" == "2Gi" ]]; then
|
if [[ -n "$signoz_mem" ]]; then
|
||||||
echo "${GREEN}✅ Production resource limits applied${NC}"
|
echo "${GREEN}✅ Production resource limits applied (memory: $signoz_mem)${NC}"
|
||||||
else
|
else
|
||||||
echo "${YELLOW}⚠️ Resource limits may not be optimized for production${NC}"
|
echo "${YELLOW}⚠️ Could not verify resource limits${NC}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check HPA (Horizontal Pod Autoscaler)
|
||||||
|
local hpa_count=$(kubectl get hpa -n "$NAMESPACE" 2>/dev/null | grep -c signoz || echo "0")
|
||||||
|
if [[ $hpa_count -gt 0 ]]; then
|
||||||
|
echo "${GREEN}✅ Horizontal Pod Autoscaler configured${NC}"
|
||||||
|
else
|
||||||
|
echo "${YELLOW}⚠️ No HPA found (consider enabling for production)${NC}"
|
||||||
fi
|
fi
|
||||||
echo ""
|
echo ""
|
||||||
}
|
}
|
||||||
@@ -280,37 +314,48 @@ show_access_info() {
|
|||||||
echo "${NC}"
|
echo "${NC}"
|
||||||
|
|
||||||
if [[ "$ENVIRONMENT" == "dev" ]]; then
|
if [[ "$ENVIRONMENT" == "dev" ]]; then
|
||||||
echo "SigNoz UI: https://localhost/signoz"
|
echo "SigNoz UI: http://monitoring.bakery-ia.local"
|
||||||
echo "SigNoz API: https://localhost/signoz-api"
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "OpenTelemetry Collector:"
|
echo "OpenTelemetry Collector (within cluster):"
|
||||||
echo " gRPC: localhost:4317"
|
echo " gRPC: signoz-otel-collector.$NAMESPACE.svc.cluster.local:4317"
|
||||||
echo " HTTP: localhost:4318"
|
echo " HTTP: signoz-otel-collector.$NAMESPACE.svc.cluster.local:4318"
|
||||||
echo " Metrics: localhost:8888"
|
echo ""
|
||||||
|
echo "Port-forward for local access:"
|
||||||
|
echo " kubectl port-forward -n $NAMESPACE svc/signoz 8080:8080"
|
||||||
|
echo " kubectl port-forward -n $NAMESPACE svc/signoz-otel-collector 4317:4317"
|
||||||
|
echo " kubectl port-forward -n $NAMESPACE svc/signoz-otel-collector 4318:4318"
|
||||||
else
|
else
|
||||||
echo "SigNoz UI: https://monitoring.bakewise.ai/signoz"
|
echo "SigNoz UI: https://monitoring.bakewise.ai"
|
||||||
echo "SigNoz API: https://monitoring.bakewise.ai/signoz-api"
|
|
||||||
echo "SigNoz Alerts: https://monitoring.bakewise.ai/signoz-alerts"
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "OpenTelemetry Collector:"
|
echo "OpenTelemetry Collector (within cluster):"
|
||||||
echo " gRPC: monitoring.bakewise.ai:4317"
|
echo " gRPC: signoz-otel-collector.$NAMESPACE.svc.cluster.local:4317"
|
||||||
echo " HTTP: monitoring.bakewise.ai:4318"
|
echo " HTTP: signoz-otel-collector.$NAMESPACE.svc.cluster.local:4318"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "Default Credentials:"
|
echo "Default Credentials:"
|
||||||
echo " Username: admin"
|
echo " Username: admin@example.com"
|
||||||
echo " Password: admin"
|
echo " Password: admin"
|
||||||
echo ""
|
echo ""
|
||||||
|
echo "⚠️ IMPORTANT: Change default password after first login!"
|
||||||
|
echo ""
|
||||||
|
|
||||||
# Show connection test commands
|
# Show connection test commands
|
||||||
echo "Connection Test Commands:"
|
echo "Connection Test Commands:"
|
||||||
if [[ "$ENVIRONMENT" == "dev" ]]; then
|
if [[ "$ENVIRONMENT" == "dev" ]]; then
|
||||||
echo " curl -k https://localhost/signoz"
|
echo " # Test SigNoz UI"
|
||||||
echo " curl -k https://localhost/signoz-api/health"
|
echo " curl http://monitoring.bakery-ia.local"
|
||||||
|
echo ""
|
||||||
|
echo " # Test via port-forward"
|
||||||
|
echo " kubectl port-forward -n $NAMESPACE svc/signoz 8080:8080"
|
||||||
|
echo " curl http://localhost:8080"
|
||||||
else
|
else
|
||||||
echo " curl https://monitoring.bakewise.ai/signoz"
|
echo " # Test SigNoz UI"
|
||||||
echo " curl https://monitoring.bakewise.ai/signoz-api/health"
|
echo " curl https://monitoring.bakewise.ai"
|
||||||
|
echo ""
|
||||||
|
echo " # Test API health"
|
||||||
|
echo " kubectl port-forward -n $NAMESPACE svc/signoz 8080:8080"
|
||||||
|
echo " curl http://localhost:8080/api/v1/health"
|
||||||
fi
|
fi
|
||||||
echo ""
|
echo ""
|
||||||
}
|
}
|
||||||
@@ -323,35 +368,42 @@ run_connectivity_tests() {
|
|||||||
echo "=========================================="
|
echo "=========================================="
|
||||||
echo "${NC}"
|
echo "${NC}"
|
||||||
|
|
||||||
if [[ "$ENVIRONMENT" == "dev" ]]; then
|
# Test pod readiness first
|
||||||
# Test frontend
|
echo "Checking pod readiness..."
|
||||||
echo "Testing SigNoz frontend..."
|
local ready_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/instance=signoz --field-selector=status.phase=Running 2>/dev/null | grep "Running" | grep -c "1/1\|2/2" || echo "0")
|
||||||
if curl -k -s -o /dev/null -w "%{http_code}" https://localhost/signoz | grep -q "200\|302"; then
|
local total_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/instance=signoz 2>/dev/null | grep -v "NAME" | wc -l | tr -d ' ' || echo "0")
|
||||||
echo "${GREEN}✅ Frontend accessible${NC}"
|
|
||||||
else
|
|
||||||
echo "${RED}❌ Frontend not accessible${NC}"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Test API
|
if [[ $ready_pods -eq $total_pods && $total_pods -gt 0 ]]; then
|
||||||
echo "Testing SigNoz API..."
|
echo "${GREEN}✅ All pods are ready ($ready_pods/$total_pods)${NC}"
|
||||||
if curl -k -s -o /dev/null -w "%{http_code}" https://localhost/signoz-api/health | grep -q "200"; then
|
|
||||||
echo "${GREEN}✅ API accessible${NC}"
|
|
||||||
else
|
else
|
||||||
echo "${RED}❌ API not accessible${NC}"
|
echo "${YELLOW}⚠️ Some pods not ready ($ready_pods/$total_pods)${NC}"
|
||||||
fi
|
|
||||||
|
|
||||||
# Test OTEL collector
|
|
||||||
echo "Testing OpenTelemetry collector..."
|
|
||||||
if curl -s -o /dev/null -w "%{http_code}" http://localhost:8888/metrics | grep -q "200"; then
|
|
||||||
echo "${GREEN}✅ OTEL collector accessible${NC}"
|
|
||||||
else
|
|
||||||
echo "${YELLOW}⚠️ OTEL collector not accessible (may not be exposed)${NC}"
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
echo "${YELLOW}⚠️ Production connectivity tests require valid DNS and TLS${NC}"
|
|
||||||
echo " Please ensure monitoring.bakewise.ai resolves to your cluster"
|
|
||||||
fi
|
fi
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
|
# Test internal service connectivity
|
||||||
|
echo "Testing internal service connectivity..."
|
||||||
|
local signoz_svc=$(kubectl get svc -n "$NAMESPACE" signoz -o jsonpath='{.spec.clusterIP}' 2>/dev/null || echo "")
|
||||||
|
if [[ -n "$signoz_svc" ]]; then
|
||||||
|
echo "${GREEN}✅ SigNoz service accessible at $signoz_svc:8080${NC}"
|
||||||
|
else
|
||||||
|
echo "${RED}❌ SigNoz service not found${NC}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
local otel_svc=$(kubectl get svc -n "$NAMESPACE" signoz-otel-collector -o jsonpath='{.spec.clusterIP}' 2>/dev/null || echo "")
|
||||||
|
if [[ -n "$otel_svc" ]]; then
|
||||||
|
echo "${GREEN}✅ OTel Collector service accessible at $otel_svc:4317 (gRPC), $otel_svc:4318 (HTTP)${NC}"
|
||||||
|
else
|
||||||
|
echo "${RED}❌ OTel Collector service not found${NC}"
|
||||||
|
fi
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
if [[ "$ENVIRONMENT" == "prod" ]]; then
|
||||||
|
echo "${YELLOW}⚠️ Production connectivity tests require valid DNS and TLS${NC}"
|
||||||
|
echo " Please ensure monitoring.bakewise.ai resolves to your cluster"
|
||||||
|
echo ""
|
||||||
|
echo "Manual test:"
|
||||||
|
echo " curl -I https://monitoring.bakewise.ai"
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Main execution
|
# Main execution
|
||||||
|
|||||||
@@ -97,9 +97,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "ai-insights-service"
|
value: "ai-insights-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -98,9 +98,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "auth-service"
|
value: "auth-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -62,9 +62,9 @@ spec:
|
|||||||
value: "3"
|
value: "3"
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "distribution-service"
|
value: "distribution-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -90,9 +90,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "external-service"
|
value: "external-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -97,9 +97,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "forecasting-service"
|
value: "forecasting-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -52,7 +52,7 @@ spec:
|
|||||||
name: whatsapp-secrets
|
name: whatsapp-secrets
|
||||||
env:
|
env:
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4317"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
memory: "256Mi"
|
memory: "256Mi"
|
||||||
|
|||||||
@@ -97,9 +97,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "inventory-service"
|
value: "inventory-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -97,9 +97,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "notification-service"
|
value: "notification-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -97,9 +97,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "orchestrator-service"
|
value: "orchestrator-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -97,9 +97,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "orders-service"
|
value: "orders-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -97,9 +97,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "pos-service"
|
value: "pos-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -97,9 +97,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "procurement-service"
|
value: "procurement-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -97,9 +97,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "production-service"
|
value: "production-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -97,9 +97,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "recipes-service"
|
value: "recipes-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -97,9 +97,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "sales-service"
|
value: "sales-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -97,9 +97,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "suppliers-service"
|
value: "suppliers-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -97,9 +97,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "tenant-service"
|
value: "tenant-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -97,9 +97,9 @@ spec:
|
|||||||
env:
|
env:
|
||||||
# OpenTelemetry Configuration
|
# OpenTelemetry Configuration
|
||||||
- name: OTEL_COLLECTOR_ENDPOINT
|
- name: OTEL_COLLECTOR_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4318"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||||
- name: OTEL_SERVICE_NAME
|
- name: OTEL_SERVICE_NAME
|
||||||
value: "training-service"
|
value: "training-service"
|
||||||
- name: ENABLE_TRACING
|
- name: ENABLE_TRACING
|
||||||
|
|||||||
@@ -385,13 +385,13 @@ data:
|
|||||||
# OBSERVABILITY - SigNoz (Unified Monitoring)
|
# OBSERVABILITY - SigNoz (Unified Monitoring)
|
||||||
# ================================================================
|
# ================================================================
|
||||||
# OpenTelemetry Configuration - Direct to SigNoz
|
# OpenTelemetry Configuration - Direct to SigNoz
|
||||||
OTEL_EXPORTER_OTLP_ENDPOINT: "http://signoz-otel-collector.signoz.svc.cluster.local:4317"
|
OTEL_EXPORTER_OTLP_ENDPOINT: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
|
||||||
OTEL_EXPORTER_OTLP_PROTOCOL: "grpc"
|
OTEL_EXPORTER_OTLP_PROTOCOL: "grpc"
|
||||||
OTEL_SERVICE_NAME: "bakery-ia"
|
OTEL_SERVICE_NAME: "bakery-ia"
|
||||||
OTEL_RESOURCE_ATTRIBUTES: "deployment.environment=development"
|
OTEL_RESOURCE_ATTRIBUTES: "deployment.environment=development"
|
||||||
|
|
||||||
# SigNoz Endpoints (v0.106.0+ unified service)
|
# SigNoz Endpoints (v0.106.0+ unified service)
|
||||||
SIGNOZ_ENDPOINT: "http://signoz.signoz.svc.cluster.local:8080"
|
SIGNOZ_ENDPOINT: "http://signoz.bakery-ia.svc.cluster.local:8080"
|
||||||
SIGNOZ_FRONTEND_URL: "https://monitoring.bakery-ia.local"
|
SIGNOZ_FRONTEND_URL: "https://monitoring.bakery-ia.local"
|
||||||
|
|
||||||
# ================================================================
|
# ================================================================
|
||||||
|
|||||||
@@ -73,7 +73,14 @@ spec:
|
|||||||
name: gateway-service
|
name: gateway-service
|
||||||
port:
|
port:
|
||||||
number: 8000
|
number: 8000
|
||||||
# Note: SigNoz monitoring is deployed via Helm in the 'signoz' namespace
|
# SigNoz Monitoring on subdomain (deployed via Helm in bakery-ia namespace)
|
||||||
# SigNoz creates its own Ingress via Helm chart configuration (signoz-values-dev.yaml)
|
- host: monitoring.bakery-ia.local
|
||||||
# Access at: https://monitoring.bakery-ia.local/
|
http:
|
||||||
# SignOz is served at the root of the monitoring subdomain
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: signoz
|
||||||
|
port:
|
||||||
|
number: 8080
|
||||||
@@ -61,7 +61,7 @@ patches:
|
|||||||
value: "true"
|
value: "true"
|
||||||
- op: add
|
- op: add
|
||||||
path: /data/OTEL_EXPORTER_OTLP_ENDPOINT
|
path: /data/OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
value: "http://signoz-otel-collector.signoz.svc.cluster.local:4317"
|
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
|
||||||
- op: add
|
- op: add
|
||||||
path: /data/OTEL_EXPORTER_OTLP_PROTOCOL
|
path: /data/OTEL_EXPORTER_OTLP_PROTOCOL
|
||||||
value: "grpc"
|
value: "grpc"
|
||||||
|
|||||||
@@ -23,13 +23,13 @@ data:
|
|||||||
ENABLE_LOGS: "true"
|
ENABLE_LOGS: "true"
|
||||||
|
|
||||||
# OpenTelemetry Configuration - Direct to SigNoz
|
# OpenTelemetry Configuration - Direct to SigNoz
|
||||||
OTEL_EXPORTER_OTLP_ENDPOINT: "http://signoz-otel-collector.signoz.svc.cluster.local:4317"
|
OTEL_EXPORTER_OTLP_ENDPOINT: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
|
||||||
OTEL_EXPORTER_OTLP_PROTOCOL: "grpc"
|
OTEL_EXPORTER_OTLP_PROTOCOL: "grpc"
|
||||||
OTEL_SERVICE_NAME: "bakery-ia"
|
OTEL_SERVICE_NAME: "bakery-ia"
|
||||||
OTEL_RESOURCE_ATTRIBUTES: "deployment.environment=production,cluster.name=bakery-ia-prod"
|
OTEL_RESOURCE_ATTRIBUTES: "deployment.environment=production,cluster.name=bakery-ia-prod"
|
||||||
|
|
||||||
# SigNoz Endpoints (v0.106.0+ unified service)
|
# SigNoz Endpoints (v0.106.0+ unified service)
|
||||||
SIGNOZ_ENDPOINT: "http://signoz.signoz.svc.cluster.local:8080"
|
SIGNOZ_ENDPOINT: "http://signoz.bakery-ia.svc.cluster.local:8080"
|
||||||
SIGNOZ_FRONTEND_URL: "https://monitoring.bakewise.ai"
|
SIGNOZ_FRONTEND_URL: "https://monitoring.bakewise.ai"
|
||||||
SIGNOZ_ROOT_URL: "https://monitoring.bakewise.ai"
|
SIGNOZ_ROOT_URL: "https://monitoring.bakewise.ai"
|
||||||
|
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ spec:
|
|||||||
tls:
|
tls:
|
||||||
- hosts:
|
- hosts:
|
||||||
- bakewise.ai
|
- bakewise.ai
|
||||||
|
- monitoring.bakewise.ai
|
||||||
secretName: bakery-ia-prod-tls-cert
|
secretName: bakery-ia-prod-tls-cert
|
||||||
rules:
|
rules:
|
||||||
- host: bakewise.ai
|
- host: bakewise.ai
|
||||||
@@ -60,6 +61,14 @@ spec:
|
|||||||
name: gateway-service
|
name: gateway-service
|
||||||
port:
|
port:
|
||||||
number: 8000
|
number: 8000
|
||||||
# Note: SigNoz monitoring is deployed via Helm in the 'signoz' namespace
|
# SigNoz Monitoring on subdomain (deployed via Helm in bakery-ia namespace)
|
||||||
# SigNoz creates its own Ingress via Helm chart configuration
|
- host: monitoring.bakewise.ai
|
||||||
# Access at: https://monitoring.bakewise.ai (configured in signoz-values-prod.yaml)
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: signoz
|
||||||
|
port:
|
||||||
|
number: 8080
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ def setup_tracing(service_name: str = "ai-insights"):
|
|||||||
resource = Resource.create({"service.name": service_name})
|
resource = Resource.create({"service.name": service_name})
|
||||||
|
|
||||||
otlp_exporter = OTLPSpanExporter(
|
otlp_exporter = OTLPSpanExporter(
|
||||||
endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://otel-collector.monitoring.svc.cluster.local:4317"),
|
endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"),
|
||||||
insecure=True
|
insecure=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ def setup_tracing(service_name: str = "alert-processor"):
|
|||||||
resource = Resource.create({"service.name": service_name})
|
resource = Resource.create({"service.name": service_name})
|
||||||
|
|
||||||
otlp_exporter = OTLPSpanExporter(
|
otlp_exporter = OTLPSpanExporter(
|
||||||
endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://otel-collector.monitoring.svc.cluster.local:4317"),
|
endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"),
|
||||||
insecure=True
|
insecure=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ def setup_tracing(service_name: str = "demo-session"):
|
|||||||
resource = Resource.create({"service.name": service_name})
|
resource = Resource.create({"service.name": service_name})
|
||||||
|
|
||||||
otlp_exporter = OTLPSpanExporter(
|
otlp_exporter = OTLPSpanExporter(
|
||||||
endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://otel-collector.monitoring.svc.cluster.local:4317"),
|
endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"),
|
||||||
insecure=True
|
insecure=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -68,7 +68,7 @@ def setup_otel_logging(
|
|||||||
if otel_endpoint is None:
|
if otel_endpoint is None:
|
||||||
otel_endpoint = os.getenv(
|
otel_endpoint = os.getenv(
|
||||||
"OTEL_EXPORTER_OTLP_ENDPOINT",
|
"OTEL_EXPORTER_OTLP_ENDPOINT",
|
||||||
os.getenv("OTEL_COLLECTOR_ENDPOINT", "http://signoz-otel-collector.signoz:4318")
|
os.getenv("OTEL_COLLECTOR_ENDPOINT", "http://signoz-otel-collector.bakery-ia:4318")
|
||||||
)
|
)
|
||||||
|
|
||||||
# Ensure endpoint has /v1/logs path for HTTP
|
# Ensure endpoint has /v1/logs path for HTTP
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ def setup_otel_metrics(
|
|||||||
if otel_endpoint is None:
|
if otel_endpoint is None:
|
||||||
otel_endpoint = os.getenv(
|
otel_endpoint = os.getenv(
|
||||||
"OTEL_EXPORTER_OTLP_ENDPOINT",
|
"OTEL_EXPORTER_OTLP_ENDPOINT",
|
||||||
os.getenv("OTEL_COLLECTOR_ENDPOINT", "http://signoz-otel-collector.signoz:4318")
|
os.getenv("OTEL_COLLECTOR_ENDPOINT", "http://signoz-otel-collector.bakery-ia:4318")
|
||||||
)
|
)
|
||||||
|
|
||||||
# Ensure endpoint has /v1/metrics path for HTTP
|
# Ensure endpoint has /v1/metrics path for HTTP
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ def setup_tracing(
|
|||||||
app,
|
app,
|
||||||
service_name: str,
|
service_name: str,
|
||||||
service_version: str = "1.0.0",
|
service_version: str = "1.0.0",
|
||||||
otel_endpoint: str = "http://signoz-otel-collector.signoz:4318"
|
otel_endpoint: str = "http://signoz-otel-collector.bakery-ia:4318"
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Setup OpenTelemetry distributed tracing for a FastAPI service.
|
Setup OpenTelemetry distributed tracing for a FastAPI service.
|
||||||
|
|||||||
@@ -151,7 +151,7 @@ class BaseFastAPIService:
|
|||||||
try:
|
try:
|
||||||
otel_endpoint = os.getenv(
|
otel_endpoint = os.getenv(
|
||||||
"OTEL_COLLECTOR_ENDPOINT",
|
"OTEL_COLLECTOR_ENDPOINT",
|
||||||
"http://signoz-otel-collector.signoz:4318"
|
"http://signoz-otel-collector.bakery-ia:4318"
|
||||||
)
|
)
|
||||||
setup_tracing(self.app, self.service_name, self.version, otel_endpoint)
|
setup_tracing(self.app, self.service_name, self.version, otel_endpoint)
|
||||||
self.logger.info(f"Distributed tracing enabled for {self.service_name}")
|
self.logger.info(f"Distributed tracing enabled for {self.service_name}")
|
||||||
|
|||||||
Reference in New Issue
Block a user