Improve docker config

This commit is contained in:
Urtzi Alfaro
2025-07-20 02:16:51 +02:00
parent 9a67f3d175
commit 1c730c3c81
27 changed files with 2598 additions and 1161 deletions

View File

@@ -1,11 +1,15 @@
# infrastructure/monitoring/grafana/dashboards/dashboard.yml
# Grafana dashboard provisioning
# Declares a single file-based dashboard provider for organisation 1.
apiVersion: 1
providers:
# NOTE(review): two `name` lines and two `folder` lines appear back to back
# below. This looks like the removed and the added line of the diff shown
# together; a provider takes exactly one of each — confirm that only
# 'bakery-dashboards' / 'Bakery Forecasting' remain in the committed file.
- name: 'Bakery Forecasting'
- name: 'bakery-dashboards'
orgId: 1
folder: ''
folder: 'Bakery Forecasting'
type: file
# Deletion protection is off for provisioned dashboards.
disableDeletion: false
# Interval, in seconds, at which the dashboard directory is re-scanned.
updateIntervalSeconds: 10
# Provisioned dashboards may be edited from the Grafana UI.
allowUiUpdates: true
options:
# Directory (inside the Grafana container, presumably) holding dashboard JSON.
path: /etc/grafana/provisioning/dashboards

View File

@@ -1,3 +1,6 @@
# infrastructure/monitoring/grafana/datasources/prometheus.yml
# Grafana Prometheus datasource configuration
apiVersion: 1
datasources:
@@ -6,4 +9,20 @@ datasources:
access: proxy
url: http://prometheus:9090
isDefault: true
version: 1
editable: true
jsonData:
timeInterval: "15s"
queryTimeout: "60s"
httpMethod: "POST"
exemplarTraceIdDestinations:
- name: trace_id
datasourceUid: jaeger
# Jaeger tracing datasource. Its uid matches the `datasourceUid` used by the
# Prometheus exemplar trace link defined earlier in this file.
- name: Jaeger
  uid: jaeger
  type: jaeger
  access: proxy
  url: http://jaeger:16686
  editable: true
  version: 1

View File

@@ -1,17 +1,30 @@
---
# infrastructure/monitoring/prometheus/prometheus.yml
# Prometheus configuration
# Global defaults; individual scrape jobs below may override scrape_interval.
global:
scrape_interval: 15s
evaluation_interval: 15s
# Labels identifying this Prometheus server to external systems.
external_labels:
cluster: 'bakery-forecasting'
replica: 'prometheus-01'
# NOTE(review): both a bare "alerts.yml" and a rules-directory glob are listed.
# This may be the old and the new line of the diff shown together — confirm
# which entries the committed file keeps.
rule_files:
- "alerts.yml"
- "/etc/prometheus/rules/*.yml"
# NOTE(review): the only Alertmanager target is commented out, so `targets`
# is empty and firing alerts have nowhere to be delivered until one is set.
alerting:
alertmanagers:
- static_configs:
- targets:
# - alertmanager:9093
scrape_configs:
# Microservice scrape jobs (statically configured targets)
- job_name: 'gateway'
  metrics_path: '/metrics'
  # Overrides the global 15s interval; the timeout must stay below it.
  scrape_interval: 30s
  scrape_timeout: 10s
  static_configs:
    - targets:
        - 'gateway:8000'
- job_name: 'auth-service'
static_configs:
@@ -49,11 +62,21 @@ scrape_configs:
metrics_path: '/metrics'
scrape_interval: 30s
# Infrastructure monitoring
# Redis monitoring (requires redis_exporter).
# Port 6379 is the Redis data port and does not serve an HTTP /metrics
# endpoint, so scraping it directly always fails and keeps `up == 0`.
# Scrape the exporter instead (9121 is its default listen port).
- job_name: 'redis'
  static_configs:
    - targets: ['redis-exporter:9121']
  metrics_path: '/metrics'
  scrape_interval: 30s
# RabbitMQ broker metrics.
# NOTE(review): 15692 is presumably the rabbitmq_prometheus plugin endpoint —
# confirm the plugin is enabled in the broker image.
- job_name: 'rabbitmq'
  scrape_interval: 30s
  metrics_path: '/metrics'
  static_configs:
    - targets:
        - 'rabbitmq:15692'
# PostgreSQL metrics (requires postgres_exporter); uses the default metrics path.
- job_name: 'postgres'
  scrape_interval: 30s
  static_configs:
    - targets:
        - 'postgres-exporter:9187'

View File

@@ -0,0 +1,86 @@
# infrastructure/monitoring/prometheus/rules/alerts.yml
# Prometheus alerting rules
groups:
- name: bakery_services
rules:
# Availability: fires when a scrape target has reported up == 0 for 2 minutes.
- alert: ServiceDown
  expr: 'up == 0'
  for: 2m
  annotations:
    summary: 'Service {{ $labels.job }} is down'
    description: 'Service {{ $labels.job }} has been down for more than 2 minutes.'
  labels:
    severity: critical
# Error rate: more than 0.1 HTTP 5xx responses per second (5m rate),
# sustained for 5 minutes.
- alert: HighErrorRate
  expr: 'rate(http_requests_total{status=~"5.."}[5m]) > 0.1'
  for: 5m
  annotations:
    summary: 'High error rate on {{ $labels.job }}'
    description: 'Error rate is {{ $value }} errors per second on {{ $labels.job }}.'
  labels:
    severity: warning
# Latency: 95th percentile of request duration above 1 second for 5 minutes.
- alert: HighResponseTime
  expr: 'histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 1'
  for: 5m
  annotations:
    summary: 'High response time on {{ $labels.job }}'
    description: '95th percentile response time is {{ $value }}s on {{ $labels.job }}.'
  labels:
    severity: warning
# Memory: resident set size (bytes converted to MB) above 500 for 5 minutes.
- alert: HighMemoryUsage
  expr: 'process_resident_memory_bytes / 1024 / 1024 > 500'
  for: 5m
  annotations:
    summary: 'High memory usage on {{ $labels.job }}'
    description: 'Memory usage is {{ $value }}MB on {{ $labels.job }}.'
  labels:
    severity: warning
# Database connection alerts
# pg_stat_activity_count is exported as one series per database/state, so a
# per-series comparison can stay under the threshold while the server as a
# whole is close to its connection limit. Sum per scraped instance so the
# threshold is checked against the total number of connections.
- alert: DatabaseConnectionHigh
  expr: sum by (job, instance) (pg_stat_activity_count) > 80
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: "High database connections"
    description: "Database has {{ $value }} active connections."
- name: bakery_business
rules:
# Training: fires as soon as the failed-job counter increased within the last
# hour (no `for` clause, so there is no pending period).
- alert: TrainingJobFailed
  expr: 'increase(training_jobs_failed_total[1h]) > 0'
  annotations:
    summary: 'Training job failed'
    description: '{{ $value }} training jobs have failed in the last hour.'
  labels:
    severity: warning
# Model quality: prediction_accuracy below 0.7 for 15 minutes; the description
# reports the tenant_id label of the offending series.
- alert: LowPredictionAccuracy
  expr: 'prediction_accuracy < 0.7'
  for: 15m
  annotations:
    summary: 'Low prediction accuracy'
    description: 'Prediction accuracy is {{ $value }} for tenant {{ $labels.tenant_id }}.'
  labels:
    severity: warning
# Rate limiting: more than 10 rate-limit hits per 5-minute window,
# sustained for 5 minutes.
- alert: APIRateLimitHit
  expr: 'increase(rate_limit_hits_total[5m]) > 10'
  for: 5m
  annotations:
    summary: 'API rate limit hit frequently'
    description: 'Rate limit has been hit {{ $value }} times in 5 minutes.'
  labels:
    severity: warning

View File

@@ -0,0 +1,6 @@
auth-db:5432:auth_db:auth_user:auth_pass123
training-db:5432:training_db:training_user:training_pass123
forecasting-db:5432:forecasting_db:forecasting_user:forecasting_pass123
data-db:5432:data_db:data_user:data_pass123
tenant-db:5432:tenant_db:tenant_user:tenant_pass123
notification-db:5432:notification_db:notification_user:notification_pass123

View File

@@ -0,0 +1,64 @@
{
"Servers": {
"1": {
"Name": "Auth Database",
"Group": "Bakery Services",
"Host": "auth-db",
"Port": 5432,
"MaintenanceDB": "auth_db",
"Username": "auth_user",
"PassFile": "/pgadmin4/pgpass",
"SSLMode": "prefer"
},
"2": {
"Name": "Training Database",
"Group": "Bakery Services",
"Host": "training-db",
"Port": 5432,
"MaintenanceDB": "training_db",
"Username": "training_user",
"PassFile": "/pgadmin4/pgpass",
"SSLMode": "prefer"
},
"3": {
"Name": "Forecasting Database",
"Group": "Bakery Services",
"Host": "forecasting-db",
"Port": 5432,
"MaintenanceDB": "forecasting_db",
"Username": "forecasting_user",
"PassFile": "/pgadmin4/pgpass",
"SSLMode": "prefer"
},
"4": {
"Name": "Data Database",
"Group": "Bakery Services",
"Host": "data-db",
"Port": 5432,
"MaintenanceDB": "data_db",
"Username": "data_user",
"PassFile": "/pgadmin4/pgpass",
"SSLMode": "prefer"
},
"5": {
"Name": "Tenant Database",
"Group": "Bakery Services",
"Host": "tenant-db",
"Port": 5432,
"MaintenanceDB": "tenant_db",
"Username": "tenant_user",
"PassFile": "/pgadmin4/pgpass",
"SSLMode": "prefer"
},
"6": {
"Name": "Notification Database",
"Group": "Bakery Services",
"Host": "notification-db",
"Port": 5432,
"MaintenanceDB": "notification_db",
"Username": "notification_user",
"PassFile": "/pgadmin4/pgpass",
"SSLMode": "prefer"
}
}
}

View File

@@ -0,0 +1,26 @@
-- Database bootstrap script: extensions, timezone and server tuning.

-- Create extensions (installed into the database this script is run against).
-- pg_stat_statements only returns data once its library is preloaded; see
-- shared_preload_libraries below.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pg_stat_statements";
CREATE EXTENSION IF NOT EXISTS "pg_trgm";

-- Create Spanish collation for proper text sorting
-- This will be used for bakery names, product names, etc.
-- CREATE COLLATION IF NOT EXISTS spanish (provider = icu, locale = 'es-ES');

-- Set timezone to Madrid.
-- A plain SET only lasts for the session running this script, so the value is
-- also written with ALTER SYSTEM to make it the server-wide default for every
-- later connection.
SET timezone = 'Europe/Madrid';
ALTER SYSTEM SET timezone = 'Europe/Madrid';

-- Performance tuning for small to medium databases
ALTER SYSTEM SET shared_preload_libraries = 'pg_stat_statements';
ALTER SYSTEM SET max_connections = 100;
ALTER SYSTEM SET shared_buffers = '256MB';
ALTER SYSTEM SET effective_cache_size = '1GB';
ALTER SYSTEM SET maintenance_work_mem = '64MB';
ALTER SYSTEM SET checkpoint_completion_target = 0.9;
ALTER SYSTEM SET wal_buffers = '16MB';
ALTER SYSTEM SET default_statistics_target = 100;
ALTER SYSTEM SET random_page_cost = 1.1;
ALTER SYSTEM SET effective_io_concurrency = 200;

-- Reload configuration.
-- The reload applies the reloadable settings above (timezone, planner and
-- maintenance settings). shared_preload_libraries, max_connections,
-- shared_buffers and wal_buffers only take effect after the next server
-- restart.
SELECT pg_reload_conf();

View File

@@ -0,0 +1,94 @@
{
"rabbit_version": "3.12.0",
"rabbitmq_version": "3.12.0",
"product_name": "RabbitMQ",
"product_version": "3.12.0",
"users": [
{
"name": "bakery",
"password_hash": "hash_of_forecast123",
"hashing_algorithm": "rabbit_password_hashing_sha256",
"tags": ["administrator"]
}
],
"vhosts": [
{
"name": "/"
}
],
"permissions": [
{
"user": "bakery",
"vhost": "/",
"configure": ".*",
"write": ".*",
"read": ".*"
}
],
"exchanges": [
{
"name": "bakery_events",
"vhost": "/",
"type": "topic",
"durable": true,
"auto_delete": false,
"internal": false,
"arguments": {}
}
],
"queues": [
{
"name": "training_events",
"vhost": "/",
"durable": true,
"auto_delete": false,
"arguments": {
"x-message-ttl": 86400000
}
},
{
"name": "forecasting_events",
"vhost": "/",
"durable": true,
"auto_delete": false,
"arguments": {
"x-message-ttl": 86400000
}
},
{
"name": "notification_events",
"vhost": "/",
"durable": true,
"auto_delete": false,
"arguments": {
"x-message-ttl": 86400000
}
}
],
"bindings": [
{
"source": "bakery_events",
"vhost": "/",
"destination": "training_events",
"destination_type": "queue",
"routing_key": "training.*",
"arguments": {}
},
{
"source": "bakery_events",
"vhost": "/",
"destination": "forecasting_events",
"destination_type": "queue",
"routing_key": "forecasting.*",
"arguments": {}
},
{
"source": "bakery_events",
"vhost": "/",
"destination": "notification_events",
"destination_type": "queue",
"routing_key": "notification.*",
"arguments": {}
}
]
}

View File

@@ -0,0 +1,26 @@
# infrastructure/rabbitmq/rabbitmq.conf
# RabbitMQ configuration file

# Network settings: AMQP listener and management UI/API port
listeners.tcp.default = 5672
management.tcp.port = 15672

# Memory and disk thresholds
# Memory alarm at 60% of available RAM; disk alarm when free disk space
# drops below 2.0 times the amount of RAM.
vm_memory_high_watermark.relative = 0.6
disk_free_limit.relative = 2.0

# Default user (will be overridden by environment variables)
# NOTE(review): definitions.json also declares the `bakery` user, with
# "password_hash": "hash_of_forecast123". That looks like a placeholder rather
# than a real salted hash — confirm which credentials are in effect after the
# definitions are imported, and replace the placeholder with a generated hash.
default_user = bakery
default_pass = forecast123
default_vhost = /

# Definitions import
# `management.load_definitions` is the deprecated spelling of this setting;
# the core `load_definitions` key imports the same file at node boot.
load_definitions = /etc/rabbitmq/definitions.json

# Logging: console only, at info level
log.console = true
log.console.level = info
log.file = false

# Queue settings
queue_master_locator = min-masters

View File

@@ -0,0 +1,51 @@
# infrastructure/redis/redis.conf
# Redis configuration file
# Network settings
# Listen on all interfaces; access is gated by `requirepass` below.
bind 0.0.0.0
port 6379
# Close client connections idle for 300 seconds; TCP keepalive every 300 s.
timeout 300
tcp-keepalive 300
# General settings
# Run in the foreground with no supervisor integration.
daemonize no
supervised no
pidfile /var/run/redis_6379.pid
loglevel notice
# Empty logfile name: log to standard output.
logfile ""
# Persistence settings
# RDB snapshot when at least N keys changed within T seconds (save T N).
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
dbfilename dump.rdb
# Relative to the server's working directory.
dir ./
# Append only file settings
# AOF is enabled in addition to RDB snapshots; fsync once per second.
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec
no-appendfsync-on-rewrite no
# Rewrite the AOF when it has doubled in size and is at least 64mb.
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
aof-load-truncated yes
# Memory management
# At 512mb, evict keys from the whole keyspace using approximated LRU.
maxmemory 512mb
maxmemory-policy allkeys-lru
maxmemory-samples 5
# Security
# NOTE(review): the password is committed in plain text — consider supplying
# it at deploy time instead, and keep client configuration in sync.
requirepass redis_pass123
# Slow log
# Threshold is in microseconds (10000 = 10 ms); keep the last 128 entries.
slowlog-log-slower-than 10000
slowlog-max-len 128
# Client output buffer limits
# Format: <class> <hard limit> <soft limit> <soft seconds>; 0 disables a limit.
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit replica 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60