Add MinIO support and frontend analytics
263 Tiltfile
@@ -16,22 +16,142 @@
# - Gateway only rebuilds when gateway/ or shared/ code changes
# =============================================================================


# =============================================================================
# TILT CONFIGURATION
# =============================================================================

# Update settings
update_settings(
    max_parallel_updates=2,  # Reduce parallel updates to avoid resource exhaustion
    k8s_upsert_timeout_secs=120  # Increase timeout for slower local builds
)

# Ensure we're running in the correct context
allow_k8s_contexts('kind-bakery-ia-local')

# =============================================================================
# DISK SPACE MANAGEMENT & CLEANUP CONFIGURATION
# =============================================================================

# Disk space management settings
disk_cleanup_enabled = True  # Default to True, can be disabled with TILT_DISABLE_CLEANUP=true
if 'TILT_DISABLE_CLEANUP' in os.environ:
    disk_cleanup_enabled = os.environ['TILT_DISABLE_CLEANUP'].lower() != 'true'

disk_space_threshold_gb = '10'
if 'TILT_DISK_THRESHOLD_GB' in os.environ:
    disk_space_threshold_gb = os.environ['TILT_DISK_THRESHOLD_GB']

disk_cleanup_frequency_minutes = '30'
if 'TILT_CLEANUP_FREQUENCY' in os.environ:
    disk_cleanup_frequency_minutes = os.environ['TILT_CLEANUP_FREQUENCY']

print("""
DISK SPACE MANAGEMENT CONFIGURATION
======================================
Cleanup Enabled: {}
Free Space Threshold: {}GB
Cleanup Frequency: Every {} minutes

To disable cleanup: export TILT_DISABLE_CLEANUP=true
To change threshold: export TILT_DISK_THRESHOLD_GB=20
To change frequency: export TILT_CLEANUP_FREQUENCY=60
""".format(
    'YES' if disk_cleanup_enabled else 'NO (TILT_DISABLE_CLEANUP=true)',
    disk_space_threshold_gb,
    disk_cleanup_frequency_minutes
))

# Automatic cleanup scheduler (informational only - actual scheduling done externally)
if disk_cleanup_enabled:
    local_resource(
        'automatic-disk-cleanup-info',
        cmd='''
        echo "Automatic disk cleanup is ENABLED"
        echo "Settings:"
        echo "  - Threshold: ''' + disk_space_threshold_gb + ''' GB free space"
        echo "  - Frequency: Every ''' + disk_cleanup_frequency_minutes + ''' minutes"
        echo ""
        echo "Note: Actual cleanup runs via external scheduling (cron job or similar)"
        echo "To run cleanup now: tilt trigger manual-disk-cleanup"
        ''',
        labels=['99-cleanup'],
        auto_init=True,
        allow_parallel=False
    )

# Manual cleanup trigger (can be run on demand)
local_resource(
    'manual-disk-cleanup',
    cmd='''
    echo "Starting manual disk cleanup..."
    python3 scripts/cleanup_disk_space.py --manual --verbose
    ''',
    labels=['99-cleanup'],
    auto_init=False,
    allow_parallel=False
)

# Disk space monitoring resource
local_resource(
    'disk-space-monitor',
    cmd='''
    echo "DISK SPACE MONITORING"
    echo "======================================"

    # Get disk usage
    df -h / | grep -v Filesystem | awk '{print "Total: " $2 " | Used: " $3 " | Free: " $4 " | Usage: " $5}'

    # Get Docker disk usage
    echo ""
    echo "DOCKER DISK USAGE:"
    docker system df

    # Get Kubernetes disk usage (if available)
    echo ""
    echo "KUBERNETES DISK USAGE:"
    kubectl get pvc -n bakery-ia --no-headers 2>/dev/null | awk '{print "PVC: " $1 " | Status: " $2 " | Capacity: " $3 " | Used: " $4}' || echo "  Kubernetes PVCs not available"

    echo ""
    echo "Cleanup Status:"
    if [ "''' + str(disk_cleanup_enabled) + '''" = "True" ]; then
        echo "  Automatic cleanup: ENABLED (every ''' + disk_cleanup_frequency_minutes + ''' minutes)"
        echo "  Threshold: ''' + disk_space_threshold_gb + '''GB free space"
    else
        echo "  Automatic cleanup: DISABLED"
        echo "  To enable: unset TILT_DISABLE_CLEANUP or set TILT_DISABLE_CLEANUP=false"
    fi

    echo ""
    echo "Manual cleanup commands:"
    echo "  tilt trigger manual-disk-cleanup  # Run cleanup now"
    echo "  docker system prune -a            # Manual Docker cleanup"
    echo "  kubectl delete jobs --all         # Clean up completed jobs"
    ''',
    labels=['99-cleanup'],
    auto_init=False,
    allow_parallel=False
)

# =============================================================================
# DOCKER REGISTRY CONFIGURATION
# =============================================================================

# Docker registry configuration
# Set USE_DOCKERHUB=true environment variable to push images to Docker Hub
# Otherwise, uses local registry for faster builds and deployments
use_dockerhub = os.getenv('USE_DOCKERHUB', 'false').lower() == 'true'
dockerhub_username = os.getenv('DOCKERHUB_USERNAME', 'uals')
use_dockerhub = False  # Default to False
if 'USE_DOCKERHUB' in os.environ:
    use_dockerhub = os.environ['USE_DOCKERHUB'].lower() == 'true'

dockerhub_username = 'uals'  # Default username
if 'DOCKERHUB_USERNAME' in os.environ:
    dockerhub_username = os.environ['DOCKERHUB_USERNAME']

if use_dockerhub:
    print("""
🐳 DOCKER HUB MODE ENABLED
DOCKER HUB MODE ENABLED
Images will be pushed to Docker Hub: docker.io/%s
Make sure you're logged in: docker login
To disable: unset USE_DOCKERHUB or set USE_DOCKERHUB=false
@@ -39,7 +159,7 @@ if use_dockerhub:
    default_registry('docker.io/%s' % dockerhub_username)
else:
    print("""
🏠 LOCAL REGISTRY MODE
LOCAL REGISTRY MODE
Using local registry for faster builds: localhost:5001
This registry is created by kubernetes_restart.sh script
To use Docker Hub: export USE_DOCKERHUB=true
@@ -52,20 +172,21 @@ else:

print("""
======================================
🔐 Bakery IA Secure Development Mode
Bakery IA Secure Development Mode
======================================

Security Features:
✅ TLS encryption for PostgreSQL and Redis
✅ Strong 32-character passwords
✅ PersistentVolumeClaims (no data loss)
✅ pgcrypto extension for encryption
✅ PostgreSQL audit logging
TLS encryption for PostgreSQL and Redis
Strong 32-character passwords
PersistentVolumeClaims (no data loss)
Column encryption: pgcrypto extension
Audit logging: PostgreSQL query logging
Object storage: MinIO with TLS for ML models

Monitoring:
📊 Service metrics available at /metrics endpoints
🔍 Telemetry ready (traces, metrics, logs)
ℹ️ SigNoz deployment optional for local dev (see signoz-info resource)
Service metrics available at /metrics endpoints
Telemetry ready (traces, metrics, logs)
SigNoz deployment optional for local dev (see signoz-info resource)

Applying security configurations...
""")
@@ -74,7 +195,7 @@ Applying security configurations...
local_resource(
    'dockerhub-secret',
    cmd='''
    echo "🐳 Setting up Docker Hub image pull secret..."
    echo "Setting up Docker Hub image pull secret..."

    # Check if Docker Hub credentials are available
    if [ -n "$DOCKERHUB_USERNAME" ] && [ -n "$DOCKERHUB_PASSWORD" ]; then
@@ -84,7 +205,7 @@ local_resource(
        echo "  Attempting to use Docker CLI credentials..."
        ./infrastructure/kubernetes/create-dockerhub-secret.sh
    else
        echo "  ⚠️ Docker Hub credentials not found"
        echo "  Docker Hub credentials not found"
        echo "  To enable automatic Docker Hub authentication:"
        echo "  1. Run 'docker login', OR"
        echo "  2. Set environment variables:"
@@ -103,13 +224,13 @@ local_resource(
local_resource(
    'security-setup',
    cmd='''
    echo "📦 Applying security secrets and configurations..."
    echo "Applying security secrets and configurations..."
    kubectl apply -f infrastructure/kubernetes/base/secrets.yaml
    kubectl apply -f infrastructure/kubernetes/base/secrets/postgres-tls-secret.yaml
    kubectl apply -f infrastructure/kubernetes/base/secrets/redis-tls-secret.yaml
    kubectl apply -f infrastructure/kubernetes/base/configs/postgres-init-config.yaml
    kubectl apply -f infrastructure/kubernetes/base/configmaps/postgres-logging-config.yaml
    echo "✅ Security configurations applied"
    echo "Security configurations applied"
    ''',
    resource_deps=['dockerhub-secret'],
    labels=['00-security'],
@@ -120,7 +241,7 @@ local_resource(
local_resource(
    'verify-tls',
    cmd='''
    echo "🔍 Verifying TLS configuration..."
    echo "Verifying TLS configuration..."
    sleep 5  # Wait for pods to be ready

    # Check if auth-db pod exists and has TLS certs
@@ -129,8 +250,8 @@ local_resource(
    if [ -n "$AUTH_POD" ]; then
        echo "  Checking PostgreSQL TLS certificates..."
        kubectl exec -n bakery-ia "$AUTH_POD" -- ls -la /tls/ 2>/dev/null && \
        echo "  ✅ PostgreSQL TLS certificates mounted" || \
        echo "  ⚠️ PostgreSQL TLS certificates not found (pods may still be starting)"
        echo "  PostgreSQL TLS certificates mounted" || \
        echo "  PostgreSQL TLS certificates not found (pods may still be starting)"
    fi

    # Check if redis pod exists and has TLS certs
@@ -139,15 +260,14 @@ local_resource(
    if [ -n "$REDIS_POD" ]; then
        echo "  Checking Redis TLS certificates..."
        kubectl exec -n bakery-ia "$REDIS_POD" -- ls -la /tls/ 2>/dev/null && \
        echo "  ✅ Redis TLS certificates mounted" || \
        echo "  ⚠️ Redis TLS certificates not found (pods may still be starting)"
        echo "  Redis TLS certificates mounted" || \
        echo "  Redis TLS certificates not found (pods may still be starting)"
    fi

    echo "✅ TLS verification complete"
    echo "TLS verification complete"
    ''',
    resource_deps=['auth-db', 'redis'],
    auto_init=True,
    trigger_mode=TRIGGER_MODE_MANUAL,
    labels=['00-security']
)

@@ -155,15 +275,14 @@ local_resource(
local_resource(
    'verify-pvcs',
    cmd='''
    echo "🔍 Verifying PersistentVolumeClaims..."
    kubectl get pvc -n bakery-ia | grep -E "NAME|db-pvc" || echo "  ⚠️ PVCs not yet bound"
    echo "Verifying PersistentVolumeClaims..."
    kubectl get pvc -n bakery-ia | grep -E "NAME|db-pvc" || echo "  PVCs not yet bound"
    PVC_COUNT=$(kubectl get pvc -n bakery-ia -o json | jq '.items | length')
    echo "  Found $PVC_COUNT PVCs"
    echo "✅ PVC verification complete"
    echo "PVC verification complete"
    ''',
    resource_deps=['auth-db'],
    auto_init=True,
    trigger_mode=TRIGGER_MODE_MANUAL,
    labels=['00-security']
)

@@ -171,11 +290,11 @@ local_resource(
local_resource(
    'cert-manager-install',
    cmd='''
    echo "📦 Installing cert-manager..."
    echo "Installing cert-manager..."

    # Check if cert-manager CRDs already exist
    if kubectl get crd certificates.cert-manager.io >/dev/null 2>&1; then
        echo "  ✅ cert-manager CRDs already installed"
        echo "  cert-manager CRDs already installed"
    else
        echo "  Installing cert-manager v1.13.2..."
        kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.2/cert-manager.yaml
@@ -184,10 +303,10 @@ local_resource(
        kubectl wait --for=condition=available --timeout=120s deployment/cert-manager -n cert-manager
        kubectl wait --for=condition=available --timeout=120s deployment/cert-manager-webhook -n cert-manager

        echo "  ✅ cert-manager installed and ready"
        echo "  cert-manager installed and ready"
    fi

    echo "✅ cert-manager verification complete"
    echo "cert-manager verification complete"
    ''',
    labels=['00-security'],
    auto_init=True
@@ -265,19 +384,21 @@ def build_python_service(service_name, service_path):
# =============================================================================

# Frontend (React + Vite)
frontend_debug_env = os.getenv('FRONTEND_DEBUG', 'false')
frontend_debug_env = 'false'  # Default to false
if 'FRONTEND_DEBUG' in os.environ:
    frontend_debug_env = os.environ['FRONTEND_DEBUG']
frontend_debug = frontend_debug_env.lower() == 'true'

if frontend_debug:
    print("""
🐛 FRONTEND DEBUG MODE ENABLED
FRONTEND DEBUG MODE ENABLED
Building frontend with NO minification for easier debugging.
Full React error messages will be displayed.
To disable: unset FRONTEND_DEBUG or set FRONTEND_DEBUG=false
""")
else:
    print("""
📦 FRONTEND PRODUCTION MODE
FRONTEND PRODUCTION MODE
Building frontend with minification for optimized performance.
To enable debug mode: export FRONTEND_DEBUG=true
""")
@@ -384,6 +505,10 @@ k8s_resource('redis', resource_deps=['security-setup'], labels=['01-infrastructu
k8s_resource('rabbitmq', labels=['01-infrastructure'])
k8s_resource('nominatim', labels=['01-infrastructure'])

# MinIO Storage
k8s_resource('minio', resource_deps=['security-setup'], labels=['01-infrastructure'])
k8s_resource('minio-bucket-init', resource_deps=['minio'], labels=['01-infrastructure'])

# =============================================================================
# MONITORING RESOURCES - SigNoz (Unified Observability)
# =============================================================================
@@ -392,25 +517,25 @@ k8s_resource('nominatim', labels=['01-infrastructure'])
local_resource(
    'signoz-deploy',
    cmd='''
    echo "📊 Deploying SigNoz Monitoring Stack..."
    echo "Deploying SigNoz Monitoring Stack..."
    echo ""

    # Ensure Docker Hub secret exists in bakery-ia namespace
    echo "🔐 Ensuring Docker Hub secret exists in bakery-ia namespace..."
    echo "Ensuring Docker Hub secret exists in bakery-ia namespace..."
    if ! kubectl get secret dockerhub-creds -n bakery-ia &>/dev/null; then
        echo "  ⚠️ Docker Hub secret not found, attempting to create..."
        echo "  Docker Hub secret not found, attempting to create..."
        ./infrastructure/kubernetes/create-dockerhub-secret.sh || echo "  Continuing without Docker Hub authentication..."
    else
        echo "  ✅ Docker Hub secret exists"
        echo "  Docker Hub secret exists"
    fi
    echo ""

    # Check if SigNoz is already deployed
    if helm list -n bakery-ia | grep -q signoz; then
        echo "✅ SigNoz already deployed, checking status..."
        echo "SigNoz already deployed, checking status..."
        helm status signoz -n bakery-ia
    else
        echo "🚀 Installing SigNoz..."
        echo "Installing SigNoz..."

        # Add SigNoz Helm repository if not already added
        helm repo add signoz https://charts.signoz.io 2>/dev/null || true
@@ -424,25 +549,23 @@ local_resource(
        --wait

        echo ""
        echo "✅ SigNoz deployment completed"
        echo "SigNoz deployment completed"
    fi

    echo ""
    echo "📈 SigNoz Access Information:"
    echo "SigNoz Access Information:"
    echo "  URL: https://monitoring.bakery-ia.local"
    echo "  Username: admin"
    echo "  Password: admin"
    echo ""
    echo "🔧 OpenTelemetry Collector Endpoints:"
    echo "OpenTelemetry Collector Endpoints:"
    echo "  gRPC: localhost:4317"
    echo "  HTTP: localhost:4318"
    echo ""
    echo "💡 To check pod status: kubectl get pods -n signoz"
    echo "To check pod status: kubectl get pods -n signoz"
    ''',
    labels=['05-monitoring'],
    auto_init=False,
    trigger_mode=TRIGGER_MODE_MANUAL,
    allow_parallel=False
)

# Track SigNoz pods in Tilt UI using workload tracking
@@ -450,7 +573,7 @@ local_resource(
local_resource(
    'signoz-status',
    cmd='''
    echo "📊 SigNoz Status Check"
    echo "SigNoz Status Check"
    echo ""

    # Check pod status
@@ -470,19 +593,17 @@ local_resource(
    echo "Pod Status: $READY_PODS/$TOTAL_PODS ready"

    if [ "$READY_PODS" -eq "$TOTAL_PODS" ]; then
        echo "✅ All SigNoz pods are running!"
        echo "All SigNoz pods are running!"
        echo ""
        echo "Access SigNoz at: https://monitoring.bakery-ia.local"
        echo "Credentials: admin / admin"
    else
        echo "⏳ Waiting for pods to become ready..."
        echo "Waiting for pods to become ready..."
    fi
    fi
    ''',
    labels=['05-monitoring'],
    resource_deps=['signoz-deploy'],
    auto_init=False,
    trigger_mode=TRIGGER_MODE_MANUAL
)

# Optional exporters (in monitoring namespace) - DISABLED since using SigNoz
@@ -566,7 +687,6 @@ k8s_resource('demo-session-migration', resource_deps=['demo-session-db'], labels
k8s_resource('external-data-init', resource_deps=['external-migration', 'redis'], labels=['08-data-init'])
k8s_resource('nominatim-init', labels=['08-data-init'])

# =============================================================================
# =============================================================================
# APPLICATION SERVICES
# =============================================================================
@@ -618,15 +738,9 @@ k8s_resource('demo-session-cleanup', resource_deps=['demo-session-service'], lab
k8s_resource('external-data-rotation', resource_deps=['external-service'], labels=['16-cronjobs'])

# =============================================================================
# TILT CONFIGURATION
# WATCH SETTINGS
# =============================================================================

# Update settings
update_settings(
    max_parallel_updates=2,  # Reduce parallel updates to avoid resource exhaustion
    k8s_upsert_timeout_secs=120  # Increase timeout for slower local builds
)

# Watch settings
watch_settings(
    ignore=[
@@ -665,18 +779,19 @@ watch_settings(
# =============================================================================

print("""
✅ Security setup complete!
Security setup complete!

Database Security Features Active:
🔐 TLS encryption: PostgreSQL and Redis
🔑 Strong passwords: 32-character cryptographic
💾 Persistent storage: PVCs for all databases
🔒 Column encryption: pgcrypto extension
📋 Audit logging: PostgreSQL query logging
TLS encryption: PostgreSQL and Redis
Strong passwords: 32-character cryptographic
Persistent storage: PVCs for all databases
Column encryption: pgcrypto extension
Audit logging: PostgreSQL query logging

Internal Schedulers Active:
⏰ Alert Priority Recalculation: Hourly @ :15 (alert-processor)
⏰ Usage Tracking: Daily @ 2:00 AM UTC (tenant-service)
Alert Priority Recalculation: Hourly @ :15 (alert-processor)
Usage Tracking: Daily @ 2:00 AM UTC (tenant-service)
Disk Cleanup: Every """ + disk_cleanup_frequency_minutes + """ minutes (threshold: """ + disk_space_threshold_gb + """GB)

Access your application:
  Main Application: https://bakery-ia.local
@@ -708,11 +823,11 @@ Documentation:
  docs/DATABASE_SECURITY_ANALYSIS_REPORT.md

Build Optimization Active:
✅ Services only rebuild when their code changes
✅ Shared folder changes trigger ALL services (as expected)
✅ Reduces unnecessary rebuilds and disk usage
💡 Edit service code: only that service rebuilds
💡 Edit shared/ code: all services rebuild (required)
Services only rebuild when their code changes
Shared folder changes trigger ALL services (as expected)
Reduces unnecessary rebuilds and disk usage
Edit service code: only that service rebuilds
Edit shared/ code: all services rebuild (required)

Useful Commands:
  # Work on specific services only
@@ -730,4 +845,4 @@ DNS Configuration:
  # 127.0.0.1 monitoring.bakery-ia.local

======================================
""")
""")
154 docs/MINIO_CERTIFICATE_GENERATION_GUIDE.md Normal file
@@ -0,0 +1,154 @@
# MinIO Certificate Generation Guide

## Quick Start

To generate MinIO certificates with the correct format:

```bash
# Generate certificates
./infrastructure/tls/generate-minio-certificates.sh

# Update Kubernetes secret
kubectl delete secret -n bakery-ia minio-tls
kubectl apply -f infrastructure/kubernetes/base/secrets/minio-tls-secret.yaml

# Restart MinIO
kubectl rollout restart deployment -n bakery-ia minio
```

## Key Requirements

### Private Key Format
✅ **Required**: Traditional RSA format (`BEGIN RSA PRIVATE KEY`)
❌ **Problematic**: PKCS#8 format (`BEGIN PRIVATE KEY`)

### Certificate Files
- `minio-cert.pem` - Server certificate
- `minio-key.pem` - Private key (must be traditional RSA format)
- `ca-cert.pem` - CA certificate

## Verification

### Check Private Key Format
```bash
head -1 infrastructure/tls/minio/minio-key.pem
# Should output: -----BEGIN RSA PRIVATE KEY-----
```

### Verify Certificate Chain
```bash
openssl verify -CAfile infrastructure/tls/ca/ca-cert.pem \
  infrastructure/tls/minio/minio-cert.pem
```

### Check Certificate Details
```bash
openssl x509 -in infrastructure/tls/minio/minio-cert.pem -noout \
  -subject -issuer -dates
```

## Troubleshooting

### Error: "The private key contains additional data"
**Cause**: Private key is in PKCS#8 format instead of traditional RSA format

**Solution**: Convert the key:
```bash
openssl rsa -in minio-key.pem -traditional -out minio-key-fixed.pem
mv minio-key-fixed.pem minio-key.pem
```

### Error: "Unable to parse private key"
**Cause**: Certificate/key mismatch or corrupted files

**Solution**: Regenerate certificates and verify:
```bash
# Check modulus of certificate and key (should match)
openssl x509 -noout -modulus -in minio-cert.pem | openssl md5
openssl rsa -noout -modulus -in minio-key.pem | openssl md5
```

## Certificate Rotation

### Step-by-Step Process

1. **Generate new certificates**
   ```bash
   ./infrastructure/tls/generate-minio-certificates.sh
   ```

2. **Update base64 values in secret**
   ```bash
   # Update infrastructure/kubernetes/base/secrets/minio-tls-secret.yaml
   # with new base64 encoded certificate values
   ```
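   If you would rather not hand-edit the base64 fields, one way to regenerate the manifest is the sketch below (the PEM paths follow the layout in this guide; `--dry-run=client -o yaml` assumes a reasonably recent kubectl):
   ```bash
   # Rebuild the Opaque secret manifest directly from the generated PEM files
   kubectl create secret generic minio-tls -n bakery-ia \
     --from-file=ca-cert.pem=infrastructure/tls/ca/ca-cert.pem \
     --from-file=minio-cert.pem=infrastructure/tls/minio/minio-cert.pem \
     --from-file=minio-key.pem=infrastructure/tls/minio/minio-key.pem \
     --dry-run=client -o yaml > infrastructure/kubernetes/base/secrets/minio-tls-secret.yaml
   ```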

3. **Apply updated secret**
   ```bash
   kubectl delete secret -n bakery-ia minio-tls
   kubectl apply -f infrastructure/kubernetes/base/secrets/minio-tls-secret.yaml
   ```

4. **Restart MinIO pods**
   ```bash
   kubectl rollout restart deployment -n bakery-ia minio
   ```

5. **Verify**
   ```bash
   kubectl logs -n bakery-ia -l app.kubernetes.io/name=minio --tail=5
   # Should show: API: https://minio.bakery-ia.svc.cluster.local:9000
   ```

## Technical Details

### Certificate Generation Process

The generation script performs the following steps (a condensed sketch follows the list):

1. **Generate private key** (RSA 4096-bit)
2. **Convert to traditional RSA format** (critical for MinIO)
3. **Create CSR** with proper SANs
4. **Sign with CA** (valid for 3 years)
5. **Set permissions** (600 for key, 644 for certs)
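
A minimal sketch of these steps, assuming file names and a local CA key (`ca-key.pem`) that mirror this guide's layout; the real script may differ in detail, and the `-addext`/`-copy_extensions` flags assume OpenSSL 1.1.1+/3.x:

```bash
# 1. Generate a 4096-bit RSA private key
openssl genrsa -out minio-key.pem 4096

# 2. Rewrite it in traditional RSA PEM format (MinIO rejects PKCS#8 here)
openssl rsa -in minio-key.pem -traditional -out minio-key-trad.pem
mv minio-key-trad.pem minio-key.pem

# 3. Create a CSR carrying the SANs (abbreviated; see the full SAN list below)
openssl req -new -key minio-key.pem -out minio.csr \
  -subj "/CN=minio.bakery-ia.svc.cluster.local" \
  -addext "subjectAltName=DNS:minio.bakery-ia.svc.cluster.local,DNS:minio,DNS:localhost,IP:127.0.0.1"

# 4. Sign with the local CA for roughly 3 years
openssl x509 -req -in minio.csr -CA ca-cert.pem -CAkey ca-key.pem \
  -CAcreateserial -days 1095 -out minio-cert.pem -copy_extensions copyall

# 5. Lock down permissions
chmod 600 minio-key.pem
chmod 644 minio-cert.pem ca-cert.pem
```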

### SANs (Subject Alternative Names)

The certificate includes these SANs for comprehensive coverage:
- `minio.bakery-ia.svc.cluster.local` (primary)
- `minio.bakery-ia`
- `minio-console.bakery-ia.svc.cluster.local`
- `minio-console.bakery-ia`
- `minio`
- `minio-console`
- `localhost`
- `127.0.0.1`

### Secret Structure

The Kubernetes secret uses the standardized Opaque format:

```yaml
apiVersion: v1
kind: Secret
metadata:
  name: minio-tls
  namespace: bakery-ia
type: Opaque
data:
  ca-cert.pem: <base64>
  minio-cert.pem: <base64>
  minio-key.pem: <base64>
```

## Best Practices

1. **Always verify private key format** before applying
2. **Test certificates** with `openssl verify` before deployment
3. **Use the generation script** to ensure consistency
4. **Document certificate expiration dates** for rotation planning
5. **Monitor MinIO logs** after certificate updates

## Related Documentation

- [MinIO TLS Fix Summary](MINIO_TLS_FIX_SUMMARY.md)
- [Kubernetes TLS Secrets Guide](../kubernetes-tls-guide.md)
- [Certificate Management Best Practices](../certificate-management.md)
@@ -34,20 +34,47 @@ server {
    # Note: API routing is handled by ingress, not by this nginx
    # The frontend makes requests to /api which are routed by the ingress controller

    # Static assets with aggressive caching (including source maps for debugging)
    location ~* ^/assets/.*\.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot|map)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
        add_header Vary Accept-Encoding;
    # Source map files - serve with proper CORS headers and content type
    # Note: These are typically only needed in development, but served in production for error reporting
    location ~* ^/assets/.*\.map$ {
        # Short cache time to avoid mismatches with JS files
        expires 1m;
        add_header Cache-Control "public, must-revalidate";
        add_header Access-Control-Allow-Origin "*";
        add_header Access-Control-Allow-Methods "GET";
        add_header Access-Control-Allow-Headers "Content-Type";
        add_header Content-Type "application/json";
        # Disable access logging for source maps as they're requested frequently
        access_log off;
        try_files $uri =404;
    }

    # Also handle JS and CSS files anywhere in the structure (for dynamic imports)
    location ~* \.(js|css)$ {
    # Static assets with appropriate caching
    # Note: JS/CSS files have content hashes for cache busting, but use shorter cache times to handle deployment issues
    location ~* ^/assets/.*\.(js|css)$ {
        expires 1h;
        add_header Cache-Control "public";
        add_header Vary Accept-Encoding;
        add_header Access-Control-Allow-Origin "*";
        access_log off;
        try_files $uri =404;
    }

    # Static assets that don't change often (images, fonts) can have longer cache times
    location ~* ^/assets/.*\.(png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
        add_header Vary Accept-Encoding;
        add_header Access-Control-Allow-Origin "*";
        access_log off;
        try_files $uri =404;
    }

    # Handle JS and CSS files anywhere in the structure (for dynamic imports) with shorter cache
    location ~* \.(js|css)$ {
        expires 1h;
        add_header Cache-Control "public";
        add_header Vary Accept-Encoding;
        access_log off;
        try_files $uri =404;
    }

306 frontend/package-lock.json generated
@@ -9,6 +9,13 @@
      "version": "2.0.0",
      "dependencies": {
        "@hookform/resolvers": "^3.3.2",
        "@opentelemetry/api": "^1.9.0",
        "@opentelemetry/exporter-metrics-otlp-http": "^0.210.0",
        "@opentelemetry/exporter-trace-otlp-http": "^0.210.0",
        "@opentelemetry/resources": "^2.4.0",
        "@opentelemetry/sdk-metrics": "^2.4.0",
        "@opentelemetry/sdk-trace-web": "^2.4.0",
        "@opentelemetry/semantic-conventions": "^1.39.0",
        "@radix-ui/react-accordion": "^1.1.2",
        "@radix-ui/react-checkbox": "^1.0.4",
        "@radix-ui/react-dialog": "^1.0.5",
@@ -2976,6 +2983,209 @@
      "dev": true,
      "license": "MIT"
    },
    "node_modules/@opentelemetry/api": {
      "version": "1.9.0",
      "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
      "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
      "license": "Apache-2.0",
      "peer": true,
      "engines": {
        "node": ">=8.0.0"
      }
    },
    "node_modules/@opentelemetry/api-logs": {
      "version": "0.210.0",
      "resolved": "https://registry.npmjs.org/@opentelemetry/api-logs/-/api-logs-0.210.0.tgz",
      "integrity": "sha512-CMtLxp+lYDriveZejpBND/2TmadrrhUfChyxzmkFtHaMDdSKfP59MAYyA0ICBvEBdm3iXwLcaj/8Ic/pnGw9Yg==",
      "license": "Apache-2.0",
      "dependencies": {
        "@opentelemetry/api": "^1.3.0"
      },
      "engines": {
        "node": ">=8.0.0"
      }
    },
    "node_modules/@opentelemetry/core": {
      "version": "2.4.0",
      "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.4.0.tgz",
      "integrity": "sha512-KtcyFHssTn5ZgDu6SXmUznS80OFs/wN7y6MyFRRcKU6TOw8hNcGxKvt8hsdaLJfhzUszNSjURetq5Qpkad14Gw==",
      "license": "Apache-2.0",
      "dependencies": {
        "@opentelemetry/semantic-conventions": "^1.29.0"
      },
      "engines": {
        "node": "^18.19.0 || >=20.6.0"
      },
      "peerDependencies": {
        "@opentelemetry/api": ">=1.0.0 <1.10.0"
      }
    },
    "node_modules/@opentelemetry/exporter-metrics-otlp-http": {
      "version": "0.210.0",
      "resolved": "https://registry.npmjs.org/@opentelemetry/exporter-metrics-otlp-http/-/exporter-metrics-otlp-http-0.210.0.tgz",
      "integrity": "sha512-JpLThG8Hh8A/Jzdzw9i4Ftu+EzvLaX/LouN+mOOHmadL0iror0Qsi3QWzucXeiUsDDsiYgjfKyi09e6sltytgA==",
      "license": "Apache-2.0",
      "dependencies": {
        "@opentelemetry/core": "2.4.0",
        "@opentelemetry/otlp-exporter-base": "0.210.0",
        "@opentelemetry/otlp-transformer": "0.210.0",
        "@opentelemetry/resources": "2.4.0",
        "@opentelemetry/sdk-metrics": "2.4.0"
      },
      "engines": {
        "node": "^18.19.0 || >=20.6.0"
      },
      "peerDependencies": {
        "@opentelemetry/api": "^1.3.0"
      }
    },
    "node_modules/@opentelemetry/exporter-trace-otlp-http": {
      "version": "0.210.0",
      "resolved": "https://registry.npmjs.org/@opentelemetry/exporter-trace-otlp-http/-/exporter-trace-otlp-http-0.210.0.tgz",
      "integrity": "sha512-9JkyaCl70anEtuKZdoCQmjDuz1/paEixY/DWfsvHt7PGKq3t8/nQ/6/xwxHjG+SkPAUbo1Iq4h7STe7Pk2bc5A==",
      "license": "Apache-2.0",
      "dependencies": {
        "@opentelemetry/core": "2.4.0",
        "@opentelemetry/otlp-exporter-base": "0.210.0",
        "@opentelemetry/otlp-transformer": "0.210.0",
        "@opentelemetry/resources": "2.4.0",
        "@opentelemetry/sdk-trace-base": "2.4.0"
      },
      "engines": {
        "node": "^18.19.0 || >=20.6.0"
      },
      "peerDependencies": {
        "@opentelemetry/api": "^1.3.0"
      }
    },
    "node_modules/@opentelemetry/otlp-exporter-base": {
      "version": "0.210.0",
      "resolved": "https://registry.npmjs.org/@opentelemetry/otlp-exporter-base/-/otlp-exporter-base-0.210.0.tgz",
      "integrity": "sha512-uk78DcZoBNHIm26h0oXc8Pizh4KDJ/y04N5k/UaI9J7xR7mL8QcMcYPQG9xxN7m8qotXOMDRW6qTAyptav4+3w==",
      "license": "Apache-2.0",
      "dependencies": {
        "@opentelemetry/core": "2.4.0",
        "@opentelemetry/otlp-transformer": "0.210.0"
      },
      "engines": {
        "node": "^18.19.0 || >=20.6.0"
      },
      "peerDependencies": {
        "@opentelemetry/api": "^1.3.0"
      }
    },
    "node_modules/@opentelemetry/otlp-transformer": {
      "version": "0.210.0",
      "resolved": "https://registry.npmjs.org/@opentelemetry/otlp-transformer/-/otlp-transformer-0.210.0.tgz",
      "integrity": "sha512-nkHBJVSJGOwkRZl+BFIr7gikA93/U8XkL2EWaiDbj3DVjmTEZQpegIKk0lT8oqQYfP8FC6zWNjuTfkaBVqa0ZQ==",
      "license": "Apache-2.0",
      "dependencies": {
        "@opentelemetry/api-logs": "0.210.0",
        "@opentelemetry/core": "2.4.0",
        "@opentelemetry/resources": "2.4.0",
        "@opentelemetry/sdk-logs": "0.210.0",
        "@opentelemetry/sdk-metrics": "2.4.0",
        "@opentelemetry/sdk-trace-base": "2.4.0",
        "protobufjs": "8.0.0"
      },
      "engines": {
        "node": "^18.19.0 || >=20.6.0"
      },
      "peerDependencies": {
        "@opentelemetry/api": "^1.3.0"
      }
    },
    "node_modules/@opentelemetry/resources": {
      "version": "2.4.0",
      "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.4.0.tgz",
      "integrity": "sha512-RWvGLj2lMDZd7M/5tjkI/2VHMpXebLgPKvBUd9LRasEWR2xAynDwEYZuLvY9P2NGG73HF07jbbgWX2C9oavcQg==",
      "license": "Apache-2.0",
      "dependencies": {
        "@opentelemetry/core": "2.4.0",
        "@opentelemetry/semantic-conventions": "^1.29.0"
      },
      "engines": {
        "node": "^18.19.0 || >=20.6.0"
      },
      "peerDependencies": {
        "@opentelemetry/api": ">=1.3.0 <1.10.0"
      }
    },
    "node_modules/@opentelemetry/sdk-logs": {
      "version": "0.210.0",
      "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-logs/-/sdk-logs-0.210.0.tgz",
      "integrity": "sha512-YuaL92Dpyk/Kc1o4e9XiaWWwiC0aBFN+4oy+6A9TP4UNJmRymPMEX10r6EMMFMD7V0hktiSig9cwWo59peeLCQ==",
      "license": "Apache-2.0",
      "dependencies": {
        "@opentelemetry/api-logs": "0.210.0",
        "@opentelemetry/core": "2.4.0",
        "@opentelemetry/resources": "2.4.0"
      },
      "engines": {
        "node": "^18.19.0 || >=20.6.0"
      },
      "peerDependencies": {
        "@opentelemetry/api": ">=1.4.0 <1.10.0"
      }
    },
    "node_modules/@opentelemetry/sdk-metrics": {
      "version": "2.4.0",
      "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.4.0.tgz",
      "integrity": "sha512-qSbfq9mXbLMqmPEjijl32f3ZEmiHekebRggPdPjhHI6t1CsAQOR2Aw/SuTDftk3/l2aaPHpwP3xM2DkgBA1ANw==",
      "license": "Apache-2.0",
      "dependencies": {
        "@opentelemetry/core": "2.4.0",
        "@opentelemetry/resources": "2.4.0"
      },
      "engines": {
        "node": "^18.19.0 || >=20.6.0"
      },
      "peerDependencies": {
        "@opentelemetry/api": ">=1.9.0 <1.10.0"
      }
    },
    "node_modules/@opentelemetry/sdk-trace-base": {
      "version": "2.4.0",
      "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.4.0.tgz",
      "integrity": "sha512-WH0xXkz/OHORDLKqaxcUZS0X+t1s7gGlumr2ebiEgNZQl2b0upK2cdoD0tatf7l8iP74woGJ/Kmxe82jdvcWRw==",
      "license": "Apache-2.0",
      "dependencies": {
        "@opentelemetry/core": "2.4.0",
        "@opentelemetry/resources": "2.4.0",
        "@opentelemetry/semantic-conventions": "^1.29.0"
      },
      "engines": {
        "node": "^18.19.0 || >=20.6.0"
      },
      "peerDependencies": {
        "@opentelemetry/api": ">=1.3.0 <1.10.0"
      }
    },
    "node_modules/@opentelemetry/sdk-trace-web": {
      "version": "2.4.0",
      "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-web/-/sdk-trace-web-2.4.0.tgz",
      "integrity": "sha512-1FYg7qnrgTugPev51SehxCp0v9J4P97MJn2MaXQ8QK//psfyLDorKAAC3LmSIhq7XaC726WSZ/Wm69r8NdjIsA==",
      "license": "Apache-2.0",
      "dependencies": {
        "@opentelemetry/core": "2.4.0",
        "@opentelemetry/sdk-trace-base": "2.4.0"
      },
      "engines": {
        "node": "^18.19.0 || >=20.6.0"
      },
      "peerDependencies": {
        "@opentelemetry/api": ">=1.0.0 <1.10.0"
      }
    },
    "node_modules/@opentelemetry/semantic-conventions": {
      "version": "1.39.0",
      "resolved": "https://registry.npmjs.org/@opentelemetry/semantic-conventions/-/semantic-conventions-1.39.0.tgz",
      "integrity": "sha512-R5R9tb2AXs2IRLNKLBJDynhkfmx7mX0vi8NkhZb3gUkPWHn6HXk5J8iQ/dql0U3ApfWym4kXXmBDRGO+oeOfjg==",
      "license": "Apache-2.0",
      "engines": {
        "node": ">=14"
      }
    },
    "node_modules/@pkgjs/parseargs": {
      "version": "0.11.0",
      "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
@@ -3010,6 +3220,70 @@
      "dev": true,
      "license": "MIT"
    },
    "node_modules/@protobufjs/aspromise": {
      "version": "1.1.2",
      "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
      "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==",
      "license": "BSD-3-Clause"
    },
    "node_modules/@protobufjs/base64": {
      "version": "1.1.2",
      "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
      "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==",
      "license": "BSD-3-Clause"
    },
    "node_modules/@protobufjs/codegen": {
      "version": "2.0.4",
      "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
      "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==",
      "license": "BSD-3-Clause"
    },
    "node_modules/@protobufjs/eventemitter": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
      "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==",
      "license": "BSD-3-Clause"
    },
    "node_modules/@protobufjs/fetch": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
      "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
      "license": "BSD-3-Clause",
      "dependencies": {
        "@protobufjs/aspromise": "^1.1.1",
        "@protobufjs/inquire": "^1.1.0"
      }
    },
    "node_modules/@protobufjs/float": {
      "version": "1.0.2",
      "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
      "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==",
      "license": "BSD-3-Clause"
    },
    "node_modules/@protobufjs/inquire": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
      "integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==",
      "license": "BSD-3-Clause"
    },
    "node_modules/@protobufjs/path": {
      "version": "1.1.2",
      "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
      "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==",
      "license": "BSD-3-Clause"
    },
    "node_modules/@protobufjs/pool": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
      "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==",
      "license": "BSD-3-Clause"
    },
    "node_modules/@protobufjs/utf8": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
      "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==",
      "license": "BSD-3-Clause"
    },
    "node_modules/@radix-ui/number": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.1.tgz",
@@ -6577,7 +6851,6 @@
      "version": "20.19.17",
      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.17.tgz",
      "integrity": "sha512-gfehUI8N1z92kygssiuWvLiwcbOB3IRktR6hTDgJlXMYh5OvkPSRmgfoBUmfZt+vhwJtX7v1Yw4KvvAf7c5QKQ==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "undici-types": "~6.21.0"
@@ -11721,6 +11994,12 @@
      "dev": true,
      "license": "MIT"
    },
    "node_modules/long": {
      "version": "5.3.2",
      "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
      "integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
      "license": "Apache-2.0"
    },
    "node_modules/loose-envify": {
      "version": "1.4.0",
      "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
@@ -13119,6 +13398,30 @@
      "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==",
      "license": "MIT"
    },
    "node_modules/protobufjs": {
      "version": "8.0.0",
      "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-8.0.0.tgz",
      "integrity": "sha512-jx6+sE9h/UryaCZhsJWbJtTEy47yXoGNYI4z8ZaRncM0zBKeRqjO2JEcOUYwrYGb1WLhXM1FfMzW3annvFv0rw==",
      "hasInstallScript": true,
      "license": "BSD-3-Clause",
      "dependencies": {
        "@protobufjs/aspromise": "^1.1.2",
        "@protobufjs/base64": "^1.1.2",
        "@protobufjs/codegen": "^2.0.4",
        "@protobufjs/eventemitter": "^1.1.0",
        "@protobufjs/fetch": "^1.1.0",
        "@protobufjs/float": "^1.0.2",
        "@protobufjs/inquire": "^1.1.0",
        "@protobufjs/path": "^1.1.2",
        "@protobufjs/pool": "^1.1.0",
        "@protobufjs/utf8": "^1.1.0",
        "@types/node": ">=13.7.0",
        "long": "^5.0.0"
      },
      "engines": {
        "node": ">=12.0.0"
      }
    },
    "node_modules/proxy-addr": {
      "version": "2.0.7",
      "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
@@ -15451,7 +15754,6 @@
      "version": "6.21.0",
      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
      "dev": true,
      "license": "MIT"
    },
    "node_modules/unicode-canonical-property-names-ecmascript": {

@@ -30,6 +30,13 @@
  },
  "dependencies": {
    "@hookform/resolvers": "^3.3.2",
    "@opentelemetry/api": "^1.9.0",
    "@opentelemetry/exporter-metrics-otlp-http": "^0.210.0",
    "@opentelemetry/exporter-trace-otlp-http": "^0.210.0",
    "@opentelemetry/resources": "^2.4.0",
    "@opentelemetry/sdk-metrics": "^2.4.0",
    "@opentelemetry/sdk-trace-web": "^2.4.0",
    "@opentelemetry/semantic-conventions": "^1.39.0",
    "@radix-ui/react-accordion": "^1.1.2",
    "@radix-ui/react-checkbox": "^1.0.4",
    "@radix-ui/react-dialog": "^1.0.5",

66 frontend/src/components/AnalyticsTestComponent.tsx Normal file
@@ -0,0 +1,66 @@
import React, { useState } from 'react';
import { trackUserAction, trackUserLocation } from '../utils/analytics';

const AnalyticsTestComponent: React.FC = () => {
  const [locationStatus, setLocationStatus] = useState<string>('Not requested');
  const [actionStatus, setActionStatus] = useState<string>('');

  const handleTrackLocation = async () => {
    try {
      setLocationStatus('Requesting...');
      await trackUserLocation();
      setLocationStatus('Location tracked successfully!');
    } catch (error) {
      setLocationStatus('Error tracking location');
      console.error('Location tracking error:', error);
    }
  };

  const handleTrackAction = () => {
    const actionName = `button_click_${Date.now()}`;
    trackUserAction(actionName, {
      component: 'AnalyticsTestComponent',
      timestamp: new Date().toISOString()
    });
    setActionStatus(`Action "${actionName}" tracked`);
  };

  return (
    <div className="p-6 max-w-2xl mx-auto bg-white rounded-lg shadow-md">
      <h2 className="text-xl font-bold mb-4">Analytics Test Component</h2>

      <div className="mb-4">
        <button
          onClick={handleTrackLocation}
          className="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4"
        >
          Track Location
        </button>
        <span className="text-sm text-gray-600">{locationStatus}</span>
      </div>

      <div className="mb-4">
        <button
          onClick={handleTrackAction}
          className="bg-green-500 hover:bg-green-700 text-white font-bold py-2 px-4 rounded"
        >
          Track Action
        </button>
        <span className="text-sm text-gray-600 ml-4">{actionStatus}</span>
      </div>

      <div className="mt-6 p-4 bg-gray-100 rounded">
        <h3 className="font-semibold mb-2">Expected Behavior:</h3>
        <ul className="list-disc pl-5 space-y-1 text-sm">
          <li>Page views are automatically tracked when this component loads</li>
          <li>Session information is captured on initial load</li>
          <li>Browser and device info is collected automatically</li>
          <li>Clicking buttons will generate user action traces</li>
          <li>Location tracking requires user permission</li>
        </ul>
      </div>
    </div>
  );
};

export default AnalyticsTestComponent;
@@ -5,6 +5,9 @@ interface RuntimeConfig {
  VITE_API_URL: string;
  VITE_APP_TITLE: string;
  VITE_APP_VERSION: string;
  VITE_OTEL_TRACES_ENDPOINT?: string;
  VITE_OTEL_METRICS_ENDPOINT?: string;
  VITE_OTEL_ENABLED?: string;
}

declare global {
@@ -27,6 +30,9 @@ function getRuntimeConfig(): RuntimeConfig {
    VITE_API_URL: import.meta.env.VITE_API_URL || 'http://localhost:8000',
    VITE_APP_TITLE: import.meta.env.VITE_APP_TITLE || 'PanIA Dashboard',
    VITE_APP_VERSION: import.meta.env.VITE_APP_VERSION || '1.0.0',
    VITE_OTEL_TRACES_ENDPOINT: import.meta.env.VITE_OTEL_TRACES_ENDPOINT || '/api/v1/telemetry/v1/traces',
    VITE_OTEL_METRICS_ENDPOINT: import.meta.env.VITE_OTEL_METRICS_ENDPOINT || '/api/v1/telemetry/v1/metrics',
    VITE_OTEL_ENABLED: import.meta.env.VITE_OTEL_ENABLED || 'true',
  };
}

@@ -52,6 +58,21 @@ export function isKubernetesEnvironment(): boolean {
  return typeof window !== 'undefined' && !!window.__RUNTIME_CONFIG__;
}

// Helper to check if OpenTelemetry is enabled
export function isOpenTelemetryEnabled(): boolean {
  return config.VITE_OTEL_ENABLED?.toLowerCase() !== 'false';
}

// Helper to get OpenTelemetry traces endpoint
export function getOtelTracesEndpoint(): string {
  return config.VITE_OTEL_TRACES_ENDPOINT || '/api/v1/telemetry/v1/traces';
}

// Helper to get OpenTelemetry metrics endpoint
export function getOtelMetricsEndpoint(): string {
  return config.VITE_OTEL_METRICS_ENDPOINT || '/api/v1/telemetry/v1/metrics';
}

// Debug function to log current configuration
export function logConfig(): void {
  console.log('Current configuration:', {

33 frontend/src/hooks/useAnalytics.ts Normal file
@@ -0,0 +1,33 @@
import {
  trackPageView,
  trackUserAction,
  trackUserLocation,
  trackSession,
  getCurrentUserId,
  isAnalyticsEnabled
} from '../utils/analytics';

/**
 * React Hook for analytics
 *
 * NOTE: Page view tracking is handled globally by initializeAnalytics() in main.tsx.
 * This hook only exposes tracking functions for use in components.
 * Do NOT add automatic page tracking here to avoid duplicate events.
 */
export const useAnalytics = () => {
  return {
    // Manual page view tracking (use only for custom page events, not navigation)
    trackPageView,
    // Track user actions (button clicks, form submissions, etc.)
    trackUserAction,
    // Track user location (requires consent)
    trackUserLocation,
    // Track session (typically called once at app init)
    trackSession,
    // Get current user ID
    getCurrentUserId,
    // Check if analytics are enabled
    isAnalyticsEnabled
  };
};
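
For reference, a minimal sketch of how a component might consume this hook (the component and event names here are illustrative, not part of this commit):

```tsx
import React from 'react';
import { useAnalytics } from '../hooks/useAnalytics';

const ExportButton: React.FC = () => {
  const { trackUserAction } = useAnalytics();

  const handleClick = () => {
    // trackUserAction is a no-op when the user has opted out of analytics
    trackUserAction('export_report_clicked', { component: 'ExportButton' });
    // ...actual export logic would go here
  };

  return <button onClick={handleClick}>Export report</button>;
};

export default ExportButton;
```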
@@ -7,6 +7,92 @@ import './styles/animations.css';
import './styles/themes/light.css';
import './styles/themes/dark.css';

// OpenTelemetry Web SDK initialization
import { WebTracerProvider } from '@opentelemetry/sdk-trace-web';
import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-base';
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
import { resourceFromAttributes } from '@opentelemetry/resources';
import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION } from '@opentelemetry/semantic-conventions';
import { MeterProvider, PeriodicExportingMetricReader } from '@opentelemetry/sdk-metrics';
import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-http';
import { metrics } from '@opentelemetry/api';

// Import analytics utilities
import { initializeAnalytics } from './utils/analytics';

// Import configuration
import { isOpenTelemetryEnabled, getOtelTracesEndpoint, getOtelMetricsEndpoint } from './config/runtime';

// Store cleanup function for proper teardown
let analyticsCleanup: (() => void) | null = null;

// Initialize OpenTelemetry
const initOpenTelemetry = () => {
  // Check if OpenTelemetry is enabled in configuration
  if (!isOpenTelemetryEnabled()) {
    console.log('OpenTelemetry disabled by configuration');
    return;
  }

  try {
    // Create resource with service information using non-deprecated attributes
    const resource = resourceFromAttributes({
      [ATTR_SERVICE_NAME]: 'bakery-frontend',
      [ATTR_SERVICE_VERSION]: '1.0.0'
    });

    // Initialize tracer with span processor
    const traceExporter = new OTLPTraceExporter({
      url: getOtelTracesEndpoint() // Using configured endpoint
    });

    const traceProvider = new WebTracerProvider({
      resource: resource,
      // Add span processors as array for current OpenTelemetry SDK version
      spanProcessors: [new BatchSpanProcessor(traceExporter)]
    });

    traceProvider.register();

    // Initialize metrics
    const metricExporter = new OTLPMetricExporter({
      url: getOtelMetricsEndpoint()
    });

    const metricReader = new PeriodicExportingMetricReader({
      exporter: metricExporter,
      exportIntervalMillis: 10000, // 10 seconds
    });

    // Use the MeterProvider constructor with readers array
    const meterProvider = new MeterProvider({
      resource: resource,
      readers: [metricReader]
    });

    // Register the meter provider globally using proper API
    metrics.setGlobalMeterProvider(meterProvider);

    console.log('OpenTelemetry initialized for frontend');
  } catch (error) {
    console.error('Failed to initialize OpenTelemetry:', error);
    // Continue without OpenTelemetry if initialization fails
  }
};

// Initialize OpenTelemetry before rendering the app
initOpenTelemetry();

// Initialize analytics tracking and store cleanup function
analyticsCleanup = initializeAnalytics();

// Cleanup on page unload
window.addEventListener('beforeunload', () => {
  if (analyticsCleanup) {
    analyticsCleanup();
  }
});

// PWA/ServiceWorker functionality removed to avoid conflicts in development

ReactDOM.createRoot(document.getElementById('root')!).render(

301 frontend/src/utils/analytics.ts Normal file
@@ -0,0 +1,301 @@
|
||||
import { trace } from '@opentelemetry/api';
import { ATTR_HTTP_ROUTE } from '@opentelemetry/semantic-conventions';

// Types and Interfaces
interface AnalyticsMetadata {
  [key: string]: string | number | boolean | undefined;
}

// Constants
const ANALYTICS_ENABLED_KEY = 'analyticsEnabled';
const LOCATION_CONSENT_KEY = 'locationTrackingConsent';
const SESSION_ID_KEY = 'sessionId';
const USER_ID_KEY = 'userId';

// Generate a unique session ID
const generateSessionId = (): string => {
  return Date.now().toString(36) + Math.random().toString(36).substring(2);
};

// Get current user ID (implement based on your auth system)
export const getCurrentUserId = (): string | null => {
  // This is a placeholder - implement based on your authentication system
  // For example, you might get this from localStorage, cookies, or context
  return localStorage.getItem(USER_ID_KEY) || sessionStorage.getItem(USER_ID_KEY) || null;
};

// Track page view
export const trackPageView = (pathname: string): void => {
  // Check if analytics are enabled
  if (!isAnalyticsEnabled()) {
    return;
  }

  try {
    const tracer = trace.getTracer('bakery-frontend');
    const user_id = getCurrentUserId();

    const span = tracer.startSpan('page_view', {
      attributes: {
        [ATTR_HTTP_ROUTE]: pathname,
        'user.id': user_id || 'anonymous',
        'page.path': pathname,
      }
    });

    // End the span immediately for page views
    span.end();
  } catch (error) {
    console.error('Failed to track page view:', error);
  }
};

// Check if analytics are enabled
export const isAnalyticsEnabled = (): boolean => {
  return localStorage.getItem(ANALYTICS_ENABLED_KEY) !== 'false';
};

// Enable or disable analytics
export const setAnalyticsEnabled = (enabled: boolean): void => {
  localStorage.setItem(ANALYTICS_ENABLED_KEY, enabled.toString());
};

// Check if location tracking consent is granted
export const isLocationTrackingConsentGranted = (): boolean => {
  return localStorage.getItem(LOCATION_CONSENT_KEY) === 'granted';
};

// Set location tracking consent
export const setLocationTrackingConsent = (granted: boolean): void => {
  localStorage.setItem(LOCATION_CONSENT_KEY, granted ? 'granted' : 'denied');
};

// Track user session
export const trackSession = (): (() => void) => {
  // Check if analytics are enabled
  if (!isAnalyticsEnabled()) {
    console.log('Analytics disabled by user preference');
    return () => {}; // Return no-op cleanup function
  }

  try {
    const tracer = trace.getTracer('bakery-frontend');
    const sessionId = generateSessionId();
    const userId = getCurrentUserId();

    const span = tracer.startSpan('user_session', {
      attributes: {
        'session.id': sessionId,
        'user.id': userId || 'anonymous',
        'browser.user_agent': navigator.userAgent,
        'screen.width': window.screen.width.toString(),
        'screen.height': window.screen.height.toString(),
        'device.type': /mobile|tablet|ipad|iphone|ipod|android|silk/i.test(navigator.userAgent) ? 'mobile' : 'desktop'
      }
    });

    // Store session ID in sessionStorage for later use
    sessionStorage.setItem(SESSION_ID_KEY, sessionId);

    // End span when session ends
    const handleBeforeUnload = () => {
      span.end();
    };

    window.addEventListener('beforeunload', handleBeforeUnload);

    // Clean up event listener when needed
    return () => {
      window.removeEventListener('beforeunload', handleBeforeUnload);
    };
  } catch (error) {
    console.error('Failed to track session:', error);
    return () => {}; // Return no-op cleanup function
  }
};

// Track user action
export const trackUserAction = (action: string, metadata?: AnalyticsMetadata): void => {
  // Check if analytics are enabled
  if (!isAnalyticsEnabled()) {
    return;
  }

  try {
    const tracer = trace.getTracer('bakery-frontend');
    const userId = getCurrentUserId();

    const span = tracer.startSpan('user_action', {
      attributes: {
        'user.action': action,
        'user.id': userId || 'anonymous',
        ...metadata
      }
    });

    span.end();
  } catch (error) {
    console.error('Failed to track user action:', error);
  }
};

// Track user location (with consent)
export const trackUserLocation = async (): Promise<void> => {
  // Check if analytics are enabled
  if (!isAnalyticsEnabled()) {
    return;
  }

  // Check if location tracking consent is granted
  if (!isLocationTrackingConsentGranted()) {
    console.log('Location tracking consent not granted');
    return;
  }

  try {
    const position = await new Promise<GeolocationPosition>((resolve, reject) => {
      if (!navigator.geolocation) {
        reject(new Error('Geolocation not supported'));
        return;
      }

      navigator.geolocation.getCurrentPosition(resolve, reject, {
        enableHighAccuracy: false,
        timeout: 10000,
        maximumAge: 300000 // 5 minutes
      });
    });

    const tracer = trace.getTracer('bakery-frontend');
    const userId = getCurrentUserId();

    const span = tracer.startSpan('user_location', {
      attributes: {
        'user.id': userId || 'anonymous',
        'location.latitude': position.coords.latitude,
        'location.longitude': position.coords.longitude,
        'location.accuracy': position.coords.accuracy,
        'location.altitude': position.coords.altitude ?? undefined,
        'location.speed': position.coords.speed ?? undefined,
        'location.heading': position.coords.heading ?? undefined
      }
    });

    span.end();
  } catch (error) {
    console.log('Location access denied or unavailable:', error);
  }
};

// Initialize analytics tracking
export const initializeAnalytics = (): (() => void) => {
  // Track initial session
  const cleanupSession = trackSession();

  // Track initial page view
  trackPageView(window.location.pathname);

  // Listen for route changes (for SPA navigation)
  let previousUrl = window.location.href;

  // For hash-based routing
  const handleHashChange = () => {
    if (window.location.href !== previousUrl) {
      trackPageView(window.location.pathname + window.location.search);
      previousUrl = window.location.href;
    }
  };

  // For history API-based routing (most common in React apps)
  // Use proper typing for history state methods
  const originalPushState = history.pushState.bind(history);
  const handlePushState = function (
    this: History,
    data: unknown,
    unused: string,
    url?: string | URL | null
  ) {
    originalPushState(data, unused, url);
    setTimeout(() => {
      if (window.location.href !== previousUrl) {
        trackPageView(window.location.pathname + window.location.search);
        previousUrl = window.location.href;
      }
    }, 0);
  };

  const originalReplaceState = history.replaceState.bind(history);
  const handleReplaceState = function (
    this: History,
    data: unknown,
    unused: string,
    url?: string | URL | null
  ) {
    originalReplaceState(data, unused, url);
    setTimeout(() => {
      if (window.location.href !== previousUrl) {
        trackPageView(window.location.pathname + window.location.search);
        previousUrl = window.location.href;
      }
    }, 0);
  };

  // Override history methods
  history.pushState = handlePushState;
  history.replaceState = handleReplaceState;

  // Add event listeners
  window.addEventListener('hashchange', handleHashChange);

  // Track user consent for location if needed
  if (isLocationTrackingConsentGranted()) {
    trackUserLocation();
  }

  // Return cleanup function
  return () => {
    // Restore original history methods
    history.pushState = originalPushState;
    history.replaceState = originalReplaceState;

    // Remove event listeners
    window.removeEventListener('hashchange', handleHashChange);

    // Clean up session tracking
    cleanupSession();
  };
};

// Function to track custom metrics using OpenTelemetry spans
export const trackCustomMetric = (
  name: string,
  value: number,
  attributes?: Record<string, string>
): void => {
  // Check if analytics are enabled
  if (!isAnalyticsEnabled()) {
    return;
  }

  try {
    // Record metric as a span with the value as an attribute
    // This approach works well for browser-based metrics since
    // the OpenTelemetry metrics API in browsers sends to the same collector
    const tracer = trace.getTracer('bakery-frontend');
    const userId = getCurrentUserId();

    const span = tracer.startSpan('custom_metric', {
      attributes: {
        'metric.name': name,
        'metric.value': value,
        'user.id': userId || 'anonymous',
        ...attributes
      }
    });

    span.end();
  } catch (error) {
    // Log error but don't fail - metrics are non-critical
    console.warn('Failed to track custom metric:', error);
  }
};
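All of the helpers above are gated by the same localStorage flags, so a consent banner can switch everything on or off with two calls. A minimal usage sketch (hypothetical component code, not part of this commit; the import path assumes the file lives at frontend/src/utils/analytics.ts as added above):

// Hypothetical consumer of the analytics module added in this commit.
import {
  trackUserAction,
  trackCustomMetric,
  setAnalyticsEnabled,
  setLocationTrackingConsent,
} from '../utils/analytics';

// Persist a consent-banner decision; subsequent track* calls become
// no-ops when analytics are disabled.
export const onConsentDecision = (accepted: boolean): void => {
  setAnalyticsEnabled(accepted);
  setLocationTrackingConsent(accepted);
};

// Emit one span per business event, plus a numeric metric-as-span.
// 'place_order' and 'cart_value' are made-up example names.
export const onPlaceOrder = (orderId: string, total: number): void => {
  trackUserAction('place_order', { 'order.id': orderId, 'order.total': total });
  trackCustomMetric('cart_value', total, { 'order.id': orderId });
};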
@@ -51,10 +51,11 @@ export default defineConfig(({ mode }) => {
    build: {
      outDir: 'dist',
      // For production builds: ensure assets have correct paths
      // Base path should be '/' for root deployment
      // Base path should match the deployment URL
      base: process.env.VITE_BASE_URL || '/',
      // In development mode: inline source maps for better debugging
      // In production mode: external source maps
      sourcemap: isDevelopment ? 'inline' : true,
      // In production mode: external source maps (can be disabled with VITE_DISABLE_SOURCEMAPS)
      sourcemap: process.env.VITE_DISABLE_SOURCEMAPS ? false : (isDevelopment ? 'inline' : true),
      // In development mode: disable minification for readable errors
      // In production mode: use esbuild minification
      minify: isDevelopment ? false : 'esbuild',
@@ -25,7 +25,7 @@ from app.middleware.rate_limiting import APIRateLimitMiddleware
from app.middleware.subscription import SubscriptionMiddleware
from app.middleware.demo_middleware import DemoMiddleware
from app.middleware.read_only_mode import ReadOnlyModeMiddleware
from app.routes import auth, tenant, registration, nominatim, subscription, demo, pos, geocoding, poi_context, webhooks
from app.routes import auth, tenant, registration, nominatim, subscription, demo, pos, geocoding, poi_context, webhooks, telemetry

# Initialize logger
logger = structlog.get_logger()
@@ -169,6 +169,9 @@ app.include_router(demo.router, prefix="/api/v1", tags=["demo"])
# Webhook routes are defined with full /api/v1/webhooks/* paths for consistency
app.include_router(webhooks.router, prefix="", tags=["webhooks"])

# Include telemetry routes for frontend OpenTelemetry data
app.include_router(telemetry.router, prefix="/api/v1", tags=["telemetry"])


# ================================================================
# SERVER-SENT EVENTS (SSE) HELPER FUNCTIONS
@@ -47,7 +47,10 @@ PUBLIC_ROUTES = [
    "/api/v1/demo/accounts",
    "/api/v1/demo/sessions",
    "/api/v1/webhooks/stripe",  # Stripe webhook endpoint - bypasses auth for signature verification
    "/api/v1/webhooks/generic"  # Generic webhook endpoint
    "/api/v1/webhooks/generic",  # Generic webhook endpoint
    "/api/v1/telemetry/v1/traces",  # Frontend telemetry traces - no auth for performance
    "/api/v1/telemetry/v1/metrics",  # Frontend telemetry metrics - no auth for performance
    "/api/v1/telemetry/health"  # Telemetry health check
]

# Routes accessible with demo session (no JWT required, just demo session header)
303
gateway/app/routes/telemetry.py
Normal file
@@ -0,0 +1,303 @@
"""
Telemetry routes for API Gateway - Handles frontend telemetry data

This module provides endpoints for:
- Receiving OpenTelemetry traces from frontend
- Proxying traces to Signoz OTel collector
- Providing a secure, authenticated endpoint for frontend telemetry
"""

from fastapi import APIRouter, Request, HTTPException, status
from fastapi.responses import JSONResponse, Response
import httpx
import logging
import os
from typing import Optional

from app.core.config import settings
from app.core.header_manager import header_manager
from shared.monitoring.metrics import MetricsCollector, create_metrics_collector

logger = logging.getLogger(__name__)
router = APIRouter(prefix="/telemetry", tags=["telemetry"])

# Get Signoz OTel collector endpoint from environment or use default
SIGNOZ_OTEL_COLLECTOR = os.getenv(
    "SIGNOZ_OTEL_COLLECTOR_URL",
    "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
)


@router.post("/v1/traces")
async def receive_frontend_traces(request: Request):
    """
    Receive OpenTelemetry traces from frontend and proxy to Signoz

    This endpoint:
    - Accepts OTLP trace data from frontend
    - Validates the request
    - Proxies to Signoz OTel collector
    - Handles errors gracefully
    """

    # Handle OPTIONS requests for CORS
    if request.method == "OPTIONS":
        return Response(
            status_code=200,
            headers={
"Access-Control-Allow-Origin": settings.CORS_ORIGINS_LIST,
|
||||
"Access-Control-Allow-Methods": "POST, OPTIONS",
|
||||
"Access-Control-Allow-Headers": "Content-Type, Authorization, X-Tenant-ID",
|
||||
"Access-Control-Allow-Credentials": "true",
|
||||
"Access-Control-Max-Age": "86400"
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
# Get the trace data from the request
|
||||
body = await request.body()
|
||||
|
||||
if not body:
|
||||
logger.warning("Received empty trace data from frontend")
|
||||
return JSONResponse(
|
||||
status_code=400,
|
||||
content={"error": "Empty trace data"}
|
||||
)
|
||||
|
||||
# Log the trace reception (without sensitive data)
|
||||
logger.info(
|
||||
"Received frontend traces, content_length=%s, content_type=%s, user_agent=%s",
|
||||
len(body),
|
||||
request.headers.get("content-type"),
|
||||
request.headers.get("user-agent")
|
||||
)
|
||||
|
||||
# Forward to Signoz OTel collector
|
||||
target_url = f"{SIGNOZ_OTEL_COLLECTOR}/v1/traces"
|
||||
|
||||
# Set up headers for the Signoz collector
|
||||
forward_headers = {
|
||||
"Content-Type": request.headers.get("content-type", "application/json"),
|
||||
"User-Agent": "bakery-gateway/1.0",
|
||||
"X-Forwarded-For": request.headers.get("x-forwarded-for", "frontend"),
|
||||
"X-Tenant-ID": request.headers.get("x-tenant-id", "unknown")
|
||||
}
|
||||
|
||||
# Add authentication if configured
|
||||
signoz_auth_token = os.getenv("SIGNOZ_AUTH_TOKEN")
|
||||
if signoz_auth_token:
|
||||
forward_headers["Authorization"] = f"Bearer {signoz_auth_token}"
|
||||
|
||||
# Send to Signoz collector
|
||||
timeout_config = httpx.Timeout(
|
||||
connect=5.0,
|
||||
read=10.0,
|
||||
write=5.0,
|
||||
pool=5.0
|
||||
)
|
||||
|
||||
async with httpx.AsyncClient(timeout=timeout_config) as client:
|
||||
response = await client.post(
|
||||
url=target_url,
|
||||
content=body,
|
||||
headers=forward_headers
|
||||
)
|
||||
|
||||
# Log the response from Signoz
|
||||
logger.info(
|
||||
"Forwarded traces to Signoz, signoz_status=%s, signoz_response_time=%s",
|
||||
response.status_code,
|
||||
response.elapsed.total_seconds()
|
||||
)
|
||||
|
||||
# Return success response to frontend
|
||||
return JSONResponse(
|
||||
status_code=200,
|
||||
content={
|
||||
"message": "Traces received and forwarded to Signoz",
|
||||
"signoz_status": response.status_code,
|
||||
"trace_count": 1 # We don't know exact count without parsing
|
||||
}
|
||||
)
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(
|
||||
"Signoz collector returned error, status_code=%s, error_message=%s",
|
||||
e.response.status_code,
|
||||
str(e)
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=502,
|
||||
content={
|
||||
"error": "Signoz collector error",
|
||||
"details": str(e),
|
||||
"signoz_status": e.response.status_code
|
||||
}
|
||||
)
|
||||
|
||||
except httpx.RequestError as e:
|
||||
logger.error(
|
||||
"Failed to connect to Signoz collector, error=%s, collector_url=%s",
|
||||
str(e),
|
||||
SIGNOZ_OTEL_COLLECTOR
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=503,
|
||||
content={
|
||||
"error": "Signoz collector unavailable",
|
||||
"details": str(e)
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Unexpected error processing traces, error=%s, error_type=%s",
|
||||
str(e),
|
||||
type(e).__name__
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=500,
|
||||
content={
|
||||
"error": "Internal server error",
|
||||
"details": str(e)
|
||||
}
|
||||
)
|
||||
|
||||
@router.post("/v1/metrics")
|
||||
async def receive_frontend_metrics(request: Request):
|
||||
"""
|
||||
Receive OpenTelemetry metrics from frontend and proxy to Signoz
|
||||
"""
|
||||
|
||||
# Handle OPTIONS requests for CORS
|
||||
if request.method == "OPTIONS":
|
||||
return Response(
|
||||
status_code=200,
|
||||
headers={
|
||||
"Access-Control-Allow-Origin": settings.CORS_ORIGINS_LIST,
|
||||
"Access-Control-Allow-Methods": "POST, OPTIONS",
|
||||
"Access-Control-Allow-Headers": "Content-Type, Authorization, X-Tenant-ID",
|
||||
"Access-Control-Allow-Credentials": "true",
|
||||
"Access-Control-Max-Age": "86400"
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
body = await request.body()
|
||||
|
||||
if not body:
|
||||
return JSONResponse(
|
||||
status_code=400,
|
||||
content={"error": "Empty metrics data"}
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Received frontend metrics, content_length=%s, content_type=%s",
|
||||
len(body),
|
||||
request.headers.get("content-type")
|
||||
)
|
||||
|
||||
# Forward to Signoz OTel collector
|
||||
target_url = f"{SIGNOZ_OTEL_COLLECTOR}/v1/metrics"
|
||||
|
||||
forward_headers = {
|
||||
"Content-Type": request.headers.get("content-type", "application/json"),
|
||||
"User-Agent": "bakery-gateway/1.0",
|
||||
"X-Forwarded-For": request.headers.get("x-forwarded-for", "frontend"),
|
||||
"X-Tenant-ID": request.headers.get("x-tenant-id", "unknown")
|
||||
}
|
||||
|
||||
# Add authentication if configured
|
||||
signoz_auth_token = os.getenv("SIGNOZ_AUTH_TOKEN")
|
||||
if signoz_auth_token:
|
||||
forward_headers["Authorization"] = f"Bearer {signoz_auth_token}"
|
||||
|
||||
timeout_config = httpx.Timeout(
|
||||
connect=5.0,
|
||||
read=10.0,
|
||||
write=5.0,
|
||||
pool=5.0
|
||||
)
|
||||
|
||||
async with httpx.AsyncClient(timeout=timeout_config) as client:
|
||||
response = await client.post(
|
||||
url=target_url,
|
||||
content=body,
|
||||
headers=forward_headers
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Forwarded metrics to Signoz, signoz_status=%s",
|
||||
response.status_code
|
||||
)
|
||||
|
||||
return JSONResponse(
|
||||
status_code=200,
|
||||
content={
|
||||
"message": "Metrics received and forwarded to Signoz",
|
||||
"signoz_status": response.status_code
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Error processing metrics, error=%s",
|
||||
str(e)
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=500,
|
||||
content={
|
||||
"error": "Internal server error",
|
||||
"details": str(e)
|
||||
}
|
||||
)
|
||||
|
||||
@router.get("/health")
|
||||
async def telemetry_health():
|
||||
"""
|
||||
Health check endpoint for telemetry service
|
||||
"""
|
||||
return JSONResponse(
|
||||
status_code=200,
|
||||
content={
|
||||
"status": "healthy",
|
||||
"service": "telemetry-gateway",
|
||||
"signoz_collector": SIGNOZ_OTEL_COLLECTOR
|
||||
}
|
||||
)
|
||||
|
||||
# Initialize metrics for this module
|
||||
try:
|
||||
metrics_collector = create_metrics_collector("gateway-telemetry")
|
||||
except Exception as e:
|
||||
logger.error("Failed to create metrics collector, error=%s", str(e))
|
||||
metrics_collector = None
|
||||
|
||||
@router.on_event("startup")
|
||||
async def startup_event():
|
||||
"""Initialize telemetry metrics on startup"""
|
||||
try:
|
||||
if metrics_collector:
|
||||
# Register telemetry-specific metrics
|
||||
metrics_collector.register_counter(
|
||||
"gateway_telemetry_traces_received",
|
||||
"Number of trace batches received from frontend"
|
||||
)
|
||||
metrics_collector.register_counter(
|
||||
"gateway_telemetry_metrics_received",
|
||||
"Number of metric batches received from frontend"
|
||||
)
|
||||
metrics_collector.register_counter(
|
||||
"gateway_telemetry_errors",
|
||||
"Number of telemetry processing errors"
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Telemetry gateway initialized, signoz_collector=%s",
|
||||
SIGNOZ_OTEL_COLLECTOR
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to initialize telemetry metrics, error=%s",
|
||||
str(e)
|
||||
)
|
||||
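With these routes in place, the browser SDK only needs the gateway origin, never the in-cluster collector address. A minimal sketch of pointing the OpenTelemetry web SDK at the proxy (standard OpenTelemetry JS packages; the relative URL assumes the frontend is served behind the same gateway, and note that addSpanProcessor moved into the provider constructor in newer SDK versions):

import { WebTracerProvider } from '@opentelemetry/sdk-trace-web';
import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-base';
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';

const provider = new WebTracerProvider();

// Batch spans and POST them as OTLP/HTTP to the gateway proxy, which
// forwards to the Signoz collector server-side.
provider.addSpanProcessor(
  new BatchSpanProcessor(
    new OTLPTraceExporter({ url: '/api/v1/telemetry/v1/traces' })
  )
);

provider.register();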
@@ -6,7 +6,7 @@
# Install Command: helm install signoz signoz/signoz -n bakery-ia -f signoz-values-prod.yaml

global:
  storageClass: "standard"  # For MicroK8s, use "microk8s-hostpath" or custom storage class
  storageClass: "microk8s-hostpath"  # For MicroK8s, use "microk8s-hostpath" or custom storage class
  clusterName: "bakery-ia-prod"
  domain: "monitoring.bakewise.ai"
  # Docker Hub credentials - applied to all sub-charts (including Zookeeper, ClickHouse, etc)
@@ -140,10 +140,9 @@ spec:
                name: pos-integration-secrets
            - secretRef:
                name: whatsapp-secrets
          volumeMounts:
            - name: model-storage
              mountPath: /app/models
              readOnly: true  # Forecasting only reads models
            - secretRef:
                name: minio-secrets
          # Model storage now uses MinIO - no local volumeMounts needed
          resources:
            requests:
              memory: "512Mi"
@@ -172,10 +171,7 @@ spec:
          secret:
            secretName: redis-tls-secret
            defaultMode: 0400
        - name: model-storage
          persistentVolumeClaim:
            claimName: model-storage
          readOnly: true  # Forecasting only reads models
      # Model storage migrated to MinIO - PVC no longer needed

---
apiVersion: v1
@@ -56,6 +56,11 @@ spec:
              configMapKeyRef:
                name: bakery-config
                key: OTEL_EXPORTER_OTLP_ENDPOINT
            - name: SIGNOZ_OTEL_COLLECTOR_URL
              valueFrom:
                configMapKeyRef:
                  name: bakery-config
                  key: SIGNOZ_OTEL_COLLECTOR_URL
          resources:
            requests:
              memory: "256Mi"
@@ -0,0 +1,154 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: minio
  namespace: bakery-ia
  labels:
    app.kubernetes.io/name: minio
    app.kubernetes.io/component: storage
    app.kubernetes.io/part-of: bakery-ia
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: minio
      app.kubernetes.io/component: storage
  template:
    metadata:
      labels:
        app.kubernetes.io/name: minio
        app.kubernetes.io/component: storage
    spec:
      # Init container to set up TLS certificates with correct permissions
      initContainers:
        - name: init-certs
          image: busybox:1.36
          command:
            - sh
            - -c
            - |
              mkdir -p /certs/CAs
              cp /certs-secret/minio-cert.pem /certs/public.crt
              cp /certs-secret/minio-key.pem /certs/private.key
              cp /certs-secret/ca-cert.pem /certs/CAs/ca.crt
              chmod 600 /certs/private.key
              chmod 644 /certs/public.crt /certs/CAs/ca.crt
          volumeMounts:
            - name: certs-secret
              mountPath: /certs-secret
              readOnly: true
            - name: certs
              mountPath: /certs
      containers:
        - name: minio
          image: minio/minio:RELEASE.2024-11-07T00-52-20Z
          args:
            - server
            - /data
            - --console-address
            - :9001
            - --address
            - :9000
            - --certs-dir
            - /certs
          env:
            - name: MINIO_ROOT_USER
              valueFrom:
                secretKeyRef:
                  name: minio-secrets
                  key: MINIO_ROOT_USER
            - name: MINIO_ROOT_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: minio-secrets
                  key: MINIO_ROOT_PASSWORD
            # Enable TLS for MinIO
            - name: MINIO_SERVER_URL
              value: "https://minio.bakery-ia.svc.cluster.local:9000"
            - name: MINIO_BROWSER_REDIRECT_URL
              value: "https://minio-console.bakery-ia.svc.cluster.local:9001"
          ports:
            - containerPort: 9000
              name: api
            - containerPort: 9001
              name: console
          volumeMounts:
            - name: minio-data
              mountPath: /data
            - name: certs
              mountPath: /certs
              readOnly: true
          resources:
            requests:
              memory: "512Mi"
              cpu: "200m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          livenessProbe:
            httpGet:
              path: /minio/health/live
              port: 9000
              scheme: HTTPS
            initialDelaySeconds: 30
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /minio/health/ready
              port: 9000
              scheme: HTTPS
            initialDelaySeconds: 5
            periodSeconds: 15
      volumes:
        - name: minio-data
          persistentVolumeClaim:
            claimName: minio-data
        - name: certs-secret
          secret:
            secretName: minio-tls
        - name: certs
          emptyDir: {}

---
apiVersion: v1
kind: Service
metadata:
  name: minio
  namespace: bakery-ia
  labels:
    app.kubernetes.io/name: minio
    app.kubernetes.io/component: storage
spec:
  type: ClusterIP
  ports:
    - port: 9000
      targetPort: 9000
      protocol: TCP
      name: api
    - port: 9001
      targetPort: 9001
      protocol: TCP
      name: console
  selector:
    app.kubernetes.io/name: minio
    app.kubernetes.io/component: storage

---
apiVersion: v1
kind: Service
metadata:
  name: minio-console
  namespace: bakery-ia
  labels:
    app.kubernetes.io/name: minio
    app.kubernetes.io/component: storage
spec:
  type: ClusterIP
  ports:
    - port: 9001
      targetPort: 9001
      protocol: TCP
      name: console
  selector:
    app.kubernetes.io/name: minio
    app.kubernetes.io/component: storage
@@ -0,0 +1,16 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: minio-data
  namespace: bakery-ia
  labels:
    app.kubernetes.io/name: minio-data
    app.kubernetes.io/component: storage
    app.kubernetes.io/part-of: bakery-ia
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 100Gi
  storageClassName: standard
@@ -0,0 +1,22 @@
apiVersion: v1
kind: Secret
metadata:
  name: minio-secrets
  namespace: bakery-ia
  labels:
    app.kubernetes.io/name: minio-secrets
    app.kubernetes.io/component: storage
    app.kubernetes.io/part-of: bakery-ia
type: Opaque
data:
  # MinIO Root Credentials (base64 encoded)
  MINIO_ROOT_USER: YWRtaW4=  # admin
  MINIO_ROOT_PASSWORD: c2VjdXJlLXBhc3N3b3Jk  # secure-password

  # Service Account Credentials for applications
  MINIO_ACCESS_KEY: dHJhaW5pbmctc2VydmljZQ==  # training-service
  MINIO_SECRET_KEY: dHJhaW5pbmctc2VjcmV0LWtleQ==  # training-secret-key

  # Forecasting Service Credentials
  FORECASTING_MINIO_ACCESS_KEY: Zm9yZWNhc3Rpbmctc2VydmljZQ==  # forecasting-service
  FORECASTING_MINIO_SECRET_KEY: Zm9yZWNhc3Rpbmctc2VjcmV0LWtleQ==  # forecasting-secret-key
@@ -140,11 +140,11 @@ spec:
                name: pos-integration-secrets
            - secretRef:
                name: whatsapp-secrets
            - secretRef:
                name: minio-secrets
          volumeMounts:
            - name: tmp-storage
              mountPath: /tmp
            - name: model-storage
              mountPath: /app/models
          resources:
            requests:
              memory: "512Mi"
@@ -176,9 +176,6 @@ spec:
        - name: tmp-storage
          emptyDir:
            sizeLimit: 4Gi  # Increased from 2Gi to handle cmdstan temp files during optimization
        - name: model-storage
          persistentVolumeClaim:
            claimName: model-storage

---
apiVersion: v1
@@ -1,16 +0,0 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: model-storage
  namespace: bakery-ia
  labels:
    app.kubernetes.io/name: model-storage
    app.kubernetes.io/component: storage
    app.kubernetes.io/part-of: bakery-ia
spec:
  accessModes:
    - ReadWriteOnce  # Single node access (works with local Kubernetes)
  resources:
    requests:
      storage: 10Gi  # Adjust based on your needs
  storageClassName: standard  # Use default local-path provisioner
@@ -66,6 +66,17 @@ data:
  ALERT_PROCESSOR_DB_HOST: "alert-processor-db-service"
  AI_INSIGHTS_DB_HOST: "ai-insights-db-service"
  DISTRIBUTION_DB_HOST: "distribution-db-service"
  DEMO_SESSION_DB_HOST: "demo-session-db-service"

  # MinIO Configuration
  MINIO_ENDPOINT: "minio.bakery-ia.svc.cluster.local:9000"
  MINIO_USE_SSL: "true"
  MINIO_MODEL_BUCKET: "training-models"
  MINIO_CONSOLE_PORT: "9001"
  MINIO_API_PORT: "9000"
  MINIO_REGION: "us-east-1"
  MINIO_MODEL_LIFECYCLE_DAYS: "90"
  MINIO_CACHE_TTL_SECONDS: "3600"

  # Database Configuration
  DB_PORT: "5432"
@@ -238,7 +249,8 @@
  # ================================================================
  # MODEL STORAGE & TRAINING
  # ================================================================
  MODEL_STORAGE_PATH: "/app/models"
  # Model storage is handled by MinIO (see MinIO Configuration section)
  MODEL_STORAGE_BACKEND: "minio"
  MODEL_BACKUP_ENABLED: "true"
  MODEL_VERSIONING_ENABLED: "true"
  MAX_TRAINING_TIME_MINUTES: "30"
@@ -416,6 +428,9 @@
  # OTEL_EXPORTER_OTLP_METRICS_ENDPOINT: "signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
  # OTEL_EXPORTER_OTLP_LOGS_ENDPOINT: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"

  # Gateway telemetry proxy configuration
  SIGNOZ_OTEL_COLLECTOR_URL: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"

  # Optional: Protocol overrides per signal
  # OTEL_EXPORTER_OTLP_TRACES_PROTOCOL: "grpc"
  # OTEL_EXPORTER_OTLP_METRICS_PROTOCOL: "grpc"
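Services pick these keys up as plain environment variables. A sketch of a consumer, written in TypeScript with the 'minio' npm client purely for illustration (the backend services in this repo may use a different S3 SDK; env names mirror the ConfigMap above):

import { Client } from 'minio';

// MINIO_ENDPOINT is "host:port"; split it into the pieces the client wants.
const [host, port] = (process.env.MINIO_ENDPOINT ?? 'minio.bakery-ia.svc.cluster.local:9000').split(':');

const minio = new Client({
  endPoint: host,
  port: Number(port),
  useSSL: process.env.MINIO_USE_SSL === 'true',
  accessKey: process.env.MINIO_ACCESS_KEY ?? '',
  secretKey: process.env.MINIO_SECRET_KEY ?? '',
});

// Download a trained model from the bucket configured in MINIO_MODEL_BUCKET.
export async function fetchModel(objectName: string, destPath: string): Promise<void> {
  const bucket = process.env.MINIO_MODEL_BUCKET ?? 'training-models';
  await minio.fGetObject(bucket, objectName, destPath);
}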
193
infrastructure/kubernetes/base/jobs/minio-bucket-init-job.yaml
Normal file
@@ -0,0 +1,193 @@
apiVersion: batch/v1
kind: Job
metadata:
  name: minio-bucket-init
  namespace: bakery-ia
  labels:
    app.kubernetes.io/name: minio-bucket-init
    app.kubernetes.io/component: storage-init
    app.kubernetes.io/part-of: bakery-ia
spec:
  ttlSecondsAfterFinished: 300
  backoffLimit: 3
  template:
    metadata:
      labels:
        app.kubernetes.io/name: minio-bucket-init
        app.kubernetes.io/component: storage-init
    spec:
      restartPolicy: OnFailure
      initContainers:
        # Wait for MinIO to be ready
        - name: wait-for-minio
          image: busybox:1.36
          command:
            - sh
            - -c
            - |
              echo "Waiting for MinIO to be ready..."
              until nc -z minio.bakery-ia.svc.cluster.local 9000; do
                echo "MinIO not ready, waiting..."
                sleep 5
              done
              echo "MinIO is ready!"
      containers:
        - name: bucket-init
          image: minio/mc:RELEASE.2024-11-17T19-35-25Z
          command:
            - /bin/sh
            - -c
            - |
              set -e

              echo "Configuring MinIO client..."

              # Configure mc alias with TLS (skip cert verification for self-signed)
              mc alias set myminio https://minio.bakery-ia.svc.cluster.local:9000 \
                ${MINIO_ROOT_USER} ${MINIO_ROOT_PASSWORD} --insecure

              echo "Creating buckets..."

              # Create training-models bucket if not exists
              if ! mc ls myminio/training-models --insecure 2>/dev/null; then
                mc mb myminio/training-models --insecure
                echo "Created bucket: training-models"
              else
                echo "Bucket already exists: training-models"
              fi

              # Set bucket policy (private by default)
              mc anonymous set none myminio/training-models --insecure

              # Enable versioning for model backups
              mc version enable myminio/training-models --insecure
              echo "Enabled versioning on training-models bucket"

              # Set lifecycle policy to expire old versions after 90 days
              cat > /tmp/lifecycle.json << 'EOF'
              {
                "Rules": [
                  {
                    "ID": "expire-old-versions",
                    "Status": "Enabled",
                    "Filter": {
                      "Prefix": "models/"
                    },
                    "NoncurrentVersionExpiration": {
                      "NoncurrentDays": 90
                    }
                  },
                  {
                    "ID": "expire-old-metadata",
                    "Status": "Enabled",
                    "Filter": {
                      "Prefix": "models/"
                    },
                    "Expiration": {
                      "ExpiredObjectDeleteMarker": true
                    }
                  }
                ]
              }
              EOF
              mc ilm import myminio/training-models < /tmp/lifecycle.json --insecure || true
              echo "Lifecycle policy configured"

              # Create service accounts with limited permissions
              echo "Creating access policies..."

              # Training service policy (read/write models)
              cat > /tmp/training-policy.json << 'EOF'
              {
                "Version": "2012-10-17",
                "Statement": [
                  {
                    "Effect": "Allow",
                    "Action": [
                      "s3:GetObject",
                      "s3:PutObject",
                      "s3:DeleteObject",
                      "s3:ListBucket",
                      "s3:GetBucketLocation",
                      "s3:ListBucketMultipartUploads"
                    ],
                    "Resource": [
                      "arn:aws:s3:::training-models",
                      "arn:aws:s3:::training-models/*"
                    ]
                  }
                ]
              }
              EOF

              # Forecasting service policy (read-only models)
              cat > /tmp/forecasting-policy.json << 'EOF'
              {
                "Version": "2012-10-17",
                "Statement": [
                  {
                    "Effect": "Allow",
                    "Action": [
                      "s3:GetObject",
                      "s3:ListBucket"
                    ],
                    "Resource": [
                      "arn:aws:s3:::training-models",
                      "arn:aws:s3:::training-models/*"
                    ]
                  }
                ]
              }
              EOF

              # Create service accounts using credentials from secrets
              echo "Creating service accounts..."
              mc admin user add myminio ${TRAINING_MINIO_USER} ${TRAINING_MINIO_PASSWORD} --insecure 2>/dev/null || true
              mc admin user add myminio ${FORECASTING_MINIO_USER} ${FORECASTING_MINIO_PASSWORD} --insecure 2>/dev/null || true

              # Apply policies (ignore errors if already exists)
              mc admin policy create myminio training-policy /tmp/training-policy.json --insecure 2>/dev/null || true
              mc admin policy attach myminio training-policy --user=${TRAINING_MINIO_USER} --insecure 2>/dev/null || true

              mc admin policy create myminio forecasting-policy /tmp/forecasting-policy.json --insecure 2>/dev/null || true
              mc admin policy attach myminio forecasting-policy --user=${FORECASTING_MINIO_USER} --insecure 2>/dev/null || true

              echo "MinIO bucket initialization complete!"

              # List buckets for verification
              echo "Current buckets:"
              mc ls myminio --insecure

          env:
            - name: MINIO_ROOT_USER
              valueFrom:
                secretKeyRef:
                  name: minio-secrets
                  key: MINIO_ROOT_USER
            - name: MINIO_ROOT_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: minio-secrets
                  key: MINIO_ROOT_PASSWORD
            # Training service MinIO credentials
            - name: TRAINING_MINIO_USER
              valueFrom:
                secretKeyRef:
                  name: minio-secrets
                  key: MINIO_ACCESS_KEY
            - name: TRAINING_MINIO_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: minio-secrets
                  key: MINIO_SECRET_KEY
            # Forecasting service MinIO credentials
            - name: FORECASTING_MINIO_USER
              valueFrom:
                secretKeyRef:
                  name: minio-secrets
                  key: FORECASTING_MINIO_ACCESS_KEY
            - name: FORECASTING_MINIO_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: minio-secrets
                  key: FORECASTING_MINIO_SECRET_KEY
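The job splits permissions cleanly: the training account may write under training-models, the forecasting account may only read. A hypothetical upload from the training side (TypeScript 'minio' client again for illustration only; the object name and env wiring are made up, and credentials come from the same secret keys the job uses):

import { Client } from 'minio';

const client = new Client({
  endPoint: 'minio.bakery-ia.svc.cluster.local',
  port: 9000,
  useSSL: true,
  accessKey: process.env.MINIO_ACCESS_KEY ?? '',  // training-service user
  secretKey: process.env.MINIO_SECRET_KEY ?? '',
});

// The training policy allows PutObject on training-models/*; because
// versioning is enabled, prior uploads stay recoverable for 90 days
// under the lifecycle rule configured above.
export async function publishModel(version: string, localPath: string): Promise<void> {
  await client.fPutObject('training-models', `models/demand/${version}.pkl`, localPath);
}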
@@ -18,6 +18,13 @@ resources:

# Additional configs
- configs/postgres-init-config.yaml

# MinIO Storage (with TLS)
- components/minio/minio-secrets.yaml
- secrets/minio-tls-secret.yaml
- components/minio/minio-pvc.yaml
- components/minio/minio-deployment.yaml
- jobs/minio-bucket-init-job.yaml

# Migration jobs
- migrations/auth-migration-job.yaml
@@ -63,9 +70,6 @@ resources:
- components/nominatim/nominatim.yaml
- jobs/nominatim-init-job.yaml

# Persistent storage
- components/volumes/model-storage-pvc.yaml

# Cert manager cluster issuers
- components/cert-manager/cluster-issuer-staging.yaml
- components/cert-manager/local-ca-issuer.yaml
28
infrastructure/kubernetes/base/secrets/minio-tls-secret.yaml
Normal file
@@ -0,0 +1,28 @@
apiVersion: v1
kind: Secret
metadata:
  name: minio-tls
  namespace: bakery-ia
  labels:
    app.kubernetes.io/name: bakery-ia
    app.kubernetes.io/component: minio-tls
    app.kubernetes.io/part-of: bakery-ia
type: Opaque
data:
# MinIO TLS certificates (base64 encoded)
# Generated using infrastructure/tls/generate-minio-certificates.sh
# Valid for 3 years from generation date
#
# Certificate details:
#   Subject: CN=minio.bakery-ia.svc.cluster.local, O=BakeryIA, OU=Storage
#   Issuer: CN=BakeryIA-CA, O=BakeryIA, OU=Security
#
# To regenerate:
#   1. Run: infrastructure/tls/generate-minio-certificates.sh
#   2. Run: scripts/create-tls-secrets.sh
ca-cert.pem: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUZ5ekNDQTdPZ0F3SUJBZ0lVUGdPcU5ZK1pvS0J5UTFNZk84bGtpR2hPbXhJd0RRWUpLb1pJaHZjTkFRRUwKQlFBd2RURUxNQWtHQTFVRUJoTUNWVk14RXpBUkJnTlZCQWdNQ2tOaGJHbG1iM0p1YVdFeEZUQVRCZ05WQkFjTQpERk5oYmtaeVlXNWphWE5qYnpFUk1BOEdBMVVFQ2d3SVFtRnJaWEo1U1VFeEVUQVBCZ05WQkFzTUNGTmxZM1Z5CmFYUjVNUlF3RWdZRFZRUUREQXRDWVd0bGNubEpRUzFEUVRBZUZ3MHlOVEV3TVRneE5ESXlNVFJhRncwek5URXcKTVRZeE5ESXlNVFJhTUhVeEN6QUpCZ05WQkFZVEFsVlRNUk13RVFZRFZRUUlEQXBEWVd4cFptOXlibWxoTVJVdwpFd1lEVlFRSERBeFRZVzVHY21GdVkybHpZMjh4RVRBUEJnTlZCQW9NQ0VKaGEyVnllVWxCTVJFd0R3WURWUVFMCkRBaFRaV04xY21sMGVURVVNQklHQTFVRUF3d0xRbUZyWlhKNVNVRXRRMEV3Z2dJaU1BMEdDU3FHU0liM0RRRUIKQVFVQUE0SUNEd0F3Z2dJS0FvSUNBUURSRDVPMmVna1lnOUhOUlI1U1UwYkxuR0hqcHYvUmFnck03ZGh1c2FXbgpyZkRGNVZwVFo0czkvOXNPRUowTnlqdW9LWGFtb3VUd1IxbncxOUZkSDhmMWVvbWNRNGVLdzJIa3hveHFSMzR0ClJEYUFHejNiV08rcmFUUTRTeU1LN1hGTW92VVVpTGwrR08yM2wxQk5QZmh6a2NEa1o5N200MzRmMVFWbzk5dGIKaFY0YklMYW9GSXFmMDlNMEUxL2ZhQitKQ1I4WWtsN0xvWGd1ejNWUi9CVW5kMHZNc1RNV3VlRC8yblZ1VVpPMAowcFVtVFVCUTJRZDc2NTdrL0hXZC8xd2NFQUw5ZFhOUmJ4aEROZkdnYzNXdFFoZ2djcFlMUWFmTGE4MXRseHljCndEZ042UGRFbFVseGdYL091b1oxeWxNWkU3eHBzTXRwbjFBd2VvZFZibTNRcDVBMXlkeWJFNjF1MXVyWXoxTHQKV05aOWVPZkFxZXdpWVFIVlpXTUM0YTRTYSsyeU02cTVQWC80ZytUYklUaDhoWkp3WFBLNUVEaWc3dkYxNEpQbApsRVJOcHdpYTNuNmEwUDcwM0hQTjZya1FPNWtWVGRpVXNmaWJNdGNVSkhMeVdXUUFSQm15ZVZma0lDYWFlWUVsCkVMa3N3YTlOVkVTS3ZRYUhLU2lIWkZoRUkwYUF2Y3BBam0xRU9oRWEraFNSaE9vRnlVT3ZHK2NNT2ZjQlNtTDAKVW1sRC9sZmFuVFQwems1YXFzcEVrWEdlQnczMXJtWi8wQVpPalYycHBSeFdXZWt6bzlCZjdnNmVMVFk0VUNDNQpNeVB0em14OVRiWHJOQW5YaGlGNkxnNWgyOFI0MkdUZTVBZDZUSGtGOVMvS2hxOHUwZFk1U0EyR1VGMUViUU84Ckt3SURBUUFCbzFNd1VUQWRCZ05WSFE0RUZnUVVBKzZxL2tjOGZUUVUxRURxekdSZktRcHE2bTB3SHdZRFZSMGoKQkJnd0ZvQVVBKzZxL2tjOGZUUVUxRURxekdSZktRcHE2bTB3RHdZRFZSMFRBUUgvQkFVd0F3RUIvekFOQmdrcQpoa2lHOXcwQkFRc0ZBQU9DQWdFQVF1dkZoMitIUUZ5OFZUY1VnYWxFVmlheXQxelFHdjRySVNtaXEzRzZJZVhQClhTNGd3cUhrRnpUd1p2bW9oVHdtT0N3Vy94RjRLZ3htRmJ5V05yRUpKRXFjYmVkcVVXVi8wQkNhRm1KdlVkZEkKK2V4L2lEM0ZlYnU4QUZJK0o4bEJIL0NlbkRpU0xIaGd5c2VZOHV3Um5Yc3NoWDVSbkRpckYxdUtyMUo2MzVhbgpHbHlGSU5Vcm5RbGd1RXZ0cjBlbkdVbHpUNXJXajR5MEFXVWRiWGk4dlJzaldvUThKYTBCeFRyWVloL2tPL0ZJClB0cVg3d3N4b0pNREVRNzF6aHdhN1dMUWMyZGZiMnJBcjF1QmgzcU53aVZCSU5CK3QzSkZ2NzJ4cXNXZ3VySUIKSWYyc29SVEkybk1lNWdURzFEZmQrVjI0amZhL3lJZ0FzTWpDem1HUUsyMHZvYlg0c0FWbm1QVmJaZzlTTEZaaQpNaWRrbjlPOVU2OE1FT2UzSWFzY2xkN2ZwNUprK0hyYkpVNi9zMTZFRVIvQWdEM09vajN3UmdqVENTK0FERCtqCnhvMk84Vlgya1BvMDNBTitpWWEzbkptbE1GekNyelQrOFp4U25QNUZxR2cyRUNFYnFxQTBCLzVuYVZwbWRZYVYKNDFvRkxzd2NGbTJpcUdhd2JzTE45eDN0dklDdUU5M0hZazFqNzJQelhhaVNMdHB2YW1IMWRSWUMrSFVNMUwwTwo0OUNOTVlKZUwvTmx5UXVaSm0yWDBxRE5TWG1STUw4SFU5c093V1g2cFBQSk96dXF0Z2R4Lytsa0dBZDJ3WkpVCklWYm1MNlF2emRidGEvY1NWd3NMdEJ6RzQ4YTFiNEtCYzdXTEhUd2JyZEJSVGcwVGtMWTRrdkNaZTVuTmw0RT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
minio-cert.pem: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUdyVENDQkpXZ0F3SUJBZ0lVRytCME0ycnhucWpHZHRmbzBCaGV2S0N4MGdBd0RRWUpLb1pJaHZjTkFRRUwKQlFBd2RURUxNQWtHQTFVRUJoTUNWVk14RXpBUkJnTlZCQWdNQ2tOaGJHbG1iM0p1YVdFeEZUQVRCZ05WQkFjTQpERk5oYmtaeVlXNWphWE5qYnpFUk1BOEdBMVVFQ2d3SVFtRnJaWEo1U1VFeEVUQVBCZ05WQkFzTUNGTmxZM1Z5CmFYUjVNUlF3RWdZRFZRUUREQXRDWVd0bGNubEpRUzFEUVRBZUZ3MHlOakF4TVRjeE5EVTBORGhhRncweU9UQXgKTVRZeE5EVTBORGhhTUlHS01Rc3dDUVlEVlFRR0V3SlZVekVUTUJFR0ExVUVDQXdLUTJGc2FXWnZjbTVwWVRFVgpNQk1HQTFVRUJ3d01VMkZ1Um5KaGJtTnBjMk52TVJFd0R3WURWUVFLREFoQ1lXdGxjbmxKUVRFUU1BNEdBMVVFCkN3d0hVM1J2Y21GblpURXFNQ2dHQTFVRUF3d2hiV2x1YVc4dVltRnJaWEo1TFdsaExuTjJZeTVqYkhWemRHVnkKTG14dlkyRnNNSUlDSWpBTkJna3Foa2lHOXcwQkFRRUZBQU9DQWc4QU1JSUNDZ0tDQWdFQW5qdTd0cFF3dkYvVgprL011UmhySllWME1KcXRyRkovTlgrMU9MSmFNaEZYL0tZMTBMUCtCNjV3L3BsWkd5SnRidFVkV2o1d1pMclpRCm1KYjNwNFR0dUs0QlQxZ3UzYlNaS0lIUU5lQWc4MUtzTUdxKzV1WE9vUFdOckFoaDRoWU9KNDVtSXNZYmEwRGQKTzJNRnY5V3VXVm4zVDZGenpNN3FMZENKelpOamVhQjdtVEpqZEhHcjg0aVQ4NkFFQStIeXd2c3FPb2paZStVagpLdThYcmp4VUdSL2VQRnZRQ3lNZFdnRmJqd2lqSi9CbjhSQ0FSSXVpRXNzalNMUVdPZ1FncklBVHZFRi9jeVVkClpLR2hhYzMvNEk3MXhEV2hYNzFYV1l3T05FbXJRNmNHelhtdmNVTVY4SHZFV016YjA1UnBPWXp5bUtyYnhOTDQKZVdOYUt2cnZjWnpjTXpwSU00UmVHS3cyTjlzQUdzM1lCVFI3V1hMS1dnbkxZYnNvSHgzZGRadXlRK0hKd0RUWApxcFh1dFloYW9DZmZIMjNuTU1GaUFLMWltZWJCSTFoVWNBaVB2cFN4N2RJM21nTlA0YWZOL29xaE1PUGc4VHhtCndNZWt2cHovN2NXYkNPTmprZDlkcTBWTExTVyt0cUlmZlZRajBMT1VQdlhyTE9tUG1jTDZsU2xSTzg4NVRWdngKSkRidDJYVVJtaHFKenBhcklmTmhGOUVscEhtYnNkc2xtWVBvLzlKV1VtcmtiSjZBYWZkbEpuckNUR3hKcGl3TAowbEpveEl3dnFZdDhEQnVjMWNORktKSVNMWkl5bzZ1WFJ1TlZvTnByeGdmVXZsOENscDNnUyttSVNGZzMzdTJrCkpjYnF6bnZ2YzN0YmxIZTB4ZzJNSE1JVlRkWmlSamNDQXdFQUFhT0NBUjB3Z2dFWk1Bc0dBMVVkRHdRRUF3SUUKTURBZEJnTlZIU1VFRmpBVUJnZ3JCZ0VGQlFjREFRWUlLd1lCQlFVSEF3SXdnYW9HQTFVZEVRU0JvakNCbjRJaApiV2x1YVc4dVltRnJaWEo1TFdsaExuTjJZeTVqYkhWemRHVnlMbXh2WTJGc2dnOXRhVzVwYnk1aVlXdGxjbmt0CmFXR0NLVzFwYm1sdkxXTnZibk52YkdVdVltRnJaWEo1TFdsaExuTjJZeTVqYkhWemRHVnlMbXh2WTJGc2doZHQKYVc1cGJ5MWpiMjV6YjJ4bExtSmhhMlZ5ZVMxcFlZSUZiV2x1YVcrQ0RXMXBibWx2TFdOdmJuTnZiR1dDQ1d4dgpZMkZzYUc5emRJY0Vmd0FBQVRBZEJnTlZIUTRFRmdRVXJXMzNxOWkreE5MdVZjcGUrKzlxUE56dVF4VXdId1lEClZSMGpCQmd3Rm9BVUErNnEva2M4ZlRRVTFFRHF6R1JmS1FwcTZtMHdEUVlKS29aSWh2Y05BUUVMQlFBRGdnSUIKQUlTT0NieFJWd2xtaWdjNldLM3hUaUJxNlJGMGNzdnV5NjJNYnI3N0h0Q3VPNHgxOTI5QjAxMXd1djdnWEhmawpPQm9qa3ZwZnFQUXlRZTk2dGFwRGJqYWZpeStlSHBPSm1lQjFNN2lQKzEzTGJJRjN3alE5SXZ1TWtnN3FQczZXCk15cnBvd1ZwK1BPeDU2SlJRK3lPcm5nakgxRG9FMW45NDBJR0lTZkRmb2g3cTljMkNvSlA2cWo3YWxid1U4RU0KYlB5d3B4WkFTNjYydUtBR0VNcFNLK2NuMXdUU3ZWSDN6NDVrMk9yUmwvQ05PZ0Fad1dyNzdQK1A3bW9FSHlmUQplR0dpclJTWWswUkJtYzdOTGd0Ry9iV0JQTEt4dHIyQmZidDFwZFZXakd4TmlwaDR4c1Z0YldpNnVOeUxYNE1qCllyK0FVUjd1MHlCVWxSc1VUL1dDbkFYdnRmNzRwcWJaNDZ3YjFnajEreU1GWHRNUldVV2NFcU1GVXRJdEsrUngKSlA4bUErbW9qdEdOcGdJZG53b1pPMTBsQkZ2U0ZKL1hGUFlsbHFKOGJpWmJ3RDZtWElzei9WQmdDRHlyQ3kybwpQeVhzR29HNDdTZkovQldvdHUwRkNaZERreCtQU0k2bkdKdyt2empSVzJ3TU9tdzJiZ0xkK3dsVDNpTXp4V3VOCkNidk0wSmpTQ2J3YVMvdE84emtrNGROeVhkWWNQbkJPNVJlM1IrQUV3T0RxV2F4T0ZXYmVUWW10bHlOTXdNT04Kd2lpR3pLWjkwaHM5QSt6M2x0QldNNmxNOFBJaFplcHB1TEZNTDRMSjZ0Ti93anJrOEVVMFBNT2ZlUTVjWXprZAp3QXdiRjVXaVhDd2JtaERCbW4xVVBrMjdPQUV0TzRSM3luaXM0eGNJbmVTQwotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==
minio-key.pem: LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlKS2dJQkFBS0NBZ0VBbmp1N3RwUXd2Ri9Way9NdVJockpZVjBNSnF0ckZKL05YKzFPTEphTWhGWC9LWTEwCkxQK0I2NXcvcGxaR3lKdGJ0VWRXajV3WkxyWlFtSmIzcDRUdHVLNEJUMWd1M2JTWktJSFFOZUFnODFLc01HcSsKNXVYT29QV05yQWhoNGhZT0o0NW1Jc1liYTBEZE8yTUZ2OVd1V1ZuM1Q2Rnp6TTdxTGRDSnpaTmplYUI3bVRKagpkSEdyODRpVDg2QUVBK0h5d3ZzcU9valplK1VqS3U4WHJqeFVHUi9lUEZ2UUN5TWRXZ0ZiandpakovQm44UkNBClJJdWlFc3NqU0xRV09nUWdySUFUdkVGL2N5VWRaS0doYWMzLzRJNzF4RFdoWDcxWFdZd09ORW1yUTZjR3pYbXYKY1VNVjhIdkVXTXpiMDVScE9ZenltS3JieE5MNGVXTmFLdnJ2Y1p6Y016cElNNFJlR0t3Mk45c0FHczNZQlRSNwpXWExLV2duTFlic29IeDNkZFp1eVErSEp3RFRYcXBYdXRZaGFvQ2ZmSDIzbk1NRmlBSzFpbWViQkkxaFVjQWlQCnZwU3g3ZEkzbWdOUDRhZk4vb3FoTU9QZzhUeG13TWVrdnB6LzdjV2JDT05qa2Q5ZHEwVkxMU1crdHFJZmZWUWoKMExPVVB2WHJMT21QbWNMNmxTbFJPODg1VFZ2eEpEYnQyWFVSbWhxSnpwYXJJZk5oRjlFbHBIbWJzZHNsbVlQbwovOUpXVW1ya2JKNkFhZmRsSm5yQ1RHeEpwaXdMMGxKb3hJd3ZxWXQ4REJ1YzFjTkZLSklTTFpJeW82dVhSdU5WCm9OcHJ4Z2ZVdmw4Q2xwM2dTK21JU0ZnMzN1MmtKY2Jxem52dmMzdGJsSGUweGcyTUhNSVZUZFppUmpjQ0F3RUEKQVFLQ0FnQVhHQWE4amdKUzYvWERBeUlFejFJRzZNcW1OaXlKdFEwSGJCNFZ1ZDlHVFRyUmVMaTAvSkdjcnBCSAptWjM1RjF1YUtKQkVvM2ExYjV4eHVNN3FYeWRHNWZhQSt4RFVBTkM5cmJ5U3NHUit2dGtzczllcTRXMTM1bjdICjFlMWJUdmEvNVRPWTdhc0F5MVcrbmlRdnJHTW0zVStRQ3JOWTkvWUx1N3p4Q1FyaXJINTlqSEloZzVtaUVKUHYKWWJKVVVyellva20yZzFTaWxYMjlmV25LWHpteTlRaTliSFQvdXg5RWpLQXRUd2hwQXRoWXdaekc1RTVDU2UyYgpaZFU4b0crWVhaVUR5OWRyR2NhaGNrbVpwSndzelJDbmsyQTdGZXBTd25Nc1JIZy9obmdpc3hqZEFmcUl2N2VYCmNrYS9LWkQxK2xGSjROMzBhd29peFZKYXBZY2VwZk1hMS83dE1vZFFsOXdaOVZLWTZ6YlEwL1U0QndlMGQ0OEYKQ1graVlOZ2t4UWRmdVdwMFU2RkVlUTluR2tPMndZQUJxMCtzSDIxU2puRTQvTXh5anpLZCtjR08zUkdkTktxUwo5QTVubkh4MUwxVDN6Z0hOR2ZHS1F6Tzg5L09sVDBWVE80OEhkamxva0hmc3VTVG03N2tkZkU1TVFwamF2WktaCmo0QXoyWENGWkM2WkJxYm9wZlA1amVNWmI1WDU0aXVtclIwcHpRRGloQ3ZZWmYxTlVDa3hFdFZmaTF1eUtvLzYKMzhQK0pDcEtWSk1mYzhyYTFlWVRTV0ZaZDc1UXVMK1FtblpPVUNqQktXMnNQQTVGbERyTkVTdTQrREhCVVFtOApxdUxDUGdLaHA1TmVJRDVjcm5iVElYclVCb2tQdHpsWm10SEs5TFRYeTNPWkdXUmt5UUtDQVFFQTF0OFRhdWdCCmpMUVI2NXBTbGRXTDdVSnVGVlZUVW9DSlB5cHlOQjkvc1VsTC9Nd1RBbHlhWHoveU15Q2VCdWt3cnBMT1M0NHMKaG5kQlJOL3ZsdkRCaEovVjdYaDBEUWUvMGlqczRJdGNYQ1lpN3hFcWZOd1FQTUJEKzVyWkdKeU1iOEtLV3YwSwpBUnhES0k0YytLUkQwemQ1d1ZtelZSTjdLZlUzT3FXbGV1TjNMTFZqN3R6YU9kT2xSU0E3YWlCTS9odWQ1VFE5CkUwcEF3SDhIaGMxYW1qaUM4dEJsYUZlZ0lodXpJenhNU1hIUkJVcDNsaDMvb2UzNjM4Mm5zRUxjbE4xaFVWRGsKdDNUQVpjdHlYRkIzSEUydHpJdm9xRUpRN0Zkd3MwNUVQZXFIODFOekdjRlRNS1NieVJzNmtYYzhFQ0hPc2lYSAp6TDd5dlI3S1BmVHZhd0tDQVFFQXZJVlZRV3lpcU5ScTdTQkd3czg3WjVjZFlJOGdwSkI4bFlySklqaTRyVUVFCk14MmdVeCtYaHM5QTJSczQxZ1hsYXdvRWNqUDliZXJ2ZTYzMVZOV0M0K3Q5cFR2Vm9qcVhtcnZaNVVEN3V2Q0kKRlFPLy9JSUdqa0tFZkRwSUgvcWxEUlZlbEZTU1JjOVEvY0piZlNwS2JsYnJYZ1FtdG5KOWpsQkpFL1NMSW14UAo3OURVdGlmWmx5cFVRbDl5YzhSZzFSYmpyQWtjQVZhOVBHMXQ3cGhTanJkZHRKbXRVUmtFdGhYWTc3R3c5WHJUCjgwWlJHdkpIS0lsWlBmaHF2WlNGQzg4MVJJZ0lpRitCdWxobm16TUo0dmdYeXEwVCtRY1VGN0FBdFBRU0hyMHIKQm5wN1JlUDF5R201UDd0MjNmRU00Z0R1RENBUHQ0R1lZeUxFY2dpelpRS0NBUUVBaE9MVGJITnR1ZW9IaHpFYQowQ1dRY3p4NVBtSlZ0SmxmeUJ2bEkwMHp1SjMvQzZuZU84Q3ZqQ2JORUVlazA5dFZ5ekZwdWhxRWVPaTZDZkdBCmlGWC9LSmw5UVc4VVBwYkRVQ01WVkUxNzRsV0hsMWlEY1ZMY0MrWlFaUVBBTGROcm14YXlZRkZMNWFIbit1WGgKRHZqd0pXbVN1RHhVaDFJVUFyL3YxeXBvckJhUE5xdzcwSmJ2czRHc0haTXdpNUxNYXY4RGFLUWsvWkFYZWJWVwpIcThBMEk0UWxrREI1b1VDdVBWdWxXVU9QUUhSNWpiR3ZLVnkybCtHbnZEZU8wa3VpRFpkb0YrcUE3ZUY0YTZ2CjNGMjdQRnJpR0xXU1ByVTh2TjNiQ2xsbUpQQ3VBWk5qaE5NbU10Z3FySFpWZzI4OVN6RE5WeW04Wm1qVlVKY0IKTnM0TFh3S0NBUUVBdDRua0tBOFpDZC9NdmxJbk1qREorQit5ZFRqRG9oUWRod1lZcmgybEJ1QitzemxMeHNIQwpKM2lOL1JFNHMzNElEcjh3OXZMUThIdkRicGs5ZWJ0cGRIYm4yNysyVFB4WWIwZ21hc0ZxazJUc1IvRmZyL256CllmczJ1eStPMnJ1T2gzOWZsbkFEL0wxTGI5TVNlWGg4QUpMVkViSmU4ay9qRjNQb3dlbmFyOGZkeDNCOE4xL3kKd3U1dUhEU0szRlM3cFpwa1REQ09PR3QzVDJhR21iMW8yeE9Bd255L3RXM3pIVWVGN2s4RUp1clBnVkRiVTYyLwpRNkw4NUkxL2RsVXJkd1RrS25WNlFUTWl2UWFtei8zUHlVNmE4ekt3ZUVuQThSTGtqVWYyZ0VEUnE3d0JXbGtICkNIaU41NU9ldFpPaVpFSmRnQ2FTeHFrQWNMdi9uN29DMVFLQ0FRRUFxRkNHVDFWWG4yUGEwdFQ2ZCtvRnZYYTkKSENVMTFEbG9ad1hUOTY4cmhGOEJSazdLRVVvZXpFdjZiTUZsdUwzak9jMDNkUUs1WlF0anZUQkZKYlc3NVZMVgphcnR1U0xiVS9CVytnRGtZWmszQ241Z1B6QzlIbGRDa3MrS0lDOHJBcUNPdW9NRzc3SFlOVys3ckJLS3did2w1CmtDQW1uSmE2NWZZczdDWXpEOThmb0crVmxsc25VWCttMUxMZUtjclBEZWlpcW5kQmFTWi9NRVJnWmE2SXZid2kKMDVtNnFqL3ZXL1ZiV05iNVR4Z2N5MWpOOXpRbWJONFJ0Zmdzc3NKRmZzS3JNS0lxVnp1NkNMcEJ4eXBOUXZHYQo0S3UzVFZGcm9zaFlxWUpMVm1xVklYT1dWZk9IQTRMT2VpNmtDZTlHaTQydjdqS014M0dEK25CK1BWbVFXZz09Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0tCg==
@@ -666,7 +666,7 @@ replicas:
  - name: tenant-service
    count: 1
  - name: training-service
    count: 1
    count: 2  # Safe with MinIO storage
  - name: forecasting-service
    count: 1
  - name: sales-service

@@ -200,7 +200,7 @@ replicas:
  - name: tenant-service
    count: 2
  - name: training-service
    count: 2
    count: 3  # Safe with MinIO storage - no PVC conflicts
  - name: forecasting-service
    count: 3
  - name: sales-service
@@ -1,12 +0,0 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: model-storage
  namespace: bakery-ia
spec:
  storageClassName: microk8s-hostpath  # MicroK8s storage class
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi  # Increased for production (adjust based on your needs)
@@ -31,7 +31,7 @@
      "y": 3,
      "w": 6,
      "h": 3,
      "i": "api-calls-per-user",
      "i": "user-actions",
      "moved": false,
      "static": false
    },
@@ -40,7 +40,16 @@
      "y": 3,
      "w": 6,
      "h": 3,
      "i": "session-duration",
      "i": "page-views",
      "moved": false,
      "static": false
    },
    {
      "x": 0,
      "y": 6,
      "w": 12,
      "h": 4,
      "i": "geo-visitors",
      "moved": false,
      "static": false
    }
@@ -51,7 +60,7 @@
      "name": "service",
      "description": "Filter by service name",
      "type": "QUERY",
      "queryValue": "SELECT DISTINCT(resource_attrs['service.name']) as value FROM signoz_metrics.distributed_time_series_v4_1day WHERE metric_name = 'active_users' AND value != '' ORDER BY value",
      "queryValue": "SELECT DISTINCT(serviceName) FROM signoz_traces.distributed_signoz_index_v2 ORDER BY serviceName",
      "customValue": "",
      "textboxValue": "",
      "showALLOption": true,
@@ -59,7 +68,7 @@
      "order": 1,
      "modificationUUID": "",
      "sort": "ASC",
      "selectedValue": null
      "selectedValue": "bakery-frontend"
    }
  },
  "widgets": [
@@ -75,26 +84,26 @@
    "builder": {
      "queryData": [
        {
          "dataSource": "metrics",
          "dataSource": "traces",
          "queryName": "A",
          "aggregateOperator": "sum",
          "aggregateOperator": "count_distinct",
          "aggregateAttribute": {
            "key": "active_users",
            "dataType": "int64",
            "type": "Gauge",
            "isColumn": false
            "key": "user.id",
            "dataType": "string",
            "type": "tag",
            "isColumn": true
          },
          "timeAggregation": "latest",
          "timeAggregation": "count_distinct",
          "spaceAggregation": "sum",
          "functions": [],
          "filters": {
            "items": [
              {
                "key": {
                  "key": "service.name",
                  "key": "serviceName",
                  "dataType": "string",
                  "type": "resource",
                  "isColumn": false
                  "type": "tag",
                  "isColumn": true
                },
                "op": "=",
                "value": "{{.service}}"
@@ -110,13 +119,13 @@
          "orderBy": [],
          "groupBy": [
            {
              "key": "service.name",
              "key": "serviceName",
              "dataType": "string",
              "type": "resource",
              "isColumn": false
              "type": "tag",
              "isColumn": true
            }
          ],
          "legend": "{{service.name}}",
          "legend": "{{serviceName}}",
          "reduceTo": "sum"
        }
      ],
@@ -139,16 +148,16 @@
    "builder": {
      "queryData": [
        {
          "dataSource": "metrics",
          "dataSource": "traces",
          "queryName": "A",
          "aggregateOperator": "sum",
          "aggregateOperator": "count",
          "aggregateAttribute": {
            "key": "user_sessions_total",
            "dataType": "int64",
            "type": "Counter",
            "isColumn": false
            "key": "session.id",
            "dataType": "string",
            "type": "tag",
            "isColumn": true
          },
          "timeAggregation": "sum",
          "timeAggregation": "count",
          "spaceAggregation": "sum",
          "functions": [],
          "filters": {
@@ -162,6 +171,16 @@
          },
          "op": "=",
          "value": "{{.service}}"
        },
        {
          "key": {
            "key": "span.name",
            "dataType": "string",
            "type": "tag",
            "isColumn": true
          },
          "op": "=",
          "value": "user_session"
        }
      ],
      "op": "AND"
@@ -192,9 +211,9 @@
    "yAxisUnit": "none"
  },
  {
    "id": "api-calls-per-user",
    "title": "API Calls per User",
    "description": "Average API calls per user by service",
    "id": "user-actions",
    "title": "User Actions",
    "description": "Total user actions by service",
    "isStacked": false,
    "nullZeroValues": "zero",
    "opacity": "1",
@@ -203,17 +222,17 @@
    "builder": {
      "queryData": [
        {
          "dataSource": "metrics",
          "dataSource": "traces",
          "queryName": "A",
          "aggregateOperator": "avg",
          "aggregateOperator": "count",
          "aggregateAttribute": {
            "key": "api_calls_per_user",
            "dataType": "float64",
            "type": "Gauge",
            "isColumn": false
            "key": "user.action",
            "dataType": "string",
            "type": "tag",
            "isColumn": true
          },
          "timeAggregation": "avg",
          "spaceAggregation": "avg",
          "timeAggregation": "count",
          "spaceAggregation": "sum",
          "functions": [],
          "filters": {
            "items": [
@@ -226,6 +245,16 @@
          },
          "op": "=",
          "value": "{{.service}}"
        },
        {
          "key": {
            "key": "span.name",
            "dataType": "string",
            "type": "tag",
            "isColumn": true
          },
          "op": "=",
          "value": "user_action"
        }
      ],
      "op": "AND"
@@ -245,7 +274,7 @@
        }
      ],
      "legend": "{{serviceName}}",
      "reduceTo": "avg"
      "reduceTo": "sum"
    }
  ],
  "queryFormulas": []
@@ -256,9 +285,9 @@
    "yAxisUnit": "none"
  },
  {
    "id": "session-duration",
    "title": "Session Duration",
    "description": "Average session duration by service",
    "id": "page-views",
    "title": "Page Views",
    "description": "Total page views by service",
    "isStacked": false,
    "nullZeroValues": "zero",
    "opacity": "1",
@@ -267,17 +296,17 @@
    "builder": {
      "queryData": [
        {
          "dataSource": "metrics",
          "dataSource": "traces",
          "queryName": "A",
          "aggregateOperator": "avg",
          "aggregateOperator": "count",
          "aggregateAttribute": {
            "key": "session_duration_seconds",
            "dataType": "float64",
            "type": "Gauge",
            "isColumn": false
            "key": "page.path",
            "dataType": "string",
            "type": "tag",
            "isColumn": true
          },
          "timeAggregation": "avg",
          "spaceAggregation": "avg",
          "timeAggregation": "count",
          "spaceAggregation": "sum",
          "functions": [],
          "filters": {
            "items": [
@@ -290,6 +319,16 @@
          },
          "op": "=",
          "value": "{{.service}}"
        },
        {
          "key": {
            "key": "span.name",
            "dataType": "string",
            "type": "tag",
            "isColumn": true
          },
          "op": "=",
          "value": "page_view"
        }
      ],
      "op": "AND"
@@ -309,7 +348,7 @@
        }
      ],
      "legend": "{{serviceName}}",
      "reduceTo": "avg"
      "reduceTo": "sum"
    }
  ],
  "queryFormulas": []
@@ -317,7 +356,74 @@
    "queryType": "builder"
  },
  "fillSpans": false,
  "yAxisUnit": "seconds"
  "yAxisUnit": "none"
},
{
  "id": "geo-visitors",
  "title": "Geolocation Visitors",
  "description": "Number of visitors who shared location data",
  "isStacked": false,
  "nullZeroValues": "zero",
  "opacity": "1",
  "panelTypes": "value",
  "query": {
    "builder": {
      "queryData": [
        {
          "dataSource": "traces",
          "queryName": "A",
          "aggregateOperator": "count",
          "aggregateAttribute": {
            "key": "user.id",
            "dataType": "string",
            "type": "tag",
            "isColumn": true
          },
          "timeAggregation": "count",
          "spaceAggregation": "sum",
          "functions": [],
          "filters": {
            "items": [
              {
                "key": {
                  "key": "serviceName",
                  "dataType": "string",
                  "type": "tag",
                  "isColumn": true
                },
                "op": "=",
                "value": "{{.service}}"
              },
              {
                "key": {
                  "key": "span.name",
                  "dataType": "string",
                  "type": "tag",
                  "isColumn": true
                },
                "op": "=",
                "value": "user_location"
              }
            ],
            "op": "AND"
          },
          "expression": "A",
          "disabled": false,
          "having": [],
          "stepInterval": 60,
          "limit": null,
          "orderBy": [],
          "groupBy": [],
          "legend": "Visitors with Location Data (See GEOLOCATION_VISUALIZATION_GUIDE.md for map integration)",
          "reduceTo": "sum"
        }
      ],
      "queryFormulas": []
    },
    "queryType": "builder"
  },
  "fillSpans": false,
  "yAxisUnit": "none"
}
]
}
@@ -1 +1 @@
1BE074336AF19EA8C676D7E8D0185EBCA0B1D1FF
1BE074336AF19EA8C676D7E8D0185EBCA0B1D202
111 infrastructure/tls/generate-minio-certificates.sh (Executable file)
@@ -0,0 +1,111 @@
#!/usr/bin/env bash

# Generate MinIO TLS certificates using existing CA
# This script generates certificates for the MinIO server

set -e

TLS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CA_DIR="$TLS_DIR/ca"
MINIO_DIR="$TLS_DIR/minio"

mkdir -p "$MINIO_DIR"

echo "Generating MinIO TLS certificates using existing CA..."
echo "CA Directory: $CA_DIR"
echo "MinIO Directory: $MINIO_DIR"
echo ""

# Check if CA exists
if [ ! -f "$CA_DIR/ca-cert.pem" ] || [ ! -f "$CA_DIR/ca-key.pem" ]; then
    echo "ERROR: CA certificates not found. Please run generate-certificates.sh first."
    exit 1
fi

# Generate MinIO server private key
echo "Step 1: Generating MinIO server private key..."
openssl genrsa -out "$MINIO_DIR/minio-key.pem" 4096

# Convert to traditional RSA format (required by MinIO)
echo "Step 1b: Converting private key to traditional RSA format..."
openssl rsa -in "$MINIO_DIR/minio-key.pem" -traditional -out "$MINIO_DIR/minio-key.pem"

# Create certificate signing request (CSR)
echo "Step 2: Creating MinIO certificate signing request..."
openssl req -new -key "$MINIO_DIR/minio-key.pem" -out "$MINIO_DIR/minio.csr" \
    -subj "/C=US/ST=California/L=SanFrancisco/O=BakeryIA/OU=Storage/CN=minio.bakery-ia.svc.cluster.local"

# Create SAN (Subject Alternative Names) configuration for MinIO
cat > "$MINIO_DIR/san.cnf" <<EOF
[req]
distinguished_name = req_distinguished_name
req_extensions = v3_req
prompt = no

[req_distinguished_name]
C = US
ST = California
L = SanFrancisco
O = BakeryIA
OU = Storage
CN = minio.bakery-ia.svc.cluster.local

[v3_req]
keyUsage = keyEncipherment, dataEncipherment
extendedKeyUsage = serverAuth, clientAuth
subjectAltName = @alt_names

[alt_names]
DNS.1 = minio.bakery-ia.svc.cluster.local
DNS.2 = minio.bakery-ia
DNS.3 = minio-console.bakery-ia.svc.cluster.local
DNS.4 = minio-console.bakery-ia
DNS.5 = minio
DNS.6 = minio-console
DNS.7 = localhost
IP.1 = 127.0.0.1
EOF

# Sign the certificate with the CA (valid for 3 years)
echo "Step 3: Signing MinIO certificate with CA..."
openssl x509 -req -in "$MINIO_DIR/minio.csr" \
    -CA "$CA_DIR/ca-cert.pem" -CAkey "$CA_DIR/ca-key.pem" -CAcreateserial \
    -out "$MINIO_DIR/minio-cert.pem" -days 1095 \
    -extensions v3_req -extfile "$MINIO_DIR/san.cnf"

# Set proper permissions
chmod 600 "$MINIO_DIR/minio-key.pem"
chmod 644 "$MINIO_DIR/minio-cert.pem"

# Copy CA cert for MinIO
cp "$CA_DIR/ca-cert.pem" "$MINIO_DIR/ca-cert.pem"

echo ""
echo "Step 4: Verifying MinIO certificates..."

# Verify MinIO certificate
echo "MinIO certificate details:"
openssl x509 -in "$MINIO_DIR/minio-cert.pem" -noout -subject -issuer -dates
openssl verify -CAfile "$CA_DIR/ca-cert.pem" "$MINIO_DIR/minio-cert.pem"

echo ""
echo "==================="
echo "✓ MinIO certificates generated successfully!"
echo ""
echo "Generated files:"
echo "  MinIO:"
echo "  - $MINIO_DIR/minio-cert.pem (Server certificate)"
echo "  - $MINIO_DIR/minio-key.pem (Server private key - traditional RSA format)"
echo "  - $MINIO_DIR/ca-cert.pem (CA certificate)"
echo ""
echo "Important Notes:"
echo "  • Private key is in traditional RSA format (BEGIN RSA PRIVATE KEY)"
echo "  • This format is required by MinIO to avoid 'The private key contains additional data' error"
echo "  • Certificates follow the standardized Opaque secret structure"
echo ""
echo "Next steps:"
echo "  1. Update the Kubernetes minio-tls secret with these certificates"
echo "  2. Apply the updated secret to your cluster"
echo "  3. Restart MinIO pods if necessary"
echo ""
echo "For more details, see: docs/MINIO_TLS_FIX_SUMMARY.md"
33 infrastructure/tls/minio/ca-cert.pem (Normal file)
@@ -0,0 +1,33 @@
-----BEGIN CERTIFICATE-----
MIIFyzCCA7OgAwIBAgIUPgOqNY+ZoKByQ1MfO8lkiGhOmxIwDQYJKoZIhvcNAQEL
BQAwdTELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNhbGlmb3JuaWExFTATBgNVBAcM
DFNhbkZyYW5jaXNjbzERMA8GA1UECgwIQmFrZXJ5SUExETAPBgNVBAsMCFNlY3Vy
aXR5MRQwEgYDVQQDDAtCYWtlcnlJQS1DQTAeFw0yNTEwMTgxNDIyMTRaFw0zNTEw
MTYxNDIyMTRaMHUxCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRUw
EwYDVQQHDAxTYW5GcmFuY2lzY28xETAPBgNVBAoMCEJha2VyeUlBMREwDwYDVQQL
DAhTZWN1cml0eTEUMBIGA1UEAwwLQmFrZXJ5SUEtQ0EwggIiMA0GCSqGSIb3DQEB
AQUAA4ICDwAwggIKAoICAQDRD5O2egkYg9HNRR5SU0bLnGHjpv/RagrM7dhusaWn
rfDF5VpTZ4s9/9sOEJ0NyjuoKXamouTwR1nw19FdH8f1eomcQ4eKw2HkxoxqR34t
RDaAGz3bWO+raTQ4SyMK7XFMovUUiLl+GO23l1BNPfhzkcDkZ97m434f1QVo99tb
hV4bILaoFIqf09M0E1/faB+JCR8Ykl7LoXguz3VR/BUnd0vMsTMWueD/2nVuUZO0
0pUmTUBQ2Qd7657k/HWd/1wcEAL9dXNRbxhDNfGgc3WtQhggcpYLQafLa81tlxyc
wDgN6PdElUlxgX/OuoZ1ylMZE7xpsMtpn1AweodVbm3Qp5A1ydybE61u1urYz1Lt
WNZ9eOfAqewiYQHVZWMC4a4Sa+2yM6q5PX/4g+TbITh8hZJwXPK5EDig7vF14JPl
lERNpwia3n6a0P703HPN6rkQO5kVTdiUsfibMtcUJHLyWWQARBmyeVfkICaaeYEl
ELkswa9NVESKvQaHKSiHZFhEI0aAvcpAjm1EOhEa+hSRhOoFyUOvG+cMOfcBSmL0
UmlD/lfanTT0zk5aqspEkXGeBw31rmZ/0AZOjV2ppRxWWekzo9Bf7g6eLTY4UCC5
MyPtzmx9TbXrNAnXhiF6Lg5h28R42GTe5Ad6THkF9S/Khq8u0dY5SA2GUF1EbQO8
KwIDAQABo1MwUTAdBgNVHQ4EFgQUA+6q/kc8fTQU1EDqzGRfKQpq6m0wHwYDVR0j
BBgwFoAUA+6q/kc8fTQU1EDqzGRfKQpq6m0wDwYDVR0TAQH/BAUwAwEB/zANBgkq
hkiG9w0BAQsFAAOCAgEAQuvFh2+HQFy8VTcUgalEViayt1zQGv4rISmiq3G6IeXP
XS4gwqHkFzTwZvmohTwmOCwW/xF4KgxmFbyWNrEJJEqcbedqUWV/0BCaFmJvUddI
+ex/iD3Febu8AFI+J8lBH/CenDiSLHhgyseY8uwRnXsshX5RnDirF1uKr1J635an
GlyFINUrnQlguEvtr0enGUlzT5rWj4y0AWUdbXi8vRsjWoQ8Ja0BxTrYYh/kO/FI
PtqX7wsxoJMDEQ71zhwa7WLQc2dfb2rAr1uBh3qNwiVBINB+t3JFv72xqsWgurIB
If2soRTI2nMe5gTG1Dfd+V24jfa/yIgAsMjCzmGQK20vobX4sAVnmPVbZg9SLFZi
Midkn9O9U68MEOe3Iascld7fp5Jk+HrbJU6/s16EER/AgD3Ooj3wRgjTCS+ADD+j
xo2O8VX2kPo03AN+iYa3nJmlMFzCrzT+8ZxSnP5FqGg2ECEbqqA0B/5naVpmdYaV
41oFLswcFm2iqGawbsLN9x3tvICuE93HYk1j72PzXaiSLtpvamH1dRYC+HUM1L0O
49CNMYJeL/NlyQuZJm2X0qDNSXmRML8HU9sOwWX6pPPJOzuqtgdx/+lkGAd2wZJU
IVbmL6Qvzdbta/cSVwsLtBzG48a1b4KBc7WLHTwbrdBRTg0TkLY4kvCZe5nNl4E=
-----END CERTIFICATE-----
38 infrastructure/tls/minio/minio-cert.pem (Normal file)
@@ -0,0 +1,38 @@
-----BEGIN CERTIFICATE-----
MIIGrTCCBJWgAwIBAgIUG+B0M2rxnqjGdtfo0BhevKCx0gAwDQYJKoZIhvcNAQEL
BQAwdTELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNhbGlmb3JuaWExFTATBgNVBAcM
DFNhbkZyYW5jaXNjbzERMA8GA1UECgwIQmFrZXJ5SUExETAPBgNVBAsMCFNlY3Vy
aXR5MRQwEgYDVQQDDAtCYWtlcnlJQS1DQTAeFw0yNjAxMTcxNDU0NDhaFw0yOTAx
MTYxNDU0NDhaMIGKMQswCQYDVQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEV
MBMGA1UEBwwMU2FuRnJhbmNpc2NvMREwDwYDVQQKDAhCYWtlcnlJQTEQMA4GA1UE
CwwHU3RvcmFnZTEqMCgGA1UEAwwhbWluaW8uYmFrZXJ5LWlhLnN2Yy5jbHVzdGVy
LmxvY2FsMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAnju7tpQwvF/V
k/MuRhrJYV0MJqtrFJ/NX+1OLJaMhFX/KY10LP+B65w/plZGyJtbtUdWj5wZLrZQ
mJb3p4TtuK4BT1gu3bSZKIHQNeAg81KsMGq+5uXOoPWNrAhh4hYOJ45mIsYba0Dd
O2MFv9WuWVn3T6FzzM7qLdCJzZNjeaB7mTJjdHGr84iT86AEA+HywvsqOojZe+Uj
Ku8XrjxUGR/ePFvQCyMdWgFbjwijJ/Bn8RCARIuiEssjSLQWOgQgrIATvEF/cyUd
ZKGhac3/4I71xDWhX71XWYwONEmrQ6cGzXmvcUMV8HvEWMzb05RpOYzymKrbxNL4
eWNaKvrvcZzcMzpIM4ReGKw2N9sAGs3YBTR7WXLKWgnLYbsoHx3ddZuyQ+HJwDTX
qpXutYhaoCffH23nMMFiAK1imebBI1hUcAiPvpSx7dI3mgNP4afN/oqhMOPg8Txm
wMekvpz/7cWbCONjkd9dq0VLLSW+tqIffVQj0LOUPvXrLOmPmcL6lSlRO885TVvx
JDbt2XURmhqJzparIfNhF9ElpHmbsdslmYPo/9JWUmrkbJ6AafdlJnrCTGxJpiwL
0lJoxIwvqYt8DBuc1cNFKJISLZIyo6uXRuNVoNprxgfUvl8Clp3gS+mISFg33u2k
Jcbqznvvc3tblHe0xg2MHMIVTdZiRjcCAwEAAaOCAR0wggEZMAsGA1UdDwQEAwIE
MDAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwgaoGA1UdEQSBojCBn4Ih
bWluaW8uYmFrZXJ5LWlhLnN2Yy5jbHVzdGVyLmxvY2Fsgg9taW5pby5iYWtlcnkt
aWGCKW1pbmlvLWNvbnNvbGUuYmFrZXJ5LWlhLnN2Yy5jbHVzdGVyLmxvY2Fsghdt
aW5pby1jb25zb2xlLmJha2VyeS1pYYIFbWluaW+CDW1pbmlvLWNvbnNvbGWCCWxv
Y2FsaG9zdIcEfwAAATAdBgNVHQ4EFgQUrW33q9i+xNLuVcpe++9qPNzuQxUwHwYD
VR0jBBgwFoAUA+6q/kc8fTQU1EDqzGRfKQpq6m0wDQYJKoZIhvcNAQELBQADggIB
AISOCbxRVwlmigc6WK3xTiBq6RF0csvuy62Mbr77HtCuO4x1929B011wuv7gXHfk
OBojkvpfqPQyQe96tapDbjafiy+eHpOJmeB1M7iP+13LbIF3wjQ9IvuMkg7qPs6W
MyrpowVp+POx56JRQ+yOrngjH1DoE1n940IGISfDfoh7q9c2CoJP6qj7albwU8EM
bPywpxZAS662uKAGEMpSK+cn1wTSvVH3z45k2OrRl/CNOgAZwWr77P+P7moEHyfQ
eGGirRSYk0RBmc7NLgtG/bWBPLKxtr2Bfbt1pdVWjGxNiph4xsVtbWi6uNyLX4Mj
Yr+AUR7u0yBUlRsUT/WCnAXvtf74pqbZ46wb1gj1+yMFXtMRWUWcEqMFUtItK+Rx
JP8mA+mojtGNpgIdnwoZO10lBFvSFJ/XFPYllqJ8biZbwD6mXIsz/VBgCDyrCy2o
PyXsGoG47SfJ/BWotu0FCZdDkx+PSI6nGJw+vzjRW2wMOmw2bgLd+wlT3iMzxWuN
CbvM0JjSCbwaS/tO8zkk4dNyXdYcPnBO5Re3R+AEwODqWaxOFWbeTYmtlyNMwMON
wiiGzKZ90hs9A+z3ltBWM6lM8PIhZeppuLFML4LJ6tN/wjrk8EU0PMOfeQ5cYzkd
wAwbF5WiXCwbmhDBmn1UPk27OAEtO4R3ynis4xcIneSC
-----END CERTIFICATE-----
51 infrastructure/tls/minio/minio-key.pem (Normal file)
@@ -0,0 +1,51 @@
-----BEGIN RSA PRIVATE KEY-----
MIIJKgIBAAKCAgEAnju7tpQwvF/Vk/MuRhrJYV0MJqtrFJ/NX+1OLJaMhFX/KY10
LP+B65w/plZGyJtbtUdWj5wZLrZQmJb3p4TtuK4BT1gu3bSZKIHQNeAg81KsMGq+
5uXOoPWNrAhh4hYOJ45mIsYba0DdO2MFv9WuWVn3T6FzzM7qLdCJzZNjeaB7mTJj
dHGr84iT86AEA+HywvsqOojZe+UjKu8XrjxUGR/ePFvQCyMdWgFbjwijJ/Bn8RCA
RIuiEssjSLQWOgQgrIATvEF/cyUdZKGhac3/4I71xDWhX71XWYwONEmrQ6cGzXmv
cUMV8HvEWMzb05RpOYzymKrbxNL4eWNaKvrvcZzcMzpIM4ReGKw2N9sAGs3YBTR7
WXLKWgnLYbsoHx3ddZuyQ+HJwDTXqpXutYhaoCffH23nMMFiAK1imebBI1hUcAiP
vpSx7dI3mgNP4afN/oqhMOPg8TxmwMekvpz/7cWbCONjkd9dq0VLLSW+tqIffVQj
0LOUPvXrLOmPmcL6lSlRO885TVvxJDbt2XURmhqJzparIfNhF9ElpHmbsdslmYPo
/9JWUmrkbJ6AafdlJnrCTGxJpiwL0lJoxIwvqYt8DBuc1cNFKJISLZIyo6uXRuNV
oNprxgfUvl8Clp3gS+mISFg33u2kJcbqznvvc3tblHe0xg2MHMIVTdZiRjcCAwEA
AQKCAgAXGAa8jgJS6/XDAyIEz1IG6MqmNiyJtQ0HbB4Vud9GTTrReLi0/JGcrpBH
mZ35F1uaKJBEo3a1b5xxuM7qXydG5faA+xDUANC9rbySsGR+vtkss9eq4W135n7H
1e1bTva/5TOY7asAy1W+niQvrGMm3U+QCrNY9/YLu7zxCQrirH59jHIhg5miEJPv
YbJUUrzYokm2g1SilX29fWnKXzmy9Qi9bHT/ux9EjKAtTwhpAthYwZzG5E5CSe2b
ZdU8oG+YXZUDy9drGcahckmZpJwszRCnk2A7FepSwnMsRHg/hngisxjdAfqIv7eX
cka/KZD1+lFJ4N30awoixVJapYcepfMa1/7tModQl9wZ9VKY6zbQ0/U4Bwe0d48F
CX+iYNgkxQdfuWp0U6FEeQ9nGkO2wYABq0+sH21SjnE4/MxyjzKd+cGO3RGdNKqS
9A5nnHx1L1T3zgHNGfGKQzO89/OlT0VTO48HdjlokHfsuSTm77kdfE5MQpjavZKZ
j4Az2XCFZC6ZBqbopfP5jeMZb5X54iumrR0pzQDihCvYZf1NUCkxEtVfi1uyKo/6
38P+JCpKVJMfc8ra1eYTSWFZd75QuL+QmnZOUCjBKW2sPA5FlDrNESu4+DHBUQm8
quLCPgKhp5NeID5crnbTIXrUBokPtzlZmtHK9LTXy3OZGWRkyQKCAQEA1t8TaugB
jLQR65pSldWL7UJuFVVTUoCJPypyNB9/sUlL/MwTAlyaXz/yMyCeBukwrpLOS44s
hndBRN/vlvDBhJ/V7Xh0DQe/0ijs4ItcXCYi7xEqfNwQPMBD+5rZGJyMb8KKWv0K
ARxDKI4c+KRD0zd5wVmzVRN7KfU3OqWleuN3LLVj7tzaOdOlRSA7aiBM/hud5TQ9
E0pAwH8Hhc1amjiC8tBlaFegIhuzIzxMSXHRBUp3lh3/oe36382nsELclN1hUVDk
t3TAZctyXFB3HE2tzIvoqEJQ7Fdws05EPeqH81NzGcFTMKSbyRs6kXc8ECHOsiXH
zL7yvR7KPfTvawKCAQEAvIVVQWyiqNRq7SBGws87Z5cdYI8gpJB8lYrJIji4rUEE
Mx2gUx+Xhs9A2Rs41gXlawoEcjP9berve631VNWC4+t9pTvVojqXmrvZ5UD7uvCI
FQO//IIGjkKEfDpIH/qlDRVelFSSRc9Q/cJbfSpKblbrXgQmtnJ9jlBJE/SLImxP
79DUtifZlypUQl9yc8Rg1RbjrAkcAVa9PG1t7phSjrddtJmtURkEthXY77Gw9XrT
80ZRGvJHKIlZPfhqvZSFC881RIgIiF+BulhnmzMJ4vgXyq0T+QcUF7AAtPQSHr0r
Bnp7ReP1yGm5P7t23fEM4gDuDCAPt4GYYyLEcgizZQKCAQEAhOLTbHNtueoHhzEa
0CWQczx5PmJVtJlfyBvlI00zuJ3/C6neO8CvjCbNEEek09tVyzFpuhqEeOi6CfGA
iFX/KJl9QW8UPpbDUCMVVE174lWHl1iDcVLcC+ZQZQPALdNrmxayYFFL5aHn+uXh
DvjwJWmSuDxUh1IUAr/v1yporBaPNqw70Jbvs4GsHZMwi5LMav8DaKQk/ZAXebVW
Hq8A0I4QlkDB5oUCuPVulWUOPQHR5jbGvKVy2l+GnvDeO0kuiDZdoF+qA7eF4a6v
3F27PFriGLWSPrU8vN3bCllmJPCuAZNjhNMmMtgqrHZVg289SzDNVym8ZmjVUJcB
Ns4LXwKCAQEAt4nkKA8ZCd/MvlInMjDJ+B+ydTjDohQdhwYYrh2lBuB+szlLxsHC
J3iN/RE4s34IDr8w9vLQ8HvDbpk9ebtpdHbn27+2TPxYb0gmasFqk2TsR/Ffr/nz
Yfs2uy+O2ruOh39flnAD/L1Lb9MSeXh8AJLVEbJe8k/jF3Powenar8fdx3B8N1/y
wu5uHDSK3FS7pZpkTDCOOGt3T2aGmb1o2xOAwny/tW3zHUeF7k8EJurPgVDbU62/
Q6L85I1/dlUrdwTkKnV6QTMivQamz/3PyU6a8zKweEnA8RLkjUf2gEDRq7wBWlkH
CHiN55OetZOiZEJdgCaSxqkAcLv/n7oC1QKCAQEAqFCGT1VXn2Pa0tT6d+oFvXa9
HCU11DloZwXT968rhF8BRk7KEUoezEv6bMFluL3jOc03dQK5ZQtjvTBFJbW75VLV
artuSLbU/BW+gDkYZk3Cn5gPzC9HldCks+KIC8rAqCOuoMG77HYNW+7rBKKwbwl5
kCAmnJa65fYs7CYzD98foG+VllsnUX+m1LLeKcrPDeiiqndBaSZ/MERgZa6Ivbwi
05m6qj/vW/VbWNb5Txgcy1jN9zQmbN4RtfgsssJFfsKrMKIqVzu6CLpBxypNQvGa
4Ku3TVFroshYqYJLVmqVIXOWVfOHA4LOei6kCe9Gi42v7jKMx3GD+nB+PVmQWg==
-----END RSA PRIVATE KEY-----
28 infrastructure/tls/minio/minio.csr (Normal file)
@@ -0,0 +1,28 @@
-----BEGIN CERTIFICATE REQUEST-----
MIIE0DCCArgCAQAwgYoxCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlh
MRUwEwYDVQQHDAxTYW5GcmFuY2lzY28xETAPBgNVBAoMCEJha2VyeUlBMRAwDgYD
VQQLDAdTdG9yYWdlMSowKAYDVQQDDCFtaW5pby5iYWtlcnktaWEuc3ZjLmNsdXN0
ZXIubG9jYWwwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCN+XNRDxtM
siHWRzlX5bWLjd2la1w0HeUbzZ8l4mVRQTvgVZ2ilhZ4g749D3hE2HK4PV7yDXyN
ofIz91s5CCIK9iuReukoYeTST0VRrNNUd72oe1oxp4v+iPOOQE8K6IH63ztc8EHZ
0cHxNVrm7HCVLFFG09WGn9th9b51OVhCUFTyQqfvnL3rhvL0vvx7xTuVISGhw8wc
/7DZPR2OFdSS8raVpWqy+vi0lgjQnbdcaI43t+2tfqHi3E3mJ1h3SR4YQJh0FWMI
ULcIW3GcOKxQ2r9mAh1JeAR9BtVRQnFF4ZBlnN4nwd0IdmFLofvFgnylIJ5lm1kT
/aIkAhljKPiWZhDmLayMlxf+YgtzPhoUtGt5tPfmXPDosYv5BNl/7PD3gem+Gqmn
KRb4Sxz+6jDDMCijk/+QSh9ri3rDCjoiwxgi1p7lFDZukbR02XSVUmrTZljmDOPg
tnMXhVNbr0ftWBtraynEGEIKIJrAG/XWmlgL+9rQ/2VHRuPbBplfY1azIvAHcxag
xK4xW0cA9HAj8WSmEt30WplLXCeHv8UY2FKSk3cPmBp7QIAwKxqdeFuUQTcsT8p6
wDCwZuP8irD9JMmbaLn+GyRFJkbXfcmLnWEKO7LqtEP7tfKEb+Vz2kTKv488heQo
AUVPJRBiZsrwcoWlGlQ2iWmM9bW3ZGkzhwIDAQABoAAwDQYJKoZIhvcNAQELBQAD
ggIBABINqJhSOOh+ZnFFKCz0hRIKZdAiDYXtwNDY0nGS6ZUkCTIqbx8m+iGmAyn+
zCtoN5AK73U6QMeMHKebL6Yfhjh5HvVWqRb/dbXwgasVusOQMXmYCvkLOuSKjSUf
3jWhJrA9I1Vg41vfoZmyy3u6g7/uRmOgSAhVB0Dk44GAlzW0jpZIBveQ4H0M1PHV
HGAXaZKLmmnHTpC3ilsaQTKF5yjVXMmJ85VnyMlo/Kxiv+XujKdt1Dp0BRk+IPpW
DFNAY2joOAPOvJImH/7k0YrE3JZl11e4pyI1BO5SpKllWfPMhw0kqsTrtrnTeAgW
eguP+fkXZaCeOyUyvatUMW7+lVcKu2Gqs/tPpn7PaAVyb1dTN5L9E32o6f9dJ7ew
mD+mw+p+dKwhTSsf20irVZmNEYTyE40fnQQeR41fM5a8uGMxegIfKSwtWgDgEjE7
z3L034/g+RQop0nyRhCb52HKlWHniGM/w+/S+2Rn6Ac7R7L3gAuNrdCLD9bWIcXe
jaGQeh0zrp/TtXk/D+81KLdixGbWCzoeCggFzaFZUHf/4AT8lI6qZhp8IouBIL51
b2A9Gz1yro0y6YaJai2HDDu7emVCCGgu45i8yxh09jwIR/MRjBWBDHw29xixbIiN
af6SBracitghXnTxpOjLLfN1amp7i0CUe51HzxLblxNcnZM7
-----END CERTIFICATE REQUEST-----
27 infrastructure/tls/minio/san.cnf (Normal file)
@@ -0,0 +1,27 @@
[req]
distinguished_name = req_distinguished_name
req_extensions = v3_req
prompt = no

[req_distinguished_name]
C = US
ST = California
L = SanFrancisco
O = BakeryIA
OU = Storage
CN = minio.bakery-ia.svc.cluster.local

[v3_req]
keyUsage = keyEncipherment, dataEncipherment
extendedKeyUsage = serverAuth, clientAuth
subjectAltName = @alt_names

[alt_names]
DNS.1 = minio.bakery-ia.svc.cluster.local
DNS.2 = minio.bakery-ia
DNS.3 = minio-console.bakery-ia.svc.cluster.local
DNS.4 = minio-console.bakery-ia
DNS.5 = minio
DNS.6 = minio-console
DNS.7 = localhost
IP.1 = 127.0.0.1
270 scripts/cleanup_disk_space.py (Executable file)
@@ -0,0 +1,270 @@
#!/usr/bin/env python3
"""
Bakery IA Disk Space Cleanup Script
===================================

This script performs comprehensive cleanup of Docker and Kubernetes resources
to prevent disk space exhaustion during development.

Features:
- Automatic cleanup based on disk space thresholds
- Manual cleanup on demand
- Comprehensive resource cleanup (images, containers, volumes, etc.)
- Detailed reporting and logging

Usage:
    ./scripts/cleanup_disk_space.py [--manual] [--threshold GB] [--verbose]

Environment Variables:
    TILT_DISK_THRESHOLD_GB - Minimum free space required (default: 10GB)
    TILT_CLEANUP_VERBOSE - Set to "true" for verbose output
"""

import subprocess
import sys
import os
import argparse
import time
from datetime import datetime

def get_disk_space():
    """Get available disk space in GB"""
    try:
        result = subprocess.run(['df', '/', '--output=avail', '-h'],
                                capture_output=True, text=True, check=True)
        # Extract numeric value from output like "15G"
        output = result.stdout.strip().split('\n')[-1].strip()
        if 'T' in output:
            return float(output.replace('T', '')) * 1024
        elif 'G' in output:
            return float(output.replace('G', ''))
        elif 'M' in output:
            return float(output.replace('M', '')) / 1024
        else:
            return 0
    except Exception as e:
        print(f"⚠️ Could not check disk space: {e}")
        return 999  # Assume plenty of space if we can't check

def cleanup_docker_images(verbose=False):
    """Clean up old and unused Docker images"""
    if verbose:
        print("🧹 Cleaning up Docker images...")

    try:
        # Remove dangling images
        if verbose:
            print("  Removing dangling images...")
        subprocess.run(['docker', 'image', 'prune', '-f'],
                       capture_output=True, text=True)

        # Remove unused images (not referenced by any container)
        if verbose:
            print("  Removing unused images...")
        subprocess.run(['docker', 'image', 'prune', '-a', '-f'],
                       capture_output=True, text=True)

        # Remove old images (older than 2 hours)
        if verbose:
            print("  Removing old images (>2 hours)...")
        subprocess.run(['docker', 'image', 'prune', '-a', '-f',
                        '--filter', 'until=2h'],
                       capture_output=True, text=True)

        if verbose:
            print("✅ Docker image cleanup completed")
        return True
    except Exception as e:
        print(f"⚠️ Docker image cleanup failed: {e}")
        return False


def cleanup_docker_containers(verbose=False):
    """Clean up stopped containers"""
    if verbose:
        print("🧹 Cleaning up Docker containers...")

    try:
        # Remove stopped containers
        if verbose:
            print("  Removing stopped containers...")
        subprocess.run(['docker', 'container', 'prune', '-f'],
                       capture_output=True, text=True)

        # Remove old containers (older than 1 hour)
        if verbose:
            print("  Removing old containers (>1 hour)...")
        subprocess.run(['docker', 'container', 'prune', '-f',
                        '--filter', 'until=1h'],
                       capture_output=True, text=True)

        if verbose:
            print("✅ Docker container cleanup completed")
        return True
    except Exception as e:
        print(f"⚠️ Docker container cleanup failed: {e}")
        return False


def cleanup_docker_volumes(verbose=False):
    """Clean up unused volumes"""
    if verbose:
        print("🧹 Cleaning up Docker volumes...")

    try:
        # Remove unused volumes
        if verbose:
            print("  Removing unused volumes...")
        subprocess.run(['docker', 'volume', 'prune', '-f'],
                       capture_output=True, text=True)

        if verbose:
            print("✅ Docker volume cleanup completed")
        return True
    except Exception as e:
        print(f"⚠️ Docker volume cleanup failed: {e}")
        return False


def cleanup_docker_system(verbose=False):
    """Clean up Docker system (build cache, networks, etc.)"""
    if verbose:
        print("🧹 Cleaning up Docker system...")

    try:
        # Remove build cache
        if verbose:
            print("  Removing build cache...")
        subprocess.run(['docker', 'builder', 'prune', '-f'],
                       capture_output=True, text=True)

        # Remove unused networks
        if verbose:
            print("  Removing unused networks...")
        subprocess.run(['docker', 'network', 'prune', '-f'],
                       capture_output=True, text=True)

        if verbose:
            print("✅ Docker system cleanup completed")
        return True
    except Exception as e:
        print(f"⚠️ Docker system cleanup failed: {e}")
        return False

def cleanup_kubernetes_resources(verbose=False):
    """Clean up Kubernetes resources"""
    if verbose:
        print("🧹 Cleaning up Kubernetes resources...")

    try:
        # Remove completed jobs (the field selector matches completion state, not age)
        if verbose:
            print("  Removing completed jobs...")
        subprocess.run(['kubectl', 'delete', 'jobs', '-n', 'bakery-ia',
                        '--field-selector=status.successful=1'],
                       capture_output=True, text=True)

        # Remove failed jobs
        if verbose:
            print("  Removing failed jobs...")
        subprocess.run(['kubectl', 'delete', 'jobs', '-n', 'bakery-ia',
                        '--field-selector=status.failed>0'],
                       capture_output=True, text=True)

        if verbose:
            print("✅ Kubernetes resource cleanup completed")
        return True
    except Exception as e:
        print(f"⚠️ Kubernetes resource cleanup failed: {e}")
        return False

def perform_cleanup(manual=False, threshold_gb=10, verbose=False):
    """Perform comprehensive cleanup"""

    print("\n" + "="*60)
    print("🚀 STARTING COMPREHENSIVE CLEANUP")
    print("="*60)

    if manual:
        print("🎛️ Mode: MANUAL (forced cleanup)")
    else:
        print("🎛️ Mode: AUTOMATIC (threshold-based)")

    print(f"📊 Threshold: {threshold_gb}GB free space")

    # Check disk space before cleanup
    free_space_before = get_disk_space()
    print(f"📊 Disk space before cleanup: {free_space_before:.1f}GB free")

    # Check if cleanup is needed (unless manual)
    if not manual and free_space_before >= threshold_gb:
        print("✅ Sufficient disk space available, skipping cleanup")
        return True

    cleanup_results = []

    # Perform all cleanup operations
    cleanup_results.append(("Docker Images", cleanup_docker_images(verbose)))
    cleanup_results.append(("Docker Containers", cleanup_docker_containers(verbose)))
    cleanup_results.append(("Docker Volumes", cleanup_docker_volumes(verbose)))
    cleanup_results.append(("Docker System", cleanup_docker_system(verbose)))
    cleanup_results.append(("Kubernetes Resources", cleanup_kubernetes_resources(verbose)))

    # Check disk space after cleanup
    free_space_after = get_disk_space()
    space_reclaimed = free_space_after - free_space_before

    print(f"\n📊 Disk space after cleanup: {free_space_after:.1f}GB free")
    print(f"🎯 Space reclaimed: {space_reclaimed:.1f}GB")

    # Summary
    print("\n📋 CLEANUP SUMMARY:")
    for name, success in cleanup_results:
        status = "✅ SUCCESS" if success else "❌ FAILED"
        print(f"  {name}: {status}")

    print("="*60)
    print("✅ CLEANUP COMPLETED")
    print("="*60 + "\n")

    return True


def main():
    parser = argparse.ArgumentParser(
        description='Bakery IA Disk Space Cleanup Script',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  ./cleanup_disk_space.py                # Automatic cleanup (checks threshold)
  ./cleanup_disk_space.py --manual       # Force cleanup regardless of threshold
  ./cleanup_disk_space.py --threshold 5  # Use 5GB threshold
  ./cleanup_disk_space.py --verbose      # Verbose output
"""
    )

    parser.add_argument('--manual', action='store_true',
                        help='Force cleanup regardless of disk space threshold')
    parser.add_argument('--threshold', type=int, default=10,
                        help='Minimum free space required in GB (default: 10)')
    parser.add_argument('--verbose', action='store_true',
                        help='Enable verbose output')

    args = parser.parse_args()

    # Get threshold from environment variable if set
    env_threshold = os.getenv('TILT_DISK_THRESHOLD_GB')
    if env_threshold:
        try:
            args.threshold = int(env_threshold)
        except ValueError:
            pass

    # Get verbose from environment variable if set
    env_verbose = os.getenv('TILT_CLEANUP_VERBOSE', 'false').lower()
    if env_verbose == 'true':
        args.verbose = True

    return perform_cleanup(
        manual=args.manual,
        threshold_gb=args.threshold,
        verbose=args.verbose
    )


if __name__ == '__main__':
    success = main()
    sys.exit(0 if success else 1)
@@ -1,10 +1,10 @@
-# Forecasting Dockerfile
-# Add this stage at the top of each service Dockerfile
+# Forecasting Service Dockerfile with MinIO Support
+# Multi-stage build for optimized production image
 FROM python:3.11-slim AS shared
 WORKDIR /shared
 COPY shared/ /shared/

-# Then your main service stage
+# Main service stage
 FROM python:3.11-slim

 WORKDIR /app
@@ -49,6 +49,18 @@ class ForecastingSettings(BaseServiceSettings):
     PREDICTION_CACHE_TTL_HOURS: int = int(os.getenv("PREDICTION_CACHE_TTL_HOURS", "6"))
     FORECAST_BATCH_SIZE: int = int(os.getenv("FORECAST_BATCH_SIZE", "100"))

+    # MinIO Configuration
+    MINIO_ENDPOINT: str = os.getenv("MINIO_ENDPOINT", "minio.bakery-ia.svc.cluster.local:9000")
+    MINIO_ACCESS_KEY: str = os.getenv("FORECASTING_MINIO_ACCESS_KEY", "forecasting-service")
+    MINIO_SECRET_KEY: str = os.getenv("FORECASTING_MINIO_SECRET_KEY", "forecasting-secret-key")
+    MINIO_USE_SSL: bool = os.getenv("MINIO_USE_SSL", "true").lower() == "true"
+    MINIO_MODEL_BUCKET: str = os.getenv("MINIO_MODEL_BUCKET", "training-models")
+    MINIO_CONSOLE_PORT: str = os.getenv("MINIO_CONSOLE_PORT", "9001")
+    MINIO_API_PORT: str = os.getenv("MINIO_API_PORT", "9000")
+    MINIO_REGION: str = os.getenv("MINIO_REGION", "us-east-1")
+    MINIO_MODEL_LIFECYCLE_DAYS: int = int(os.getenv("MINIO_MODEL_LIFECYCLE_DAYS", "90"))
+    MINIO_CACHE_TTL_SECONDS: int = int(os.getenv("MINIO_CACHE_TTL_SECONDS", "3600"))

     # Real-time Forecasting
     REALTIME_FORECASTING_ENABLED: bool = os.getenv("REALTIME_FORECASTING_ENABLED", "true").lower() == "true"
     FORECAST_UPDATE_INTERVAL_HOURS: int = int(os.getenv("FORECAST_UPDATE_INTERVAL_HOURS", "6"))
@@ -16,6 +16,7 @@ import httpx
 from pathlib import Path
 import os
 import joblib
+import io

 from app.core.config import settings
 from shared.monitoring.metrics import MetricsCollector
@@ -578,118 +579,114 @@ class PredictionService:
         return adjusted

     async def _load_model(self, model_id: str, model_path: str):
-        """Load model from file with improved validation and error handling"""
-
-        # Enhanced model file validation
-        if not await self._validate_model_file(model_path):
-            logger.error(f"Model file not valid: {model_path}")
-            return None
+        """Load model from MinIO with improved validation and error handling"""

         # Check cache first
         if model_id in self.model_cache:
             cached_model, cached_time = self.model_cache[model_id]
             if (datetime.now() - cached_time).seconds < self.cache_ttl:
                 logger.debug(f"Model loaded from cache: {model_id}")
                 return cached_model

+        # Validate MinIO path format
+        if not await self._validate_model_file(model_path):
+            logger.error(f"Model path not valid: {model_path}")
+            return None

         try:
-            if os.path.exists(model_path):
-                # Try multiple loading methods for compatibility
-                model = await self._load_model_safely(model_path)
-
-                if model is None:
-                    logger.error(f"Failed to load model from: {model_path}")
-                    return None
-
-                # Cache the model
-                self.model_cache[model_id] = (model, datetime.now())
-                logger.info(f"Model loaded successfully: {model_path}")
-                return model
-            else:
-                logger.error(f"Model file not found: {model_path}")
+            # Load from MinIO
+            model = await self._load_model_safely(model_path)
+
+            if model is None:
+                logger.error(f"Failed to load model from MinIO: {model_path}")
                 return None
+
+            # Cache the model
+            self.model_cache[model_id] = (model, datetime.now())
+            logger.info(f"Model loaded successfully from MinIO: {model_path}")
+            return model
+
         except Exception as e:
-            logger.error(f"Error loading model: {e}")
+            logger.error(f"Error loading model from MinIO: {e}")
             return None
     async def _load_model_safely(self, model_path: str):
-        """Safely load model with multiple fallback methods"""
-
-        # Method 1: Try joblib first (recommended for sklearn/Prophet models)
+        """Load model from MinIO storage (clean implementation - MinIO only)"""
         try:
-            logger.debug(f"Attempting to load model with joblib: {model_path}")
-            model = joblib.load(model_path)
-            logger.info(f"Model loaded successfully with joblib")
-            return model
+            # Parse MinIO path: minio://bucket_name/object_path
+            _, bucket_and_path = model_path.split("://", 1)
+            bucket_name, object_name = bucket_and_path.split("/", 1)
+
+            logger.debug(f"Loading model from MinIO: {bucket_name}/{object_name}")
+
+            # Use MinIO client
+            from shared.clients.minio_client import minio_client
+
+            # Download model data
+            model_data = minio_client.get_object(bucket_name, object_name)
+            if not model_data:
+                logger.error(f"Failed to download model from MinIO: {model_path}")
+                return None
+
+            # Try joblib first (using BytesIO since joblib.load reads from file-like objects)
+            try:
+                buffer = io.BytesIO(model_data)
+                model = joblib.load(buffer)
+                logger.info(f"Model loaded successfully from MinIO with joblib")
+                return model
+            except Exception as e:
+                logger.warning(f"Joblib loading from MinIO failed: {e}")
+
+            # Try pickle as fallback
+            try:
+                model = pickle.loads(model_data)
+                logger.info(f"Model loaded successfully from MinIO with pickle")
+                return model
+            except Exception as e:
+                logger.warning(f"Pickle loading from MinIO failed: {e}")
+
+            logger.error(f"All loading methods failed for MinIO object: {model_path}")
+            return None
+
         except Exception as e:
-            logger.warning(f"Joblib loading failed: {e}")
-
-        # Method 2: Try pickle as fallback
-        try:
-            logger.debug(f"Attempting to load model with pickle: {model_path}")
-            with open(model_path, 'rb') as f:
-                model = pickle.load(f)
-            logger.info(f"Model loaded successfully with pickle")
-            return model
-        except Exception as e:
-            logger.warning(f"Pickle loading failed: {e}")
-
-        # Method 3: Try pandas pickle (for Prophet models saved with pandas)
-        try:
-            logger.debug(f"Attempting to load model with pandas: {model_path}")
-            import pandas as pd
-            model = pd.read_pickle(model_path)
-            logger.info(f"Model loaded successfully with pandas")
-            return model
-        except Exception as e:
-            logger.warning(f"Pandas loading failed: {e}")
-
-        logger.error(f"All loading methods failed for: {model_path}")
-        return None
+            logger.error(f"Failed to load model from MinIO: {model_path}, error: {e}")
+            return None
     async def _validate_model_file(self, model_path: str) -> bool:
-        """Enhanced model file validation"""
+        """Validate MinIO model path and check object exists"""
         try:
-            if not os.path.exists(model_path):
-                logger.error(f"Model file not found: {model_path}")
+            # Validate MinIO path format
+            if not model_path.startswith("minio://"):
+                logger.error(f"Invalid model path format (expected minio://): {model_path}")
                 return False

-            # Check file size (should be > 1KB for a trained model)
-            file_size = os.path.getsize(model_path)
-            if file_size < 1024:
-                logger.warning(f"Model file too small ({file_size} bytes): {model_path}")
-                return False
-
-            # More comprehensive file format detection
+            # Parse MinIO path
             try:
-                with open(model_path, 'rb') as f:
-                    header = f.read(16)  # Read more bytes for better detection
-
-                # Check for various pickle/joblib signatures
-                valid_signatures = [
-                    b']\x93PICKLE',  # Joblib
-                    b'\x80\x03',     # Pickle protocol 3
-                    b'\x80\x04',     # Pickle protocol 4
-                    b'\x80\x05',     # Pickle protocol 5
-                    b'}\x94',        # Newer joblib format
-                    b'}\x93',        # Alternative joblib format
-                ]
-
-                is_valid_format = any(header.startswith(sig) for sig in valid_signatures)
-
-                if not is_valid_format:
-                    # Log header for debugging but don't fail validation
-                    logger.warning(f"Unrecognized file header: {header[:8]} for {model_path}")
-                    logger.info("Proceeding with loading attempt despite unrecognized header")
-                    # Return True to allow loading attempt - some valid files may have different headers
-                    return True
-
-                return True
-
-            except Exception as e:
-                logger.error(f"Error reading model file header: {e}")
+                _, bucket_and_path = model_path.split("://", 1)
+                bucket_name, object_name = bucket_and_path.split("/", 1)
+            except ValueError:
+                logger.error(f"Cannot parse MinIO path: {model_path}")
                 return False

+            # Check if object exists in MinIO
+            from shared.clients.minio_client import minio_client
+
+            if not minio_client.object_exists(bucket_name, object_name):
+                logger.error(f"Model object not found in MinIO: {bucket_name}/{object_name}")
+                return False
+
+            # Check object metadata for size validation
+            metadata = minio_client.get_object_metadata(bucket_name, object_name)
+            if metadata:
+                file_size = metadata.get("size", 0)
+                if file_size < 1024:
+                    logger.warning(f"Model object too small ({file_size} bytes): {model_path}")
+                    return False
+
+                logger.debug(f"Model validated in MinIO: {bucket_name}/{object_name}, size={file_size}")
+
+            return True
+
         except Exception as e:
             logger.error(f"Model validation error: {e}")
             return False
@@ -31,6 +31,7 @@ scikit-learn==1.6.1
 pandas==2.2.3
 numpy==2.2.2
 joblib==1.4.2
+minio==7.2.2

 # Messaging
 aio-pika==9.4.3
@@ -1,10 +1,10 @@
-# Training Dockerfile
-# Add this stage at the top of each service Dockerfile
+# Training Service Dockerfile with MinIO Support
+# Multi-stage build for optimized production image
 FROM python:3.11-slim AS shared
 WORKDIR /shared
 COPY shared/ /shared/

-# Then your main service stage
+# Main service stage
 FROM python:3.11-slim

 WORKDIR /app
@@ -116,29 +116,51 @@ async def broadcast_training_progress(job_id: str, progress: dict):
     await websocket_manager.broadcast(job_id, message)
 ```

-### Model Artifact Management
+### Model Artifact Management (MinIO Storage)

 ```python
-# Model storage and retrieval
+# Model storage and retrieval using MinIO
 import joblib
-from pathlib import Path
+from shared.clients.minio_client import minio_client

-# Save trained model
+# Save trained model to MinIO
 def save_model_artifact(model: Prophet, tenant_id: str, product_id: str) -> str:
-    """Serialize and store model"""
-    model_dir = Path(f"/models/{tenant_id}/{product_id}")
-    model_dir.mkdir(parents=True, exist_ok=True)
-
-    version = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
-    model_path = model_dir / f"model_v{version}.pkl"
+    """Serialize and store model in MinIO"""
+    import io
+    model_id = str(uuid.uuid4())
+    object_name = f"models/{tenant_id}/{product_id}/{model_id}.pkl"

-    joblib.dump(model, model_path)
-    return str(model_path)
+    # Serialize model (joblib.dump writes to file-like objects)
+    buffer = io.BytesIO()
+    joblib.dump(model, buffer)
+    model_data = buffer.getvalue()

-# Load trained model
+    # Upload to MinIO
+    minio_client.put_object(
+        bucket_name="training-models",
+        object_name=object_name,
+        data=model_data,
+        content_type="application/octet-stream"
+    )
+
+    # Return MinIO path
+    return f"minio://training-models/{object_name}"
+
+# Load trained model from MinIO
 def load_model_artifact(model_path: str) -> Prophet:
-    """Load serialized model"""
-    return joblib.load(model_path)
+    """Load serialized model from MinIO"""
+    import io
+    # Parse MinIO path: minio://bucket_name/object_path
+    _, bucket_and_path = model_path.split("://", 1)
+    bucket_name, object_name = bucket_and_path.split("/", 1)
+
+    # Download from MinIO
+    model_data = minio_client.get_object(bucket_name, object_name)
+
+    # Deserialize (joblib.load reads from file-like objects)
+    buffer = io.BytesIO(model_data)
+    return joblib.load(buffer)
```
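To make the contract concrete, here is a hedged round-trip using the two helpers above. The tenant/product ids are illustrative and `training_df` is a stand-in DataFrame, not something defined in this commit:

```python
# Sketch: save a trained model, then load it back through the minio:// path.
# Assumes minio_client from shared.clients.minio_client is configured and the
# "training-models" bucket exists (see save_model_artifact above).
from prophet import Prophet

model = Prophet()
model.fit(training_df)  # training_df: a DataFrame with 'ds' and 'y' columns

path = save_model_artifact(model, tenant_id="tenant-a", product_id="baguette")
print(path)  # e.g. minio://training-models/models/tenant-a/baguette/<uuid>.pkl

restored = load_model_artifact(path)
forecast = restored.predict(restored.make_future_dataframe(periods=7))
```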

### Performance Metrics Calculation
@@ -194,8 +216,8 @@ def calculate_performance_metrics(model: Prophet, actual_data: pd.DataFrame) ->
 - **Framework**: FastAPI (Python 3.11+) - Async web framework with WebSocket support
 - **Database**: PostgreSQL 17 - Training logs, model metadata, job queue
 - **ML Library**: Prophet (fbprophet) - Time series forecasting
-- **Model Storage**: Joblib - Model serialization
-- **File System**: Persistent volumes - Model artifact storage
+- **Model Storage**: MinIO (S3-compatible) - Distributed object storage with TLS
+- **Serialization**: Joblib - Model serialization
 - **WebSocket**: FastAPI WebSocket - Real-time progress updates
 - **Messaging**: RabbitMQ 4.1 - Training completion events
 - **ORM**: SQLAlchemy 2.0 (async) - Database abstraction
@@ -442,7 +464,13 @@ websocket_messages_sent = Counter(
 - `PORT` - Service port (default: 8004)
 - `DATABASE_URL` - PostgreSQL connection string
 - `RABBITMQ_URL` - RabbitMQ connection string
-- `MODEL_STORAGE_PATH` - Path for model artifacts (default: /models)
+
+**MinIO Configuration:**
+- `MINIO_ENDPOINT` - MinIO server endpoint (default: minio.bakery-ia.svc.cluster.local:9000)
+- `MINIO_ACCESS_KEY` - MinIO access key
+- `MINIO_SECRET_KEY` - MinIO secret key
+- `MINIO_USE_SSL` - Enable TLS (default: true)
+- `MINIO_MODEL_BUCKET` - Bucket for models (default: training-models)
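For reference, these variables map onto the `minio` SDK roughly as follows; the real wiring lives in `shared/clients/minio_client.py`, so treat this as an illustrative sketch rather than that module's code:

```python
# Sketch: build a MinIO client from the environment variables documented above.
import os

from minio import Minio

client = Minio(
    os.getenv("MINIO_ENDPOINT", "minio.bakery-ia.svc.cluster.local:9000"),
    access_key=os.getenv("MINIO_ACCESS_KEY"),
    secret_key=os.getenv("MINIO_SECRET_KEY"),
    secure=os.getenv("MINIO_USE_SSL", "true").lower() == "true",
)
print([b.name for b in client.list_buckets()])  # quick connectivity check
```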

 **Training Configuration:**
 - `MAX_CONCURRENT_JOBS` - Maximum parallel training jobs (default: 3)
@@ -462,10 +490,9 @@ websocket_messages_sent = Counter(
 - `WEBSOCKET_MAX_CONNECTIONS` - Max connections per tenant (default: 10)
 - `WEBSOCKET_MESSAGE_QUEUE_SIZE` - Message buffer size (default: 100)

-**Storage Configuration:**
-- `MODEL_RETENTION_DAYS` - Days to keep old models (default: 90)
-- `MAX_MODEL_VERSIONS_PER_PRODUCT` - Version limit (default: 10)
-- `ENABLE_MODEL_COMPRESSION` - Compress model files (default: true)
+**Storage Configuration (MinIO):**
+- `MINIO_MODEL_LIFECYCLE_DAYS` - Days to keep old model versions (default: 90)
+- `MINIO_CACHE_TTL_SECONDS` - Model cache TTL in seconds (default: 3600)

 ## Development Setup
@@ -473,7 +500,7 @@ websocket_messages_sent = Counter(
 - Python 3.11+
 - PostgreSQL 17
 - RabbitMQ 4.1
-- Persistent storage for model artifacts
+- MinIO (S3-compatible object storage)

 ### Local Development
 ```bash
@@ -488,10 +515,13 @@ pip install -r requirements.txt
 # Set environment variables
 export DATABASE_URL=postgresql://user:pass@localhost:5432/training
 export RABBITMQ_URL=amqp://guest:guest@localhost:5672/
-export MODEL_STORAGE_PATH=/tmp/models
+export MINIO_ENDPOINT=localhost:9000
+export MINIO_ACCESS_KEY=minioadmin
+export MINIO_SECRET_KEY=minioadmin
+export MINIO_USE_SSL=false  # Use true in production

-# Create model storage directory
-mkdir -p /tmp/models
+# Start MinIO locally (if not using K8s)
+docker run -p 9000:9000 -p 9001:9001 minio/minio server /data --console-address ":9001"

 # Run database migrations
 alembic upgrade head
@@ -590,7 +620,7 @@ for feature_name in poi_features.keys():
 - **External Service** - Fetch weather, traffic, holiday, and POI feature data
 - **PostgreSQL** - Store job queue, models, metrics, logs
 - **RabbitMQ** - Publish training completion events
-- **File System** - Store model artifacts
+- **MinIO** - Store model artifacts (S3-compatible object storage with TLS)

 ### Dependents (Services That Call This)
 - **Forecasting Service** - Load trained models for predictions
@@ -627,11 +657,11 @@ for feature_name in poi_features.keys():
 4. **Resource Limits** - CPU/memory limits per training job
 5. **Priority Queue** - Prioritize important products first

-### Storage Optimization
-1. **Model Compression** - Compress model artifacts (gzip)
-2. **Old Model Cleanup** - Automatic deletion after retention period
-3. **Version Limits** - Keep only N most recent versions
-4. **Deduplication** - Avoid storing identical models
+### Storage Optimization (MinIO)
+1. **Object Versioning** - MinIO maintains version history automatically
+2. **Lifecycle Policies** - Auto-cleanup old versions after 90 days (see the sketch after this list)
+3. **TLS Encryption** - Secure communication with MinIO
+4. **Distributed Storage** - MinIO handles replication and availability
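Item 2 can be expressed with the `minio` SDK's bucket-lifecycle API. A sketch only — the endpoint, credentials, and the choice to scope the rule to the `models/` prefix are assumptions; the deployed policy may instead be applied by an init job:

```python
# Sketch: apply a 90-day expiration rule to the model bucket.
from minio import Minio
from minio.commonconfig import ENABLED, Filter
from minio.lifecycleconfig import Expiration, LifecycleConfig, Rule

client = Minio(
    "minio.bakery-ia.svc.cluster.local:9000",
    access_key="training-service",      # assumed credentials
    secret_key="training-secret-key",
    secure=True,
)

config = LifecycleConfig([
    Rule(
        ENABLED,
        rule_filter=Filter(prefix="models/"),
        rule_id="expire-old-models",
        expiration=Expiration(days=90),  # matches MINIO_MODEL_LIFECYCLE_DAYS
    ),
])
client.set_bucket_lifecycle("training-models", config)
```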

 ### WebSocket Optimization
 1. **Message Batching** - Batch progress updates (every 2 seconds)
@@ -96,48 +96,48 @@ def check_system_resources() -> Dict[str, Any]:

 def check_model_storage() -> Dict[str, Any]:
-    """Check model storage health"""
+    """Check MinIO model storage health"""
     try:
-        storage_path = settings.MODEL_STORAGE_PATH
+        from shared.clients.minio_client import minio_client

-        if not os.path.exists(storage_path):
+        # Check MinIO connectivity
+        if not minio_client.health_check():
             return {
-                "status": "warning",
-                "message": f"Model storage path does not exist: {storage_path}"
+                "status": "unhealthy",
+                "message": "MinIO service is not reachable",
+                "storage_type": "minio"
             }

-        # Check if writable
-        test_file = os.path.join(storage_path, ".health_check")
-        try:
-            with open(test_file, 'w') as f:
-                f.write("test")
-            os.remove(test_file)
-            writable = True
-        except Exception:
-            writable = False
+        bucket_name = settings.MINIO_MODEL_BUCKET

-        # Count model files
-        model_files = 0
-        total_size = 0
-        for root, dirs, files in os.walk(storage_path):
-            for file in files:
-                if file.endswith('.pkl'):
-                    model_files += 1
-                    file_path = os.path.join(root, file)
-                    total_size += os.path.getsize(file_path)
+        # Check if bucket exists
+        bucket_exists = minio_client.bucket_exists(bucket_name)
+        if not bucket_exists:
+            return {
+                "status": "warning",
+                "message": f"MinIO bucket does not exist: {bucket_name}",
+                "storage_type": "minio"
+            }
+
+        # Count model files in MinIO
+        model_objects = minio_client.list_objects(bucket_name, prefix="models/")
+        model_files = [obj for obj in model_objects if obj.endswith('.pkl')]

         return {
-            "status": "healthy" if writable else "degraded",
-            "path": storage_path,
-            "writable": writable,
-            "model_files": model_files,
-            "total_size_mb": round(total_size / 1024 / 1024, 2)
+            "status": "healthy",
+            "storage_type": "minio",
+            "endpoint": settings.MINIO_ENDPOINT,
+            "bucket": bucket_name,
+            "use_ssl": settings.MINIO_USE_SSL,
+            "model_files": len(model_files),
+            "bucket_exists": bucket_exists
         }

     except Exception as e:
-        logger.error(f"Model storage check failed: {e}")
+        logger.error(f"MinIO storage check failed: {e}")
         return {
             "status": "error",
+            "storage_type": "minio",
             "error": str(e)
         }
@@ -14,7 +14,6 @@ from app.services.training_service import EnhancedTrainingService
 from datetime import datetime, timezone
 from sqlalchemy import select, delete, func
 import uuid
-import shutil

 from shared.auth.decorators import (
     get_current_user_dep,
@@ -304,10 +303,9 @@ async def delete_tenant_models_complete(
         "jobs_cancelled": 0,
         "models_deleted": 0,
-        "artifacts_deleted": 0,
-        "artifacts_files_deleted": 0,
+        "minio_objects_deleted": 0,
         "training_logs_deleted": 0,
         "performance_metrics_deleted": 0,
         "storage_freed_bytes": 0,
         "errors": []
     }
@@ -336,51 +334,35 @@ async def delete_tenant_models_complete(
             deletion_stats["errors"].append(error_msg)
             logger.error(error_msg)

-    # Step 2: Delete model artifact files from storage
+    # Step 2: Delete model artifact files from MinIO storage
     try:
-        artifacts_query = select(ModelArtifact).where(
-            ModelArtifact.tenant_id == tenant_uuid
-        )
-        artifacts_result = await db.execute(artifacts_query)
-        artifacts = artifacts_result.scalars().all()
-
-        storage_freed = 0
+        from shared.clients.minio_client import minio_client
+
+        bucket_name = settings.MINIO_MODEL_BUCKET
+        prefix = f"models/{tenant_id}/"
+
+        # List all objects for this tenant
+        objects_to_delete = minio_client.list_objects(bucket_name, prefix=prefix)
+
         files_deleted = 0

-        for artifact in artifacts:
+        for obj_name in objects_to_delete:
             try:
-                file_path = Path(artifact.file_path)
-                if file_path.exists():
-                    file_size = file_path.stat().st_size
-                    file_path.unlink()  # Delete file
-                    storage_freed += file_size
-                    files_deleted += 1
-                    logger.debug("Deleted artifact file",
-                                 file_path=str(file_path),
-                                 size_bytes=file_size)
-
-                    # Also try to delete parent directories if empty
-                    try:
-                        if file_path.parent.exists() and not any(file_path.parent.iterdir()):
-                            file_path.parent.rmdir()
-                    except:
-                        pass  # Ignore errors cleaning up directories
-
+                minio_client.delete_object(bucket_name, obj_name)
+                files_deleted += 1
+                logger.debug("Deleted MinIO object", object_name=obj_name)
             except Exception as e:
-                error_msg = f"Error deleting artifact file {artifact.file_path}: {str(e)}"
+                error_msg = f"Error deleting MinIO object {obj_name}: {str(e)}"
                 deletion_stats["errors"].append(error_msg)
                 logger.warning(error_msg)

-        deletion_stats["artifacts_files_deleted"] = files_deleted
-        deletion_stats["storage_freed_bytes"] = storage_freed
-
-        logger.info("Deleted artifact files",
+        deletion_stats["minio_objects_deleted"] = files_deleted
+
+        logger.info("Deleted MinIO objects",
                     tenant_id=tenant_id,
-                    files_deleted=files_deleted,
-                    storage_freed_mb=storage_freed / (1024 * 1024))
+                    files_deleted=files_deleted)

     except Exception as e:
-        error_msg = f"Error processing artifact files: {str(e)}"
+        error_msg = f"Error processing MinIO objects: {str(e)}"
         deletion_stats["errors"].append(error_msg)
         logger.error(error_msg)
@@ -463,19 +445,7 @@ async def delete_tenant_models_complete(
                 detail=error_msg
             )

-    # Step 4: Clean up tenant model directory
-    try:
-        tenant_model_dir = Path(settings.MODEL_STORAGE_PATH) / tenant_id
-        if tenant_model_dir.exists():
-            shutil.rmtree(tenant_model_dir)
-            logger.info("Deleted tenant model directory",
-                        directory=str(tenant_model_dir))
-    except Exception as e:
-        error_msg = f"Error deleting model directory: {str(e)}"
-        deletion_stats["errors"].append(error_msg)
-        logger.warning(error_msg)
-
-    # Models deleted successfully
+    # Step 4: Models deleted successfully (MinIO cleanup already done in Step 2)
     return {
         "success": True,
         "message": f"All training data for tenant {tenant_id} deleted successfully",
@@ -44,6 +44,18 @@ class TrainingSettings(BaseServiceSettings):
     MODEL_BACKUP_ENABLED: bool = os.getenv("MODEL_BACKUP_ENABLED", "true").lower() == "true"
     MODEL_VERSIONING_ENABLED: bool = os.getenv("MODEL_VERSIONING_ENABLED", "true").lower() == "true"

+    # MinIO Configuration
+    MINIO_ENDPOINT: str = os.getenv("MINIO_ENDPOINT", "minio.bakery-ia.svc.cluster.local:9000")
+    MINIO_ACCESS_KEY: str = os.getenv("MINIO_ACCESS_KEY", "training-service")
+    MINIO_SECRET_KEY: str = os.getenv("MINIO_SECRET_KEY", "training-secret-key")
+    MINIO_USE_SSL: bool = os.getenv("MINIO_USE_SSL", "true").lower() == "true"
+    MINIO_MODEL_BUCKET: str = os.getenv("MINIO_MODEL_BUCKET", "training-models")
+    MINIO_CONSOLE_PORT: str = os.getenv("MINIO_CONSOLE_PORT", "9001")
+    MINIO_API_PORT: str = os.getenv("MINIO_API_PORT", "9000")
+    MINIO_REGION: str = os.getenv("MINIO_REGION", "us-east-1")
+    MINIO_MODEL_LIFECYCLE_DAYS: int = int(os.getenv("MINIO_MODEL_LIFECYCLE_DAYS", "90"))
+    MINIO_CACHE_TTL_SECONDS: int = int(os.getenv("MINIO_CACHE_TTL_SECONDS", "3600"))

     # Training Configuration
     MAX_CONCURRENT_TRAINING_JOBS: int = int(os.getenv("MAX_CONCURRENT_TRAINING_JOBS", "3"))
@@ -5,6 +5,7 @@ Combines Prophet's seasonality modeling with XGBoost's pattern learning

 import pandas as pd
 import numpy as np
+import io
 from typing import Dict, List, Any, Optional, Tuple
 import structlog
 from datetime import datetime, timezone
@@ -110,8 +111,8 @@ class HybridProphetXGBoost:

         # Step 4: Get Prophet predictions on training data
         logger.info("Step 3: Generating Prophet predictions for residual calculation")
-        train_prophet_pred = self._get_prophet_predictions(prophet_result, train_df)
-        val_prophet_pred = self._get_prophet_predictions(prophet_result, val_df)
+        train_prophet_pred = await self._get_prophet_predictions(prophet_result, train_df)
+        val_prophet_pred = await self._get_prophet_predictions(prophet_result, val_df)

         # Step 5: Calculate residuals (actual - prophet_prediction)
         train_residuals = train_df['y'].values - train_prophet_pred
@@ -207,7 +208,7 @@ class HybridProphetXGBoost:

         return df_enhanced

-    def _get_prophet_predictions(
+    async def _get_prophet_predictions(
         self,
         prophet_result: Dict[str, Any],
         df: pd.DataFrame
@@ -230,8 +231,13 @@ class HybridProphetXGBoost:

         # Load the actual Prophet model from the stored path
         try:
-            import joblib
-            prophet_model = joblib.load(model_path)
+            if model_path.startswith("minio://"):
+                # Use prophet_manager to load from MinIO
+                prophet_model = await self.prophet_manager._load_model_from_minio(model_path)
+            else:
+                # Fallback to direct loading for local paths
+                import joblib
+                prophet_model = joblib.load(model_path)
         except Exception as e:
             raise ValueError(f"Failed to load Prophet model from path {model_path}: {str(e)}")
@@ -417,8 +423,13 @@ class HybridProphetXGBoost:

         # Load the Prophet model from the stored path
         try:
-            import joblib
-            prophet_model = joblib.load(prophet_model_path)
+            if prophet_model_path.startswith("minio://"):
+                # Use prophet_manager to load from MinIO
+                prophet_model = await self.prophet_manager._load_model_from_minio(prophet_model_path)
+            else:
+                # Fallback to direct loading for local paths
+                import joblib
+                prophet_model = joblib.load(prophet_model_path)
         except Exception as e:
             raise ValueError(f"Failed to load Prophet model from path {prophet_model_path}: {str(e)}")
@@ -13,6 +13,7 @@ from datetime import datetime, timedelta
 import uuid
 import os
 import joblib
+import io
 from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
 from sklearn.model_selection import TimeSeriesSplit
 import json
@@ -85,9 +86,24 @@ class BakeryProphetManager:
         self.database_manager = database_manager or create_database_manager(settings.DATABASE_URL, "training-service")
         self.db_session = None  # Will be set when session is available

-        # Ensure model storage directory exists
-        os.makedirs(settings.MODEL_STORAGE_PATH, exist_ok=True)
+        # Initialize MinIO client and ensure bucket exists
+        from shared.clients.minio_client import minio_client
+        self.minio_client = minio_client
+        self._ensure_minio_bucket()
+
+    def _ensure_minio_bucket(self):
+        """Ensure the training-models bucket exists in MinIO"""
+        try:
+            bucket_name = settings.MINIO_MODEL_BUCKET
+            if not self.minio_client.bucket_exists(bucket_name):
+                self.minio_client.create_bucket(bucket_name)
+                logger.info(f"Created MinIO bucket: {bucket_name}")
+            else:
+                logger.debug(f"MinIO bucket already exists: {bucket_name}")
+        except Exception as e:
+            logger.error(f"Failed to ensure MinIO bucket exists: {e}")
+            # Don't raise - bucket might be created by init job

     async def train_bakery_model(self,
                                  tenant_id: str,
                                  inventory_product_id: str,
@@ -706,18 +722,40 @@ class BakeryProphetManager:
                            session = None) -> str:
         """Store model with database integration"""

-        # Create model directory
-        model_dir = Path(settings.MODEL_STORAGE_PATH) / tenant_id
-        model_dir.mkdir(parents=True, exist_ok=True)
+        # Store model in MinIO (clean implementation - MinIO only)
+        # Use BytesIO buffer since joblib.dump() writes to file-like objects
+        buffer = io.BytesIO()
+        joblib.dump(model, buffer)
+        model_data = buffer.getvalue()
+        object_name = f"models/{tenant_id}/{inventory_product_id}/{model_id}.pkl"
+
+        # Use MinIO client
+        from shared.clients.minio_client import minio_client
+
+        # Upload model to MinIO
+        success = minio_client.put_object(
+            bucket_name="training-models",
+            object_name=object_name,
+            data=model_data,
+            content_type="application/octet-stream",
+            metadata={
+                "model_id": model_id,
+                "tenant_id": tenant_id,
+                "inventory_product_id": inventory_product_id,
+                "model_type": "prophet_optimized"
+            }
+        )
+
+        if not success:
+            raise Exception("Failed to upload model to MinIO")
+
+        # Return MinIO object path
+        model_path = f"minio://training-models/{object_name}"
+
+        # Calculate checksum for model data
+        import hashlib
+        model_checksum = hashlib.sha256(model_data).hexdigest()

-        # Store model file
-        model_path = model_dir / f"{model_id}.pkl"
-        joblib.dump(model, model_path)
-
-        # Calculate checksum for model file integrity
-        checksummed_file = ChecksummedFile(str(model_path))
-        model_checksum = checksummed_file.calculate_and_save_checksum()

         # Enhanced metadata with checksum
         metadata = {
             "model_id": model_id,
@@ -733,14 +771,23 @@ class BakeryProphetManager:
             "optimized_parameters": optimized_params or {},
             "created_at": datetime.now().isoformat(),
             "model_type": "prophet_optimized",
-            "file_path": str(model_path),
+            "minio_path": model_path,
             "checksum": model_checksum,
             "checksum_algorithm": "sha256"
         }

+        # Store metadata in MinIO as well
+        metadata_json = json.dumps(metadata, indent=2, default=str)
+        metadata_object_name = f"models/{tenant_id}/{inventory_product_id}/{model_id}.json"
+        minio_client.put_object(
+            bucket_name="training-models",
+            object_name=metadata_object_name,
+            data=metadata_json,
+            content_type="application/json"
+        )

-        metadata_path = model_path.with_suffix('.json')
-        with open(metadata_path, 'w') as f:
-            json.dump(metadata, f, indent=2, default=str)
+        # Define metadata_path for database record
+        metadata_path = f"minio://training-models/{metadata_object_name}"

         # Store in memory
         model_key = f"{tenant_id}:{inventory_product_id}"
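Note that the sha256 checksum now lives in the metadata JSON rather than in a sidecar file, so integrity checking has to happen against the downloaded bytes. A possible verifying reader, sketched here for illustration only (verify_model_integrity is not part of this commit; the bucket layout matches the hunk above):

    import hashlib
    import json

    def verify_model_integrity(minio_client, tenant_id: str,
                               inventory_product_id: str, model_id: str) -> bool:
        """Compare the stored sha256 against a fresh hash of the model bytes."""
        base = f"models/{tenant_id}/{inventory_product_id}/{model_id}"
        model_data = minio_client.get_object("training-models", f"{base}.pkl")
        meta_data = minio_client.get_object("training-models", f"{base}.json")
        if model_data is None or meta_data is None:
            return False
        expected = json.loads(meta_data).get("checksum")
        return hashlib.sha256(model_data).hexdigest() == expected
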
@@ -854,16 +901,10 @@ class BakeryProphetManager:
                                 model_path: str,
                                 future_dates: pd.DataFrame,
                                 regressor_columns: List[str]) -> pd.DataFrame:
-        """Generate forecast using stored model with checksum verification"""
+        """Generate forecast using stored model from MinIO"""
         try:
-            # Verify model file integrity before loading
-            checksummed_file = ChecksummedFile(model_path)
-            if not checksummed_file.load_and_verify_checksum():
-                logger.warning(f"Checksum verification failed for model: {model_path}")
-                # Still load the model but log warning
-                # In production, you might want to raise an exception instead
-
-            model = joblib.load(model_path)
+            # Load model from MinIO
+            model = await self._load_model_from_minio(model_path)

             for regressor in regressor_columns:
                 if regressor not in future_dates.columns:
@@ -876,6 +917,33 @@ class BakeryProphetManager:
         except Exception as e:
             logger.error(f"Failed to generate forecast: {str(e)}")
             raise

+    async def _load_model_from_minio(self, model_path: str):
+        """Load model from MinIO storage"""
+        try:
+            # Parse MinIO path: minio://bucket_name/object_path
+            if not model_path.startswith("minio://"):
+                raise ValueError(f"Invalid MinIO path: {model_path}")
+
+            _, bucket_and_path = model_path.split("://", 1)
+            bucket_name, object_name = bucket_and_path.split("/", 1)
+
+            logger.debug(f"Loading model from MinIO: {bucket_name}/{object_name}")
+
+            # Download model data from MinIO
+            model_data = self.minio_client.get_object(bucket_name, object_name)
+            if not model_data:
+                raise ValueError(f"Failed to download model from MinIO: {model_path}")
+
+            # Deserialize model (using BytesIO since joblib.load reads from file-like objects)
+            buffer = io.BytesIO(model_data)
+            model = joblib.load(buffer)
+            logger.info(f"Model loaded successfully from MinIO: {model_path}")
+            return model
+
+        except Exception as e:
+            logger.error(f"Failed to load model from MinIO: {model_path}, error: {e}")
+            raise
+
     async def _validate_training_data(self, df: pd.DataFrame, inventory_product_id: str):
         """Validate training data quality (unchanged)"""
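The store/load pair relies on joblib accepting file-like objects; a quick local round-trip that mirrors both sides of the convention (illustrative only, no MinIO involved):

    import io
    import joblib

    # Serialize exactly as _store_model does ...
    buffer = io.BytesIO()
    joblib.dump({"demo": "model"}, buffer)
    payload = buffer.getvalue()          # the bytes handed to put_object()

    # ... and deserialize exactly as _load_model_from_minio does
    restored = joblib.load(io.BytesIO(payload))
    assert restored == {"demo": "model"}
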
@@ -17,6 +17,7 @@ scikit-learn==1.6.1
 pandas==2.2.3
 numpy==2.2.2
 joblib==1.4.2
+minio==7.2.2
 xgboost==2.1.3

 # HTTP client

418
shared/clients/minio_client.py
Normal file
@@ -0,0 +1,418 @@
"""
MinIO Client Library
Shared client for MinIO object storage operations with TLS support
"""

import os
import io
import ssl
import time
import urllib3
from datetime import timedelta  # presigned URL expiry must be a timedelta
from typing import Optional, Dict, Any, Union
from pathlib import Path
from functools import wraps

from minio import Minio
from minio.commonconfig import CopySource  # required by copy_object in minio-py 7.x
from minio.error import S3Error
import structlog

# Configure logger
logger = structlog.get_logger()

def with_retry(max_retries: int = 3, base_delay: float = 1.0, max_delay: float = 30.0):
    """Decorator for retrying operations with exponential backoff

    Args:
        max_retries: Maximum number of retry attempts
        base_delay: Initial delay between retries in seconds
        max_delay: Maximum delay between retries in seconds
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            last_exception = None
            for attempt in range(max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except (S3Error, urllib3.exceptions.HTTPError, ConnectionError, TimeoutError) as e:
                    last_exception = e
                    if attempt < max_retries:
                        # Exponential backoff, capped at max_delay
                        delay = min(base_delay * (2 ** attempt), max_delay)
                        logger.warning(
                            f"MinIO operation failed, retrying in {delay:.1f}s",
                            attempt=attempt + 1,
                            max_retries=max_retries,
                            error=str(e)
                        )
                        time.sleep(delay)
                    else:
                        logger.error(
                            "MinIO operation failed after all retries",
                            attempts=max_retries + 1,
                            error=str(e)
                        )
                        raise last_exception
        return wrapper
    return decorator

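Usage sketch for the decorator (the fetch_manifest function is hypothetical): any S3Error or connection failure is retried with capped exponential backoff before the last exception propagates.

    @with_retry(max_retries=2, base_delay=0.5)
    def fetch_manifest(client: Minio, bucket: str) -> bytes:
        # Each failed attempt is logged and retried by the decorator
        response = client.get_object(bucket, "manifest.json")
        return response.read()
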
class MinIOClient:
    """Client for MinIO object storage operations with TLS support"""

    def __init__(self):
        """Initialize MinIO client with configuration"""
        self._client = None
        self._initialize_client()

    def _initialize_client(self) -> None:
        """Initialize MinIO client from environment variables with SSL/TLS support"""
        try:
            # Get configuration from environment
            endpoint = os.getenv("MINIO_ENDPOINT", "minio.bakery-ia.svc.cluster.local:9000")
            access_key = os.getenv("MINIO_ACCESS_KEY", os.getenv("MINIO_ROOT_USER", "admin"))
            secret_key = os.getenv("MINIO_SECRET_KEY", os.getenv("MINIO_ROOT_PASSWORD", "secure-password"))
            use_ssl = os.getenv("MINIO_USE_SSL", "true").lower() == "true"

            # TLS certificate path (optional - for cert verification)
            ca_cert_path = os.getenv("MINIO_CA_CERT_PATH", "/etc/ssl/certs/minio-ca.crt")
            # SSL verification is disabled by default for internal clusters with self-signed certs.
            # Set MINIO_VERIFY_SSL=true and provide a CA cert path in production with proper certs.
            verify_ssl = os.getenv("MINIO_VERIFY_SSL", "false").lower() == "true"

            # Prefer settings from the service configuration when available
            try:
                from app.core.config import settings
                if hasattr(settings, 'MINIO_ENDPOINT'):
                    endpoint = settings.MINIO_ENDPOINT
                    access_key = settings.MINIO_ACCESS_KEY
                    secret_key = settings.MINIO_SECRET_KEY
                    use_ssl = settings.MINIO_USE_SSL
            except ImportError:
                # Fall back to environment variables (for shared client usage)
                pass

            # Configure HTTP client with TLS settings
            http_client = None
            if use_ssl:
                if verify_ssl and os.path.exists(ca_cert_path):
                    # Verify certificates against the CA
                    http_client = urllib3.PoolManager(
                        timeout=urllib3.Timeout(connect=10.0, read=60.0),
                        maxsize=10,
                        cert_reqs='CERT_REQUIRED',
                        ca_certs=ca_cert_path,
                        retries=urllib3.Retry(
                            total=5,
                            backoff_factor=0.2,
                            status_forcelist=[500, 502, 503, 504]
                        )
                    )
                    logger.info("MinIO TLS with certificate verification enabled",
                                ca_cert_path=ca_cert_path)
                else:
                    # TLS without certificate verification (self-signed certs in internal cluster).
                    # Traffic is still encrypted; only cert validation is skipped.
                    http_client = urllib3.PoolManager(
                        timeout=urllib3.Timeout(connect=10.0, read=60.0),
                        maxsize=10,
                        cert_reqs='CERT_NONE',
                        retries=urllib3.Retry(
                            total=5,
                            backoff_factor=0.2,
                            status_forcelist=[500, 502, 503, 504]
                        )
                    )
                    # Suppress insecure request warnings for the internal cluster
                    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
                    logger.info("MinIO TLS enabled without certificate verification (internal cluster)")

            # Initialize client with SSL/TLS
            self._client = Minio(
                endpoint,
                access_key=access_key,
                secret_key=secret_key,
                secure=use_ssl,
                http_client=http_client
            )

            logger.info("MinIO client initialized successfully",
                        endpoint=endpoint,
                        use_ssl=use_ssl,
                        verify_ssl=verify_ssl if use_ssl else False)

        except Exception as e:
            logger.error("Failed to initialize MinIO client", error=str(e))
            raise

    def reconnect(self) -> bool:
        """Reconnect to MinIO server

        Useful when the connection is lost or credentials have changed.

        Returns:
            True if reconnection succeeded, False otherwise
        """
        try:
            logger.info("Attempting to reconnect to MinIO...")
            self._initialize_client()
            return True
        except Exception as e:
            logger.error("Failed to reconnect to MinIO", error=str(e))
            return False

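For reference, a minimal way to exercise this initialization path (all values illustrative; unset variables fall back to the defaults above):

    import os

    # Illustrative values - align with the MinIO deployment actually in use
    os.environ.setdefault("MINIO_ENDPOINT", "minio.bakery-ia.svc.cluster.local:9000")
    os.environ.setdefault("MINIO_ACCESS_KEY", "admin")
    os.environ.setdefault("MINIO_SECRET_KEY", "secure-password")
    os.environ.setdefault("MINIO_USE_SSL", "true")
    os.environ.setdefault("MINIO_VERIFY_SSL", "false")  # self-signed in-cluster certs

    from shared.clients.minio_client import MinIOClient

    client = MinIOClient()  # picks up the variables above in _initialize_client()
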
    @with_retry(max_retries=3, base_delay=1.0)
    def bucket_exists(self, bucket_name: str) -> bool:
        """Check if bucket exists - handles limited permissions gracefully"""
        try:
            # First try the standard method
            return self._client.bucket_exists(bucket_name)
        except S3Error as e:
            # On AccessDenied, try an alternative method for limited-permission users
            if e.code == "AccessDenied":
                logger.debug("Access denied for bucket_exists, trying alternative method",
                             bucket_name=bucket_name)
                try:
                    # Listing objects works with ListBucket permission alone.
                    # A missing bucket raises NoSuchBucket; an inaccessible one raises AccessDenied.
                    list(self._client.list_objects(bucket_name, recursive=False))
                    logger.debug("Bucket exists (verified via list_objects)", bucket_name=bucket_name)
                    return True
                except S3Error as list_error:
                    if list_error.code == "NoSuchBucket":
                        logger.debug("Bucket does not exist", bucket_name=bucket_name)
                        return False
                    else:
                        logger.error("Failed to check bucket existence (alternative method)",
                                     bucket_name=bucket_name,
                                     error=str(list_error))
                        return False
            else:
                logger.error("Failed to check bucket existence",
                             bucket_name=bucket_name,
                             error=str(e))
                return False

    def create_bucket(self, bucket_name: str, region: str = "us-east-1") -> bool:
        """Create a new bucket if it doesn't exist"""
        try:
            if not self.bucket_exists(bucket_name):
                self._client.make_bucket(bucket_name, region)
                logger.info("Created MinIO bucket", bucket_name=bucket_name)
                return True
            return False
        except S3Error as e:
            logger.error("Failed to create bucket",
                         bucket_name=bucket_name,
                         error=str(e))
            return False

    @with_retry(max_retries=3, base_delay=1.0)
    def put_object(
        self,
        bucket_name: str,
        object_name: str,
        data: Union[bytes, io.BytesIO, str, Path],
        length: Optional[int] = None,
        content_type: str = "application/octet-stream",
        metadata: Optional[Dict[str, str]] = None
    ) -> bool:
        """Upload an object to MinIO

        Args:
            bucket_name: Target bucket name
            object_name: Object key/path in the bucket
            data: Data to upload (bytes, BytesIO, string, or Path)
            length: Optional data length (calculated automatically if not provided)
            content_type: MIME type of the object
            metadata: Optional metadata dictionary

        Returns:
            True if upload succeeded, False otherwise
        """
        try:
            # Ensure bucket exists
            self.create_bucket(bucket_name)

            # Convert data to bytes if needed
            if isinstance(data, str):
                data = data.encode('utf-8')
            elif isinstance(data, Path):
                with open(data, 'rb') as f:
                    data = f.read()
            elif isinstance(data, io.BytesIO):
                data = data.getvalue()

            # Calculate length if not provided
            data_length = length if length is not None else len(data)

            # The MinIO SDK requires a stream and an explicit length
            data_stream = io.BytesIO(data)

            # Upload object with proper stream and length
            self._client.put_object(
                bucket_name,
                object_name,
                data_stream,
                length=data_length,
                content_type=content_type,
                metadata=metadata
            )

            logger.info("Uploaded object to MinIO",
                        bucket_name=bucket_name,
                        object_name=object_name,
                        size=data_length)

            return True

        except S3Error as e:
            logger.error("Failed to upload object",
                         bucket_name=bucket_name,
                         object_name=object_name,
                         error=str(e))
            return False

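Usage sketch (bucket, object name, and metadata are illustrative): put_object accepts bytes, str, BytesIO, or Path, so callers can pass whatever they have.

    # Upload a small JSON document through the shared singleton
    ok = minio_client.put_object(
        bucket_name="training-models",
        object_name="examples/hello.json",
        data='{"hello": "world"}',
        content_type="application/json",
        metadata={"source": "usage-example"},
    )
    assert ok
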
    @with_retry(max_retries=3, base_delay=1.0)
    def get_object(self, bucket_name: str, object_name: str) -> Optional[bytes]:
        """Download an object from MinIO"""
        try:
            # Get object data
            response = self._client.get_object(bucket_name, object_name)
            data = response.read()

            logger.info("Downloaded object from MinIO",
                        bucket_name=bucket_name,
                        object_name=object_name,
                        size=len(data))

            return data

        except S3Error as e:
            logger.error("Failed to download object",
                         bucket_name=bucket_name,
                         object_name=object_name,
                         error=str(e))
            return None

    def object_exists(self, bucket_name: str, object_name: str) -> bool:
        """Check if object exists"""
        try:
            self._client.stat_object(bucket_name, object_name)
            return True
        except S3Error:
            return False

    def list_objects(self, bucket_name: str, prefix: str = "") -> list:
        """List objects in bucket with optional prefix"""
        try:
            objects = self._client.list_objects(bucket_name, prefix=prefix, recursive=True)
            return [obj.object_name for obj in objects]
        except S3Error as e:
            logger.error("Failed to list objects",
                         bucket_name=bucket_name,
                         prefix=prefix,
                         error=str(e))
            return []

    def delete_object(self, bucket_name: str, object_name: str) -> bool:
        """Delete an object from MinIO"""
        try:
            self._client.remove_object(bucket_name, object_name)
            logger.info("Deleted object from MinIO",
                        bucket_name=bucket_name,
                        object_name=object_name)
            return True
        except S3Error as e:
            logger.error("Failed to delete object",
                         bucket_name=bucket_name,
                         object_name=object_name,
                         error=str(e))
            return False

    def get_presigned_url(
        self,
        bucket_name: str,
        object_name: str,
        expires: int = 3600
    ) -> Optional[str]:
        """Generate presigned URL for object access (expires given in seconds)"""
        try:
            # minio-py expects a timedelta, not a raw number of seconds
            url = self._client.presigned_get_object(
                bucket_name,
                object_name,
                expires=timedelta(seconds=expires)
            )
            return url
        except S3Error as e:
            logger.error("Failed to generate presigned URL",
                         bucket_name=bucket_name,
                         object_name=object_name,
                         error=str(e))
            return None

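Usage sketch (object path illustrative); expires is given in seconds and converted to the timedelta the SDK expects:

    # One-hour download link for a stored model
    url = minio_client.get_presigned_url(
        "training-models",
        "models/tenant-a/product-1/model-123.pkl",
        expires=3600,
    )
    if url:
        print(url)
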
    def copy_object(
        self,
        source_bucket: str,
        source_object: str,
        dest_bucket: str,
        dest_object: str
    ) -> bool:
        """Copy object within MinIO"""
        try:
            # Ensure destination bucket exists
            self.create_bucket(dest_bucket)

            # minio-py requires a CopySource object rather than a "bucket/object" string
            self._client.copy_object(
                dest_bucket,
                dest_object,
                CopySource(source_bucket, source_object)
            )

            logger.info("Copied object in MinIO",
                        source_bucket=source_bucket,
                        source_object=source_object,
                        dest_bucket=dest_bucket,
                        dest_object=dest_object)

            return True
        except S3Error as e:
            logger.error("Failed to copy object",
                         source_bucket=source_bucket,
                         source_object=source_object,
                         dest_bucket=dest_bucket,
                         dest_object=dest_object,
                         error=str(e))
            return False

    def get_object_metadata(self, bucket_name: str, object_name: str) -> Optional[Dict[str, Any]]:
        """Get object metadata"""
        try:
            stat = self._client.stat_object(bucket_name, object_name)
            return {
                "size": stat.size,
                "last_modified": stat.last_modified,
                "content_type": stat.content_type,
                "metadata": stat.metadata or {}
            }
        except S3Error as e:
            logger.error("Failed to get object metadata",
                         bucket_name=bucket_name,
                         object_name=object_name,
                         error=str(e))
            return None

    def health_check(self) -> bool:
        """Check MinIO service health"""
        try:
            # Simple bucket list to check connectivity
            self._client.list_buckets()
            return True
        except Exception as e:
            logger.error("MinIO health check failed", error=str(e))
            return False

# Singleton instance for convenience
minio_client = MinIOClient()
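A quick connectivity smoke test using the singleton (assumes the training-models bucket from the hunks above):

    from shared.clients.minio_client import minio_client

    if minio_client.health_check():
        print("MinIO reachable; sample objects:",
              minio_client.list_objects("training-models")[:5])
    else:
        print("MinIO unreachable - check MINIO_ENDPOINT and credentials")
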
@@ -315,10 +315,9 @@ class BaseServiceSettings(BaseSettings):
     # ================================================================
     # ML & AI CONFIGURATION
     # ================================================================

-    # Model Storage
-    MODEL_STORAGE_PATH: str = os.getenv("MODEL_STORAGE_PATH", "/app/models")
-    MODEL_STORAGE_BACKEND: str = os.getenv("MODEL_STORAGE_BACKEND", "local")  # local, s3, gcs
+    # Model Storage Backend (MinIO is the primary storage)
+    MODEL_STORAGE_BACKEND: str = os.getenv("MODEL_STORAGE_BACKEND", "minio")

     # Training Configuration
     MAX_TRAINING_TIME_MINUTES: int = int(os.getenv("MAX_TRAINING_TIME_MINUTES", "30"))

@@ -308,6 +308,47 @@ def add_metrics_middleware(app, metrics_collector: MetricsCollector):
     return metrics_collector


+def track_user_activity(user_id: str, action: str, service_name: str = "unknown-service", metadata: dict = None):
+    """Track user activity metrics using the appropriate metrics collector"""
+    if metadata is None:
+        metadata = {}
+
+    # Add user-specific attributes
+    attributes = {
+        "user.id": user_id,
+        "action": action,
+        **metadata
+    }
+
+    # Get the metrics collector for the specified service
+    metrics_collector = get_metrics_collector(service_name)
+
+    if metrics_collector:
+        # Use the collector's counter registration system
+        counter_name = "user_activity_total"
+
+        # Register the counter if it does not exist yet
+        if counter_name not in metrics_collector._counters:
+            metrics_collector.register_counter(
+                name=counter_name,
+                documentation="Total user activity events"
+            )
+
+        # Increment the counter with attributes
+        metrics_collector.increment_counter(counter_name, value=1, labels=attributes)
+    else:
+        # Fallback: create a temporary counter if no collector exists
+        from opentelemetry import metrics
+
+        meter = metrics.get_meter(__name__)
+        user_activity_counter = meter.create_counter(
+            name="user_activity_total",
+            description="User activity events",
+            unit="events"
+        )
+        user_activity_counter.add(1, attributes)
+
+
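Call-site sketch (IDs and metadata illustrative):

    track_user_activity(
        user_id="user-42",
        action="model_training_started",
        service_name="training-service",
        metadata={"inventory_product_id": "sku-123"},
    )
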
 def setup_metrics_early(
     app,
     service_name: str = None,