Add MinIO support and frontend analytics

Urtzi Alfaro
2026-01-17 22:42:40 +01:00
parent fbc670ddb3
commit 3c4b5c2a06
53 changed files with 3485 additions and 437 deletions

Tiltfile
View File

@@ -16,22 +16,142 @@
# - Gateway only rebuilds when gateway/ or shared/ code changes
# =============================================================================
# =============================================================================
# TILT CONFIGURATION
# =============================================================================
# Update settings
update_settings(
max_parallel_updates=2, # Reduce parallel updates to avoid resource exhaustion
k8s_upsert_timeout_secs=120 # Increase timeout for slower local builds
)
# Ensure we're running in the correct context
allow_k8s_contexts('kind-bakery-ia-local')
# =============================================================================
# DISK SPACE MANAGEMENT & CLEANUP CONFIGURATION
# =============================================================================
# Disk space management settings
disk_cleanup_enabled = True # Default to True, can be disabled with TILT_DISABLE_CLEANUP=true
if 'TILT_DISABLE_CLEANUP' in os.environ:
disk_cleanup_enabled = os.environ['TILT_DISABLE_CLEANUP'].lower() != 'true'
disk_space_threshold_gb = '10'
if 'TILT_DISK_THRESHOLD_GB' in os.environ:
disk_space_threshold_gb = os.environ['TILT_DISK_THRESHOLD_GB']
disk_cleanup_frequency_minutes = '30'
if 'TILT_CLEANUP_FREQUENCY' in os.environ:
disk_cleanup_frequency_minutes = os.environ['TILT_CLEANUP_FREQUENCY']
print("""
DISK SPACE MANAGEMENT CONFIGURATION
======================================
Cleanup Enabled: {}
Free Space Threshold: {}GB
Cleanup Frequency: Every {} minutes
To disable cleanup: export TILT_DISABLE_CLEANUP=true
To change threshold: export TILT_DISK_THRESHOLD_GB=20
To change frequency: export TILT_CLEANUP_FREQUENCY=60
""".format(
'YES' if disk_cleanup_enabled else 'NO (TILT_DISABLE_CLEANUP=true)',
disk_space_threshold_gb,
disk_cleanup_frequency_minutes
))
# Automatic cleanup scheduler (informational only - actual scheduling done externally)
if disk_cleanup_enabled:
local_resource(
'automatic-disk-cleanup-info',
cmd='''
echo "Automatic disk cleanup is ENABLED"
echo "Settings:"
echo " - Threshold: ''' + disk_space_threshold_gb + ''' GB free space"
echo " - Frequency: Every ''' + disk_cleanup_frequency_minutes + ''' minutes"
echo ""
echo "Note: Actual cleanup runs via external scheduling (cron job or similar)"
echo "To run cleanup now: tilt trigger manual-disk-cleanup"
''',
labels=['99-cleanup'],
auto_init=True,
allow_parallel=False
)
# Manual cleanup trigger (can be run on demand)
local_resource(
'manual-disk-cleanup',
cmd='''
echo "Starting manual disk cleanup..."
python3 scripts/cleanup_disk_space.py --manual --verbose
''',
labels=['99-cleanup'],
auto_init=False,
allow_parallel=False
)
# Disk space monitoring resource
local_resource(
'disk-space-monitor',
cmd='''
echo "DISK SPACE MONITORING"
echo "======================================"
# Get disk usage
df -h / | grep -v Filesystem | awk '{{print "Total: " $2 " | Used: " $3 " | Free: " $4 " | Usage: " $5}}'
# Get Docker disk usage
echo ""
echo "DOCKER DISK USAGE:"
docker system df
# Get Kubernetes disk usage (if available)
echo ""
echo "KUBERNETES DISK USAGE:"
kubectl get pvc -n bakery-ia --no-headers 2>/dev/null | awk '{{print "PVC: " $1 " | Status: " $2 " | Capacity: " $3 " | Used: " $4}}' || echo " Kubernetes PVCs not available"
echo ""
echo "Cleanup Status:"
if [ "{disk_cleanup_enabled}" = "True" ]; then
echo " Automatic cleanup: ENABLED (every {disk_cleanup_frequency_minutes} minutes)"
echo " Threshold: {disk_space_threshold_gb}GB free space"
else
echo " Automatic cleanup: DISABLED"
echo " To enable: unset TILT_DISABLE_CLEANUP or set TILT_DISABLE_CLEANUP=false"
fi
echo ""
echo "Manual cleanup commands:"
echo " tilt trigger manual-disk-cleanup # Run cleanup now"
echo " docker system prune -a # Manual Docker cleanup"
echo " kubectl delete jobs --all # Clean up completed jobs"
'''.format(
# Substitute the {disk_cleanup_enabled}, {disk_cleanup_frequency_minutes} and
# {disk_space_threshold_gb} placeholders used in the script above
disk_cleanup_enabled=disk_cleanup_enabled,
disk_cleanup_frequency_minutes=disk_cleanup_frequency_minutes,
disk_space_threshold_gb=disk_space_threshold_gb
),
labels=['99-cleanup'],
auto_init=False,
allow_parallel=False
)
# =============================================================================
# DOCKER REGISTRY CONFIGURATION
# =============================================================================
# Docker registry configuration
# Set USE_DOCKERHUB=true environment variable to push images to Docker Hub
# Otherwise, uses local registry for faster builds and deployments
use_dockerhub = os.getenv('USE_DOCKERHUB', 'false').lower() == 'true'
dockerhub_username = os.getenv('DOCKERHUB_USERNAME', 'uals')
use_dockerhub = False # Default to False
if 'USE_DOCKERHUB' in os.environ:
use_dockerhub = os.environ['USE_DOCKERHUB'].lower() == 'true'
dockerhub_username = 'uals' # Default username
if 'DOCKERHUB_USERNAME' in os.environ:
dockerhub_username = os.environ['DOCKERHUB_USERNAME']
if use_dockerhub:
print("""
🐳 DOCKER HUB MODE ENABLED
DOCKER HUB MODE ENABLED
Images will be pushed to Docker Hub: docker.io/%s
Make sure you're logged in: docker login
To disable: unset USE_DOCKERHUB or set USE_DOCKERHUB=false
@@ -39,7 +159,7 @@ if use_dockerhub:
default_registry('docker.io/%s' % dockerhub_username)
else:
print("""
🏠 LOCAL REGISTRY MODE
LOCAL REGISTRY MODE
Using local registry for faster builds: localhost:5001
This registry is created by kubernetes_restart.sh script
To use Docker Hub: export USE_DOCKERHUB=true
@@ -52,20 +172,21 @@ else:
print("""
======================================
🔐 Bakery IA Secure Development Mode
Bakery IA Secure Development Mode
======================================
Security Features:
TLS encryption for PostgreSQL and Redis
Strong 32-character passwords
PersistentVolumeClaims (no data loss)
pgcrypto extension for encryption
PostgreSQL audit logging
TLS encryption for PostgreSQL and Redis
Strong 32-character passwords
PersistentVolumeClaims (no data loss)
Column encryption: pgcrypto extension
Audit logging: PostgreSQL query logging
Object storage: MinIO with TLS for ML models
Monitoring:
📊 Service metrics available at /metrics endpoints
🔍 Telemetry ready (traces, metrics, logs)
SigNoz deployment optional for local dev (see signoz-info resource)
Service metrics available at /metrics endpoints
Telemetry ready (traces, metrics, logs)
SigNoz deployment optional for local dev (see signoz-info resource)
Applying security configurations...
""")
@@ -74,7 +195,7 @@ Applying security configurations...
local_resource(
'dockerhub-secret',
cmd='''
echo "🐳 Setting up Docker Hub image pull secret..."
echo "Setting up Docker Hub image pull secret..."
# Check if Docker Hub credentials are available
if [ -n "$DOCKERHUB_USERNAME" ] && [ -n "$DOCKERHUB_PASSWORD" ]; then
@@ -84,7 +205,7 @@ local_resource(
echo " Attempting to use Docker CLI credentials..."
./infrastructure/kubernetes/create-dockerhub-secret.sh
else
echo " ⚠️ Docker Hub credentials not found"
echo " Docker Hub credentials not found"
echo " To enable automatic Docker Hub authentication:"
echo " 1. Run 'docker login', OR"
echo " 2. Set environment variables:"
@@ -103,13 +224,13 @@ local_resource(
local_resource(
'security-setup',
cmd='''
echo "📦 Applying security secrets and configurations..."
echo "Applying security secrets and configurations..."
kubectl apply -f infrastructure/kubernetes/base/secrets.yaml
kubectl apply -f infrastructure/kubernetes/base/secrets/postgres-tls-secret.yaml
kubectl apply -f infrastructure/kubernetes/base/secrets/redis-tls-secret.yaml
kubectl apply -f infrastructure/kubernetes/base/configs/postgres-init-config.yaml
kubectl apply -f infrastructure/kubernetes/base/configmaps/postgres-logging-config.yaml
echo "Security configurations applied"
echo "Security configurations applied"
''',
resource_deps=['dockerhub-secret'],
labels=['00-security'],
@@ -120,7 +241,7 @@ local_resource(
local_resource(
'verify-tls',
cmd='''
echo "🔍 Verifying TLS configuration..."
echo "Verifying TLS configuration..."
sleep 5 # Wait for pods to be ready
# Check if auth-db pod exists and has TLS certs
@@ -129,8 +250,8 @@ local_resource(
if [ -n "$AUTH_POD" ]; then
echo " Checking PostgreSQL TLS certificates..."
kubectl exec -n bakery-ia "$AUTH_POD" -- ls -la /tls/ 2>/dev/null && \
echo " PostgreSQL TLS certificates mounted" || \
echo " ⚠️ PostgreSQL TLS certificates not found (pods may still be starting)"
echo " PostgreSQL TLS certificates mounted" || \
echo " PostgreSQL TLS certificates not found (pods may still be starting)"
fi
# Check if redis pod exists and has TLS certs
@@ -139,15 +260,14 @@ local_resource(
if [ -n "$REDIS_POD" ]; then
echo " Checking Redis TLS certificates..."
kubectl exec -n bakery-ia "$REDIS_POD" -- ls -la /tls/ 2>/dev/null && \
echo " Redis TLS certificates mounted" || \
echo " ⚠️ Redis TLS certificates not found (pods may still be starting)"
echo " Redis TLS certificates mounted" || \
echo " Redis TLS certificates not found (pods may still be starting)"
fi
echo "TLS verification complete"
echo "TLS verification complete"
''',
resource_deps=['auth-db', 'redis'],
auto_init=True,
trigger_mode=TRIGGER_MODE_MANUAL,
labels=['00-security']
)
@@ -155,15 +275,14 @@ local_resource(
local_resource(
'verify-pvcs',
cmd='''
echo "🔍 Verifying PersistentVolumeClaims..."
kubectl get pvc -n bakery-ia | grep -E "NAME|db-pvc" || echo " ⚠️ PVCs not yet bound"
echo "Verifying PersistentVolumeClaims..."
kubectl get pvc -n bakery-ia | grep -E "NAME|db-pvc" || echo " PVCs not yet bound"
PVC_COUNT=$(kubectl get pvc -n bakery-ia -o json | jq '.items | length')
echo " Found $PVC_COUNT PVCs"
echo "PVC verification complete"
echo "PVC verification complete"
''',
resource_deps=['auth-db'],
auto_init=True,
trigger_mode=TRIGGER_MODE_MANUAL,
labels=['00-security']
)
@@ -171,11 +290,11 @@ local_resource(
local_resource(
'cert-manager-install',
cmd='''
echo "📦 Installing cert-manager..."
echo "Installing cert-manager..."
# Check if cert-manager CRDs already exist
if kubectl get crd certificates.cert-manager.io >/dev/null 2>&1; then
echo " cert-manager CRDs already installed"
echo " cert-manager CRDs already installed"
else
echo " Installing cert-manager v1.13.2..."
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.2/cert-manager.yaml
@@ -184,10 +303,10 @@ local_resource(
kubectl wait --for=condition=available --timeout=120s deployment/cert-manager -n cert-manager
kubectl wait --for=condition=available --timeout=120s deployment/cert-manager-webhook -n cert-manager
echo " cert-manager installed and ready"
echo " cert-manager installed and ready"
fi
echo "cert-manager verification complete"
echo "cert-manager verification complete"
''',
labels=['00-security'],
auto_init=True
@@ -265,19 +384,21 @@ def build_python_service(service_name, service_path):
# =============================================================================
# Frontend (React + Vite)
frontend_debug_env = os.getenv('FRONTEND_DEBUG', 'false')
frontend_debug_env = 'false' # Default to false
if 'FRONTEND_DEBUG' in os.environ:
frontend_debug_env = os.environ['FRONTEND_DEBUG']
frontend_debug = frontend_debug_env.lower() == 'true'
if frontend_debug:
print("""
🐛 FRONTEND DEBUG MODE ENABLED
FRONTEND DEBUG MODE ENABLED
Building frontend with NO minification for easier debugging.
Full React error messages will be displayed.
To disable: unset FRONTEND_DEBUG or set FRONTEND_DEBUG=false
""")
else:
print("""
📦 FRONTEND PRODUCTION MODE
FRONTEND PRODUCTION MODE
Building frontend with minification for optimized performance.
To enable debug mode: export FRONTEND_DEBUG=true
""")
@@ -384,6 +505,10 @@ k8s_resource('redis', resource_deps=['security-setup'], labels=['01-infrastructu
k8s_resource('rabbitmq', labels=['01-infrastructure'])
k8s_resource('nominatim', labels=['01-infrastructure'])
# MinIO Storage
k8s_resource('minio', resource_deps=['security-setup'], labels=['01-infrastructure'])
k8s_resource('minio-bucket-init', resource_deps=['minio'], labels=['01-infrastructure'])
# =============================================================================
# MONITORING RESOURCES - SigNoz (Unified Observability)
# =============================================================================
@@ -392,25 +517,25 @@ k8s_resource('nominatim', labels=['01-infrastructure'])
local_resource(
'signoz-deploy',
cmd='''
echo "📊 Deploying SigNoz Monitoring Stack..."
echo "Deploying SigNoz Monitoring Stack..."
echo ""
# Ensure Docker Hub secret exists in bakery-ia namespace
echo "🔐 Ensuring Docker Hub secret exists in bakery-ia namespace..."
echo "Ensuring Docker Hub secret exists in bakery-ia namespace..."
if ! kubectl get secret dockerhub-creds -n bakery-ia &>/dev/null; then
echo " ⚠️ Docker Hub secret not found, attempting to create..."
echo " Docker Hub secret not found, attempting to create..."
./infrastructure/kubernetes/create-dockerhub-secret.sh || echo " Continuing without Docker Hub authentication..."
else
echo " Docker Hub secret exists"
echo " Docker Hub secret exists"
fi
echo ""
# Check if SigNoz is already deployed
if helm list -n bakery-ia | grep -q signoz; then
echo "SigNoz already deployed, checking status..."
echo "SigNoz already deployed, checking status..."
helm status signoz -n bakery-ia
else
echo "🚀 Installing SigNoz..."
echo "Installing SigNoz..."
# Add SigNoz Helm repository if not already added
helm repo add signoz https://charts.signoz.io 2>/dev/null || true
@@ -424,25 +549,23 @@ local_resource(
--wait
echo ""
echo "SigNoz deployment completed"
echo "SigNoz deployment completed"
fi
echo ""
echo "📈 SigNoz Access Information:"
echo "SigNoz Access Information:"
echo " URL: https://monitoring.bakery-ia.local"
echo " Username: admin"
echo " Password: admin"
echo ""
echo "🔧 OpenTelemetry Collector Endpoints:"
echo "OpenTelemetry Collector Endpoints:"
echo " gRPC: localhost:4317"
echo " HTTP: localhost:4318"
echo ""
echo "💡 To check pod status: kubectl get pods -n signoz"
echo "To check pod status: kubectl get pods -n signoz"
''',
labels=['05-monitoring'],
auto_init=False,
trigger_mode=TRIGGER_MODE_MANUAL,
allow_parallel=False
)
# Track SigNoz pods in Tilt UI using workload tracking
@@ -450,7 +573,7 @@ local_resource(
local_resource(
'signoz-status',
cmd='''
echo "📊 SigNoz Status Check"
echo "SigNoz Status Check"
echo ""
# Check pod status
@@ -470,19 +593,17 @@ local_resource(
echo "Pod Status: $READY_PODS/$TOTAL_PODS ready"
if [ "$READY_PODS" -eq "$TOTAL_PODS" ]; then
echo "All SigNoz pods are running!"
echo "All SigNoz pods are running!"
echo ""
echo "Access SigNoz at: https://monitoring.bakery-ia.local"
echo "Credentials: admin / admin"
else
echo "Waiting for pods to become ready..."
echo "Waiting for pods to become ready..."
fi
fi
''',
labels=['05-monitoring'],
resource_deps=['signoz-deploy'],
auto_init=False,
trigger_mode=TRIGGER_MODE_MANUAL
)
# Optional exporters (in monitoring namespace) - DISABLED since using SigNoz
@@ -566,7 +687,6 @@ k8s_resource('demo-session-migration', resource_deps=['demo-session-db'], labels
k8s_resource('external-data-init', resource_deps=['external-migration', 'redis'], labels=['08-data-init'])
k8s_resource('nominatim-init', labels=['08-data-init'])
# =============================================================================
# =============================================================================
# APPLICATION SERVICES
# =============================================================================
@@ -618,15 +738,9 @@ k8s_resource('demo-session-cleanup', resource_deps=['demo-session-service'], lab
k8s_resource('external-data-rotation', resource_deps=['external-service'], labels=['16-cronjobs'])
# =============================================================================
# TILT CONFIGURATION
# WATCH SETTINGS
# =============================================================================
# Update settings
update_settings(
max_parallel_updates=2, # Reduce parallel updates to avoid resource exhaustion
k8s_upsert_timeout_secs=120 # Increase timeout for slower local builds
)
# Watch settings
watch_settings(
ignore=[
@@ -665,18 +779,19 @@ watch_settings(
# =============================================================================
print("""
Security setup complete!
Security setup complete!
Database Security Features Active:
🔐 TLS encryption: PostgreSQL and Redis
🔑 Strong passwords: 32-character cryptographic
💾 Persistent storage: PVCs for all databases
🔒 Column encryption: pgcrypto extension
📋 Audit logging: PostgreSQL query logging
TLS encryption: PostgreSQL and Redis
Strong passwords: 32-character cryptographic
Persistent storage: PVCs for all databases
Column encryption: pgcrypto extension
Audit logging: PostgreSQL query logging
Internal Schedulers Active:
Alert Priority Recalculation: Hourly @ :15 (alert-processor)
Usage Tracking: Daily @ 2:00 AM UTC (tenant-service)
Alert Priority Recalculation: Hourly @ :15 (alert-processor)
Usage Tracking: Daily @ 2:00 AM UTC (tenant-service)
Disk Cleanup: Every {disk_cleanup_frequency_minutes} minutes (threshold: {disk_space_threshold_gb}GB)
Access your application:
Main Application: https://bakery-ia.local
@@ -708,11 +823,11 @@ Documentation:
docs/DATABASE_SECURITY_ANALYSIS_REPORT.md
Build Optimization Active:
Services only rebuild when their code changes
Shared folder changes trigger ALL services (as expected)
Reduces unnecessary rebuilds and disk usage
💡 Edit service code: only that service rebuilds
💡 Edit shared/ code: all services rebuild (required)
Services only rebuild when their code changes
Shared folder changes trigger ALL services (as expected)
Reduces unnecessary rebuilds and disk usage
Edit service code: only that service rebuilds
Edit shared/ code: all services rebuild (required)
Useful Commands:
# Work on specific services only
@@ -730,4 +845,4 @@ DNS Configuration:
# 127.0.0.1 monitoring.bakery-ia.local
======================================
""")
""")

View File

@@ -0,0 +1,154 @@
# MinIO Certificate Generation Guide
## Quick Start
To generate MinIO certificates with the correct format:
```bash
# Generate certificates
./infrastructure/tls/generate-minio-certificates.sh
# Update Kubernetes secret
kubectl delete secret -n bakery-ia minio-tls
kubectl apply -f infrastructure/kubernetes/base/secrets/minio-tls-secret.yaml
# Restart MinIO
kubectl rollout restart deployment -n bakery-ia minio
```
## Key Requirements
### Private Key Format
**Required**: Traditional RSA format (`BEGIN RSA PRIVATE KEY`)
**Problematic**: PKCS#8 format (`BEGIN PRIVATE KEY`)
### Certificate Files
- `minio-cert.pem` - Server certificate
- `minio-key.pem` - Private key (must be traditional RSA format)
- `ca-cert.pem` - CA certificate
## Verification
### Check Private Key Format
```bash
head -1 infrastructure/tls/minio/minio-key.pem
# Should output: -----BEGIN RSA PRIVATE KEY-----
```
### Verify Certificate Chain
```bash
openssl verify -CAfile infrastructure/tls/ca/ca-cert.pem \
infrastructure/tls/minio/minio-cert.pem
```
### Check Certificate Details
```bash
openssl x509 -in infrastructure/tls/minio/minio-cert.pem -noout \
-subject -issuer -dates
```
## Troubleshooting
### Error: "The private key contains additional data"
**Cause**: Private key is in PKCS#8 format instead of traditional RSA format
**Solution**: Convert the key:
```bash
openssl rsa -in minio-key.pem -traditional -out minio-key-fixed.pem
mv minio-key-fixed.pem minio-key.pem
```
### Error: "Unable to parse private key"
**Cause**: Certificate/key mismatch or corrupted files
**Solution**: Regenerate certificates and verify:
```bash
# Check modulus of certificate and key (should match)
openssl x509 -noout -modulus -in minio-cert.pem | openssl md5
openssl rsa -noout -modulus -in minio-key.pem | openssl md5
```
## Certificate Rotation
### Step-by-Step Process
1. **Generate new certificates**
```bash
./infrastructure/tls/generate-minio-certificates.sh
```
2. **Update base64 values in secret**
```bash
# Update infrastructure/kubernetes/base/secrets/minio-tls-secret.yaml
# with new base64 encoded certificate values
```
3. **Apply updated secret**
```bash
kubectl delete secret -n bakery-ia minio-tls
kubectl apply -f infrastructure/kubernetes/base/secrets/minio-tls-secret.yaml
```
4. **Restart MinIO pods**
```bash
kubectl rollout restart deployment -n bakery-ia minio
```
5. **Verify**
```bash
kubectl logs -n bakery-ia -l app.kubernetes.io/name=minio --tail=5
# Should show: API: https://minio.bakery-ia.svc.cluster.local:9000
```
## Technical Details
### Certificate Generation Process
1. **Generate private key** (RSA 4096-bit)
2. **Convert to traditional RSA format** (critical for MinIO)
3. **Create CSR** with proper SANs
4. **Sign with CA** (valid for 3 years)
5. **Set permissions** (600 for key, 644 for certs)
### SANs (Subject Alternative Names)
The certificate includes these SANs for comprehensive coverage:
- `minio.bakery-ia.svc.cluster.local` (primary)
- `minio.bakery-ia`
- `minio-console.bakery-ia.svc.cluster.local`
- `minio-console.bakery-ia`
- `minio`
- `minio-console`
- `localhost`
- `127.0.0.1`
### Secret Structure
The Kubernetes secret uses the standardized Opaque format:
```yaml
apiVersion: v1
kind: Secret
metadata:
name: minio-tls
namespace: bakery-ia
type: Opaque
data:
ca-cert.pem: <base64>
minio-cert.pem: <base64>
minio-key.pem: <base64>
```
## Best Practices
1. **Always verify private key format** before applying
2. **Test certificates** with `openssl verify` before deployment
3. **Use the generation script** to ensure consistency
4. **Document certificate expiration dates** for rotation planning
5. **Monitor MinIO logs** after certificate updates
## Related Documentation
- [MinIO TLS Fix Summary](MINIO_TLS_FIX_SUMMARY.md)
- [Kubernetes TLS Secrets Guide](../kubernetes-tls-guide.md)
- [Certificate Management Best Practices](../certificate-management.md)

View File

@@ -34,20 +34,47 @@ server {
# Note: API routing is handled by ingress, not by this nginx
# The frontend makes requests to /api which are routed by the ingress controller
# Static assets with aggressive caching (including source maps for debugging)
location ~* ^/assets/.*\.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot|map)$ {
expires 1y;
add_header Cache-Control "public, immutable";
add_header Vary Accept-Encoding;
# Source map files - serve with proper CORS headers and content type
# Note: These are typically only needed in development, but served in production for error reporting
location ~* ^/assets/.*\.map$ {
# Short cache time to avoid mismatches with JS files
expires 1m;
add_header Cache-Control "public, must-revalidate";
add_header Access-Control-Allow-Origin "*";
add_header Access-Control-Allow-Methods "GET";
add_header Access-Control-Allow-Headers "Content-Type";
add_header Content-Type "application/json";
# Disable access logging for source maps as they're requested frequently
access_log off;
try_files $uri =404;
}
# Also handle JS and CSS files anywhere in the structure (for dynamic imports)
location ~* \.(js|css)$ {
# Static assets with appropriate caching
# Note: JS/CSS files have content hashes for cache busting, but use shorter cache times to handle deployment issues
location ~* ^/assets/.*\.(js|css)$ {
expires 1h;
add_header Cache-Control "public";
add_header Vary Accept-Encoding;
add_header Access-Control-Allow-Origin "*";
access_log off;
try_files $uri =404;
}
# Static assets that don't change often (images, fonts) can have longer cache times
location ~* ^/assets/.*\.(png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ {
expires 1y;
add_header Cache-Control "public, immutable";
add_header Vary Accept-Encoding;
add_header Access-Control-Allow-Origin "*";
access_log off;
try_files $uri =404;
}
# Handle JS and CSS files anywhere in the structure (for dynamic imports) with shorter cache
location ~* \.(js|css)$ {
expires 1h;
add_header Cache-Control "public";
add_header Vary Accept-Encoding;
access_log off;
try_files $uri =404;
}

View File

@@ -9,6 +9,13 @@
"version": "2.0.0",
"dependencies": {
"@hookform/resolvers": "^3.3.2",
"@opentelemetry/api": "^1.9.0",
"@opentelemetry/exporter-metrics-otlp-http": "^0.210.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.210.0",
"@opentelemetry/resources": "^2.4.0",
"@opentelemetry/sdk-metrics": "^2.4.0",
"@opentelemetry/sdk-trace-web": "^2.4.0",
"@opentelemetry/semantic-conventions": "^1.39.0",
"@radix-ui/react-accordion": "^1.1.2",
"@radix-ui/react-checkbox": "^1.0.4",
"@radix-ui/react-dialog": "^1.0.5",
@@ -2976,6 +2983,209 @@
"dev": true,
"license": "MIT"
},
"node_modules/@opentelemetry/api": {
"version": "1.9.0",
"resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
"integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
"license": "Apache-2.0",
"peer": true,
"engines": {
"node": ">=8.0.0"
}
},
"node_modules/@opentelemetry/api-logs": {
"version": "0.210.0",
"resolved": "https://registry.npmjs.org/@opentelemetry/api-logs/-/api-logs-0.210.0.tgz",
"integrity": "sha512-CMtLxp+lYDriveZejpBND/2TmadrrhUfChyxzmkFtHaMDdSKfP59MAYyA0ICBvEBdm3iXwLcaj/8Ic/pnGw9Yg==",
"license": "Apache-2.0",
"dependencies": {
"@opentelemetry/api": "^1.3.0"
},
"engines": {
"node": ">=8.0.0"
}
},
"node_modules/@opentelemetry/core": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.4.0.tgz",
"integrity": "sha512-KtcyFHssTn5ZgDu6SXmUznS80OFs/wN7y6MyFRRcKU6TOw8hNcGxKvt8hsdaLJfhzUszNSjURetq5Qpkad14Gw==",
"license": "Apache-2.0",
"dependencies": {
"@opentelemetry/semantic-conventions": "^1.29.0"
},
"engines": {
"node": "^18.19.0 || >=20.6.0"
},
"peerDependencies": {
"@opentelemetry/api": ">=1.0.0 <1.10.0"
}
},
"node_modules/@opentelemetry/exporter-metrics-otlp-http": {
"version": "0.210.0",
"resolved": "https://registry.npmjs.org/@opentelemetry/exporter-metrics-otlp-http/-/exporter-metrics-otlp-http-0.210.0.tgz",
"integrity": "sha512-JpLThG8Hh8A/Jzdzw9i4Ftu+EzvLaX/LouN+mOOHmadL0iror0Qsi3QWzucXeiUsDDsiYgjfKyi09e6sltytgA==",
"license": "Apache-2.0",
"dependencies": {
"@opentelemetry/core": "2.4.0",
"@opentelemetry/otlp-exporter-base": "0.210.0",
"@opentelemetry/otlp-transformer": "0.210.0",
"@opentelemetry/resources": "2.4.0",
"@opentelemetry/sdk-metrics": "2.4.0"
},
"engines": {
"node": "^18.19.0 || >=20.6.0"
},
"peerDependencies": {
"@opentelemetry/api": "^1.3.0"
}
},
"node_modules/@opentelemetry/exporter-trace-otlp-http": {
"version": "0.210.0",
"resolved": "https://registry.npmjs.org/@opentelemetry/exporter-trace-otlp-http/-/exporter-trace-otlp-http-0.210.0.tgz",
"integrity": "sha512-9JkyaCl70anEtuKZdoCQmjDuz1/paEixY/DWfsvHt7PGKq3t8/nQ/6/xwxHjG+SkPAUbo1Iq4h7STe7Pk2bc5A==",
"license": "Apache-2.0",
"dependencies": {
"@opentelemetry/core": "2.4.0",
"@opentelemetry/otlp-exporter-base": "0.210.0",
"@opentelemetry/otlp-transformer": "0.210.0",
"@opentelemetry/resources": "2.4.0",
"@opentelemetry/sdk-trace-base": "2.4.0"
},
"engines": {
"node": "^18.19.0 || >=20.6.0"
},
"peerDependencies": {
"@opentelemetry/api": "^1.3.0"
}
},
"node_modules/@opentelemetry/otlp-exporter-base": {
"version": "0.210.0",
"resolved": "https://registry.npmjs.org/@opentelemetry/otlp-exporter-base/-/otlp-exporter-base-0.210.0.tgz",
"integrity": "sha512-uk78DcZoBNHIm26h0oXc8Pizh4KDJ/y04N5k/UaI9J7xR7mL8QcMcYPQG9xxN7m8qotXOMDRW6qTAyptav4+3w==",
"license": "Apache-2.0",
"dependencies": {
"@opentelemetry/core": "2.4.0",
"@opentelemetry/otlp-transformer": "0.210.0"
},
"engines": {
"node": "^18.19.0 || >=20.6.0"
},
"peerDependencies": {
"@opentelemetry/api": "^1.3.0"
}
},
"node_modules/@opentelemetry/otlp-transformer": {
"version": "0.210.0",
"resolved": "https://registry.npmjs.org/@opentelemetry/otlp-transformer/-/otlp-transformer-0.210.0.tgz",
"integrity": "sha512-nkHBJVSJGOwkRZl+BFIr7gikA93/U8XkL2EWaiDbj3DVjmTEZQpegIKk0lT8oqQYfP8FC6zWNjuTfkaBVqa0ZQ==",
"license": "Apache-2.0",
"dependencies": {
"@opentelemetry/api-logs": "0.210.0",
"@opentelemetry/core": "2.4.0",
"@opentelemetry/resources": "2.4.0",
"@opentelemetry/sdk-logs": "0.210.0",
"@opentelemetry/sdk-metrics": "2.4.0",
"@opentelemetry/sdk-trace-base": "2.4.0",
"protobufjs": "8.0.0"
},
"engines": {
"node": "^18.19.0 || >=20.6.0"
},
"peerDependencies": {
"@opentelemetry/api": "^1.3.0"
}
},
"node_modules/@opentelemetry/resources": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.4.0.tgz",
"integrity": "sha512-RWvGLj2lMDZd7M/5tjkI/2VHMpXebLgPKvBUd9LRasEWR2xAynDwEYZuLvY9P2NGG73HF07jbbgWX2C9oavcQg==",
"license": "Apache-2.0",
"dependencies": {
"@opentelemetry/core": "2.4.0",
"@opentelemetry/semantic-conventions": "^1.29.0"
},
"engines": {
"node": "^18.19.0 || >=20.6.0"
},
"peerDependencies": {
"@opentelemetry/api": ">=1.3.0 <1.10.0"
}
},
"node_modules/@opentelemetry/sdk-logs": {
"version": "0.210.0",
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-logs/-/sdk-logs-0.210.0.tgz",
"integrity": "sha512-YuaL92Dpyk/Kc1o4e9XiaWWwiC0aBFN+4oy+6A9TP4UNJmRymPMEX10r6EMMFMD7V0hktiSig9cwWo59peeLCQ==",
"license": "Apache-2.0",
"dependencies": {
"@opentelemetry/api-logs": "0.210.0",
"@opentelemetry/core": "2.4.0",
"@opentelemetry/resources": "2.4.0"
},
"engines": {
"node": "^18.19.0 || >=20.6.0"
},
"peerDependencies": {
"@opentelemetry/api": ">=1.4.0 <1.10.0"
}
},
"node_modules/@opentelemetry/sdk-metrics": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.4.0.tgz",
"integrity": "sha512-qSbfq9mXbLMqmPEjijl32f3ZEmiHekebRggPdPjhHI6t1CsAQOR2Aw/SuTDftk3/l2aaPHpwP3xM2DkgBA1ANw==",
"license": "Apache-2.0",
"dependencies": {
"@opentelemetry/core": "2.4.0",
"@opentelemetry/resources": "2.4.0"
},
"engines": {
"node": "^18.19.0 || >=20.6.0"
},
"peerDependencies": {
"@opentelemetry/api": ">=1.9.0 <1.10.0"
}
},
"node_modules/@opentelemetry/sdk-trace-base": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.4.0.tgz",
"integrity": "sha512-WH0xXkz/OHORDLKqaxcUZS0X+t1s7gGlumr2ebiEgNZQl2b0upK2cdoD0tatf7l8iP74woGJ/Kmxe82jdvcWRw==",
"license": "Apache-2.0",
"dependencies": {
"@opentelemetry/core": "2.4.0",
"@opentelemetry/resources": "2.4.0",
"@opentelemetry/semantic-conventions": "^1.29.0"
},
"engines": {
"node": "^18.19.0 || >=20.6.0"
},
"peerDependencies": {
"@opentelemetry/api": ">=1.3.0 <1.10.0"
}
},
"node_modules/@opentelemetry/sdk-trace-web": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-web/-/sdk-trace-web-2.4.0.tgz",
"integrity": "sha512-1FYg7qnrgTugPev51SehxCp0v9J4P97MJn2MaXQ8QK//psfyLDorKAAC3LmSIhq7XaC726WSZ/Wm69r8NdjIsA==",
"license": "Apache-2.0",
"dependencies": {
"@opentelemetry/core": "2.4.0",
"@opentelemetry/sdk-trace-base": "2.4.0"
},
"engines": {
"node": "^18.19.0 || >=20.6.0"
},
"peerDependencies": {
"@opentelemetry/api": ">=1.0.0 <1.10.0"
}
},
"node_modules/@opentelemetry/semantic-conventions": {
"version": "1.39.0",
"resolved": "https://registry.npmjs.org/@opentelemetry/semantic-conventions/-/semantic-conventions-1.39.0.tgz",
"integrity": "sha512-R5R9tb2AXs2IRLNKLBJDynhkfmx7mX0vi8NkhZb3gUkPWHn6HXk5J8iQ/dql0U3ApfWym4kXXmBDRGO+oeOfjg==",
"license": "Apache-2.0",
"engines": {
"node": ">=14"
}
},
"node_modules/@pkgjs/parseargs": {
"version": "0.11.0",
"resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
@@ -3010,6 +3220,70 @@
"dev": true,
"license": "MIT"
},
"node_modules/@protobufjs/aspromise": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
"integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/base64": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
"integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/codegen": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
"integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/eventemitter": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
"integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/fetch": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
"license": "BSD-3-Clause",
"dependencies": {
"@protobufjs/aspromise": "^1.1.1",
"@protobufjs/inquire": "^1.1.0"
}
},
"node_modules/@protobufjs/float": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
"integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/inquire": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
"integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/path": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
"integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/pool": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
"integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/utf8": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==",
"license": "BSD-3-Clause"
},
"node_modules/@radix-ui/number": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.1.tgz",
@@ -6577,7 +6851,6 @@
"version": "20.19.17",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.17.tgz",
"integrity": "sha512-gfehUI8N1z92kygssiuWvLiwcbOB3IRktR6hTDgJlXMYh5OvkPSRmgfoBUmfZt+vhwJtX7v1Yw4KvvAf7c5QKQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"undici-types": "~6.21.0"
@@ -11721,6 +11994,12 @@
"dev": true,
"license": "MIT"
},
"node_modules/long": {
"version": "5.3.2",
"resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
"integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
"license": "Apache-2.0"
},
"node_modules/loose-envify": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
@@ -13119,6 +13398,30 @@
"integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==",
"license": "MIT"
},
"node_modules/protobufjs": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-8.0.0.tgz",
"integrity": "sha512-jx6+sE9h/UryaCZhsJWbJtTEy47yXoGNYI4z8ZaRncM0zBKeRqjO2JEcOUYwrYGb1WLhXM1FfMzW3annvFv0rw==",
"hasInstallScript": true,
"license": "BSD-3-Clause",
"dependencies": {
"@protobufjs/aspromise": "^1.1.2",
"@protobufjs/base64": "^1.1.2",
"@protobufjs/codegen": "^2.0.4",
"@protobufjs/eventemitter": "^1.1.0",
"@protobufjs/fetch": "^1.1.0",
"@protobufjs/float": "^1.0.2",
"@protobufjs/inquire": "^1.1.0",
"@protobufjs/path": "^1.1.2",
"@protobufjs/pool": "^1.1.0",
"@protobufjs/utf8": "^1.1.0",
"@types/node": ">=13.7.0",
"long": "^5.0.0"
},
"engines": {
"node": ">=12.0.0"
}
},
"node_modules/proxy-addr": {
"version": "2.0.7",
"resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
@@ -15451,7 +15754,6 @@
"version": "6.21.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
"dev": true,
"license": "MIT"
},
"node_modules/unicode-canonical-property-names-ecmascript": {

View File

@@ -30,6 +30,13 @@
},
"dependencies": {
"@hookform/resolvers": "^3.3.2",
"@opentelemetry/api": "^1.9.0",
"@opentelemetry/exporter-metrics-otlp-http": "^0.210.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.210.0",
"@opentelemetry/resources": "^2.4.0",
"@opentelemetry/sdk-metrics": "^2.4.0",
"@opentelemetry/sdk-trace-web": "^2.4.0",
"@opentelemetry/semantic-conventions": "^1.39.0",
"@radix-ui/react-accordion": "^1.1.2",
"@radix-ui/react-checkbox": "^1.0.4",
"@radix-ui/react-dialog": "^1.0.5",

View File

@@ -0,0 +1,66 @@
import React, { useState } from 'react';
import { trackUserAction, trackUserLocation } from '../utils/analytics';
const AnalyticsTestComponent: React.FC = () => {
const [locationStatus, setLocationStatus] = useState<string>('Not requested');
const [actionStatus, setActionStatus] = useState<string>('');
const handleTrackLocation = async () => {
try {
setLocationStatus('Requesting...');
await trackUserLocation();
setLocationStatus('Location tracked successfully!');
} catch (error) {
setLocationStatus('Error tracking location');
console.error('Location tracking error:', error);
}
};
const handleTrackAction = () => {
const actionName = `button_click_${Date.now()}`;
trackUserAction(actionName, {
component: 'AnalyticsTestComponent',
timestamp: new Date().toISOString()
});
setActionStatus(`Action "${actionName}" tracked`);
};
return (
<div className="p-6 max-w-2xl mx-auto bg-white rounded-lg shadow-md">
<h2 className="text-xl font-bold mb-4">Analytics Test Component</h2>
<div className="mb-4">
<button
onClick={handleTrackLocation}
className="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4"
>
Track Location
</button>
<span className="text-sm text-gray-600">{locationStatus}</span>
</div>
<div className="mb-4">
<button
onClick={handleTrackAction}
className="bg-green-500 hover:bg-green-700 text-white font-bold py-2 px-4 rounded"
>
Track Action
</button>
<span className="text-sm text-gray-600 ml-4">{actionStatus}</span>
</div>
<div className="mt-6 p-4 bg-gray-100 rounded">
<h3 className="font-semibold mb-2">Expected Behavior:</h3>
<ul className="list-disc pl-5 space-y-1 text-sm">
<li>Page views are automatically tracked when this component loads</li>
<li>Session information is captured on initial load</li>
<li>Browser and device info is collected automatically</li>
<li>Clicking buttons will generate user action traces</li>
<li>Location tracking requires user permission</li>
</ul>
</div>
</div>
);
};
export default AnalyticsTestComponent;

View File

@@ -5,6 +5,9 @@ interface RuntimeConfig {
VITE_API_URL: string;
VITE_APP_TITLE: string;
VITE_APP_VERSION: string;
VITE_OTEL_TRACES_ENDPOINT?: string;
VITE_OTEL_METRICS_ENDPOINT?: string;
VITE_OTEL_ENABLED?: string;
}
declare global {
@@ -27,6 +30,9 @@ function getRuntimeConfig(): RuntimeConfig {
VITE_API_URL: import.meta.env.VITE_API_URL || 'http://localhost:8000',
VITE_APP_TITLE: import.meta.env.VITE_APP_TITLE || 'PanIA Dashboard',
VITE_APP_VERSION: import.meta.env.VITE_APP_VERSION || '1.0.0',
VITE_OTEL_TRACES_ENDPOINT: import.meta.env.VITE_OTEL_TRACES_ENDPOINT || '/api/v1/telemetry/v1/traces',
VITE_OTEL_METRICS_ENDPOINT: import.meta.env.VITE_OTEL_METRICS_ENDPOINT || '/api/v1/telemetry/v1/metrics',
VITE_OTEL_ENABLED: import.meta.env.VITE_OTEL_ENABLED || 'true',
};
}
@@ -52,6 +58,21 @@ export function isKubernetesEnvironment(): boolean {
return typeof window !== 'undefined' && !!window.__RUNTIME_CONFIG__;
}
// Helper to check if OpenTelemetry is enabled
export function isOpenTelemetryEnabled(): boolean {
return config.VITE_OTEL_ENABLED?.toLowerCase() !== 'false';
}
// Helper to get OpenTelemetry traces endpoint
export function getOtelTracesEndpoint(): string {
return config.VITE_OTEL_TRACES_ENDPOINT || '/api/v1/telemetry/v1/traces';
}
// Helper to get OpenTelemetry metrics endpoint
export function getOtelMetricsEndpoint(): string {
return config.VITE_OTEL_METRICS_ENDPOINT || '/api/v1/telemetry/v1/metrics';
}
// Debug function to log current configuration
export function logConfig(): void {
console.log('Current configuration:', {

View File

@@ -0,0 +1,33 @@
import {
trackPageView,
trackUserAction,
trackUserLocation,
trackSession,
getCurrentUserId,
isAnalyticsEnabled
} from '../utils/analytics';
/**
* React Hook for analytics
*
* NOTE: Page view tracking is handled globally by initializeAnalytics() in main.tsx.
* This hook only exposes tracking functions for use in components.
* Do NOT add automatic page tracking here to avoid duplicate events.
*/
export const useAnalytics = () => {
return {
// Manual page view tracking (use only for custom page events, not navigation)
trackPageView,
// Track user actions (button clicks, form submissions, etc.)
trackUserAction,
// Track user location (requires consent)
trackUserLocation,
// Track session (typically called once at app init)
trackSession,
// Get current user ID
getCurrentUserId,
// Check if analytics are enabled
isAnalyticsEnabled
};
};
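
A minimal usage sketch (not part of this commit) showing how a component might consume the hook; the component name and the `'../hooks/useAnalytics'` import path are illustrative assumptions:

```tsx
// Hypothetical example component; import path assumed to match the project layout.
import React from 'react';
import { useAnalytics } from '../hooks/useAnalytics';

const ExportButton: React.FC = () => {
  // The hook simply re-exports the analytics utilities
  const { trackUserAction } = useAnalytics();

  const handleClick = () => {
    // trackUserAction is a no-op when the user has disabled analytics
    trackUserAction('export_report_clicked', { component: 'ExportButton' });
    // ...actual export logic here
  };

  return <button onClick={handleClick}>Export report</button>;
};

export default ExportButton;
```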

View File

@@ -7,6 +7,92 @@ import './styles/animations.css';
import './styles/themes/light.css';
import './styles/themes/dark.css';
// OpenTelemetry Web SDK initialization
import { WebTracerProvider } from '@opentelemetry/sdk-trace-web';
import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-base';
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
import { resourceFromAttributes } from '@opentelemetry/resources';
import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION } from '@opentelemetry/semantic-conventions';
import { MeterProvider, PeriodicExportingMetricReader } from '@opentelemetry/sdk-metrics';
import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-http';
import { metrics } from '@opentelemetry/api';
// Import analytics utilities
import { initializeAnalytics } from './utils/analytics';
// Import configuration
import { isOpenTelemetryEnabled, getOtelTracesEndpoint, getOtelMetricsEndpoint } from './config/runtime';
// Store cleanup function for proper teardown
let analyticsCleanup: (() => void) | null = null;
// Initialize OpenTelemetry
const initOpenTelemetry = () => {
// Check if OpenTelemetry is enabled in configuration
if (!isOpenTelemetryEnabled()) {
console.log('OpenTelemetry disabled by configuration');
return;
}
try {
// Create resource with service information using non-deprecated attributes
const resource = resourceFromAttributes({
[ATTR_SERVICE_NAME]: 'bakery-frontend',
[ATTR_SERVICE_VERSION]: '1.0.0'
});
// Initialize tracer with span processor
const traceExporter = new OTLPTraceExporter({
url: getOtelTracesEndpoint() // Using configured endpoint
});
const traceProvider = new WebTracerProvider({
resource: resource,
// Add span processors as array for current OpenTelemetry SDK version
spanProcessors: [new BatchSpanProcessor(traceExporter)]
});
traceProvider.register();
// Initialize metrics
const metricExporter = new OTLPMetricExporter({
url: getOtelMetricsEndpoint()
});
const metricReader = new PeriodicExportingMetricReader({
exporter: metricExporter,
exportIntervalMillis: 10000, // 10 seconds
});
// Use the MeterProvider constructor with readers array
const meterProvider = new MeterProvider({
resource: resource,
readers: [metricReader]
});
// Register the meter provider globally using proper API
metrics.setGlobalMeterProvider(meterProvider);
console.log('OpenTelemetry initialized for frontend');
} catch (error) {
console.error('Failed to initialize OpenTelemetry:', error);
// Continue without OpenTelemetry if initialization fails
}
};
// Initialize OpenTelemetry before rendering the app
initOpenTelemetry();
// Initialize analytics tracking and store cleanup function
analyticsCleanup = initializeAnalytics();
// Cleanup on page unload
window.addEventListener('beforeunload', () => {
if (analyticsCleanup) {
analyticsCleanup();
}
});
// PWA/ServiceWorker functionality removed to avoid conflicts in development
ReactDOM.createRoot(document.getElementById('root')!).render(

View File

@@ -0,0 +1,301 @@
import { trace } from '@opentelemetry/api';
import { ATTR_HTTP_ROUTE } from '@opentelemetry/semantic-conventions';
// Types and Interfaces
interface AnalyticsMetadata {
[key: string]: string | number | boolean | undefined;
}
// Constants
const ANALYTICS_ENABLED_KEY = 'analyticsEnabled';
const LOCATION_CONSENT_KEY = 'locationTrackingConsent';
const SESSION_ID_KEY = 'sessionId';
const USER_ID_KEY = 'userId';
// Generate a unique session ID
const generateSessionId = (): string => {
return Date.now().toString(36) + Math.random().toString(36).substring(2);
};
// Get current user ID (implement based on your auth system)
export const getCurrentUserId = (): string | null => {
// This is a placeholder - implement based on your authentication system
// For example, you might get this from localStorage, cookies, or context
return localStorage.getItem(USER_ID_KEY) || sessionStorage.getItem(USER_ID_KEY) || null;
};
// Track page view
export const trackPageView = (pathname: string): void => {
// Check if analytics are enabled
if (!isAnalyticsEnabled()) {
return;
}
try {
const tracer = trace.getTracer('bakery-frontend');
const user_id = getCurrentUserId();
const span = tracer.startSpan('page_view', {
attributes: {
[ATTR_HTTP_ROUTE]: pathname,
'user.id': user_id || 'anonymous',
'page.path': pathname,
}
});
// End the span immediately for page views
span.end();
} catch (error) {
console.error('Failed to track page view:', error);
}
};
// Check if analytics are enabled
export const isAnalyticsEnabled = (): boolean => {
return localStorage.getItem(ANALYTICS_ENABLED_KEY) !== 'false';
};
// Enable or disable analytics
export const setAnalyticsEnabled = (enabled: boolean): void => {
localStorage.setItem(ANALYTICS_ENABLED_KEY, enabled.toString());
};
// Check if location tracking consent is granted
export const isLocationTrackingConsentGranted = (): boolean => {
return localStorage.getItem(LOCATION_CONSENT_KEY) === 'granted';
};
// Set location tracking consent
export const setLocationTrackingConsent = (granted: boolean): void => {
localStorage.setItem(LOCATION_CONSENT_KEY, granted ? 'granted' : 'denied');
};
// Track user session
export const trackSession = (): (() => void) => {
// Check if analytics are enabled
if (!isAnalyticsEnabled()) {
console.log('Analytics disabled by user preference');
return () => {}; // Return no-op cleanup function
}
try {
const tracer = trace.getTracer('bakery-frontend');
const sessionId = generateSessionId();
const userId = getCurrentUserId();
const span = tracer.startSpan('user_session', {
attributes: {
'session.id': sessionId,
'user.id': userId || 'anonymous',
'browser.user_agent': navigator.userAgent,
'screen.width': window.screen.width.toString(),
'screen.height': window.screen.height.toString(),
'device.type': /mobile|tablet|ipad|iphone|ipod|android|silk/i.test(navigator.userAgent) ? 'mobile' : 'desktop'
}
});
// Store session ID in sessionStorage for later use
sessionStorage.setItem(SESSION_ID_KEY, sessionId);
// End span when session ends
const handleBeforeUnload = () => {
span.end();
};
window.addEventListener('beforeunload', handleBeforeUnload);
// Clean up event listener when needed
return () => {
window.removeEventListener('beforeunload', handleBeforeUnload);
};
} catch (error) {
console.error('Failed to track session:', error);
return () => {}; // Return no-op cleanup function
}
};
// Track user action
export const trackUserAction = (action: string, metadata?: AnalyticsMetadata): void => {
// Check if analytics are enabled
if (!isAnalyticsEnabled()) {
return;
}
try {
const tracer = trace.getTracer('bakery-frontend');
const userId = getCurrentUserId();
const span = tracer.startSpan('user_action', {
attributes: {
'user.action': action,
'user.id': userId || 'anonymous',
...metadata
}
});
span.end();
} catch (error) {
console.error('Failed to track user action:', error);
}
};
// Track user location (with consent)
export const trackUserLocation = async (): Promise<void> => {
// Check if analytics are enabled
if (!isAnalyticsEnabled()) {
return;
}
// Check if location tracking consent is granted
if (!isLocationTrackingConsentGranted()) {
console.log('Location tracking consent not granted');
return;
}
try {
const position = await new Promise<GeolocationPosition>((resolve, reject) => {
if (!navigator.geolocation) {
reject(new Error('Geolocation not supported'));
return;
}
navigator.geolocation.getCurrentPosition(resolve, reject, {
enableHighAccuracy: false,
timeout: 10000,
maximumAge: 300000 // 5 minutes
});
});
const tracer = trace.getTracer('bakery-frontend');
const userId = getCurrentUserId();
const span = tracer.startSpan('user_location', {
attributes: {
'user.id': userId || 'anonymous',
'location.latitude': position.coords.latitude,
'location.longitude': position.coords.longitude,
'location.accuracy': position.coords.accuracy,
'location.altitude': position.coords.altitude ?? undefined,
'location.speed': position.coords.speed ?? undefined,
'location.heading': position.coords.heading ?? undefined
}
});
span.end();
} catch (error) {
console.log('Location access denied or unavailable:', error);
}
};
// Initialize analytics tracking
export const initializeAnalytics = (): (() => void) => {
// Track initial session
const cleanupSession = trackSession();
// Track initial page view
trackPageView(window.location.pathname);
// Listen for route changes (for SPA navigation)
let previousUrl = window.location.href;
// For hash-based routing
const handleHashChange = () => {
if (window.location.href !== previousUrl) {
trackPageView(window.location.pathname + window.location.search);
previousUrl = window.location.href;
}
};
// For history API-based routing (most common in React apps)
// Use proper typing for history state methods
const originalPushState = history.pushState.bind(history);
const handlePushState = function (
this: History,
data: unknown,
unused: string,
url?: string | URL | null
) {
originalPushState(data, unused, url);
setTimeout(() => {
if (window.location.href !== previousUrl) {
trackPageView(window.location.pathname + window.location.search);
previousUrl = window.location.href;
}
}, 0);
};
const originalReplaceState = history.replaceState.bind(history);
const handleReplaceState = function (
this: History,
data: unknown,
unused: string,
url?: string | URL | null
) {
originalReplaceState(data, unused, url);
setTimeout(() => {
if (window.location.href !== previousUrl) {
trackPageView(window.location.pathname + window.location.search);
previousUrl = window.location.href;
}
}, 0);
};
// Override history methods
history.pushState = handlePushState;
history.replaceState = handleReplaceState;
// Add event listeners
window.addEventListener('hashchange', handleHashChange);
// Track user consent for location if needed
if (isLocationTrackingConsentGranted()) {
trackUserLocation();
}
// Return cleanup function
return () => {
// Restore original history methods
history.pushState = originalPushState;
history.replaceState = originalReplaceState;
// Remove event listeners
window.removeEventListener('hashchange', handleHashChange);
// Clean up session tracking
cleanupSession();
};
};
// Function to track custom metrics using OpenTelemetry spans
export const trackCustomMetric = (
name: string,
value: number,
attributes?: Record<string, string>
): void => {
// Check if analytics are enabled
if (!isAnalyticsEnabled()) {
return;
}
try {
// Record metric as a span with the value as an attribute
// This approach works well for browser-based metrics since
// the OpenTelemetry metrics API in browsers sends to the same collector
const tracer = trace.getTracer('bakery-frontend');
const userId = getCurrentUserId();
const span = tracer.startSpan('custom_metric', {
attributes: {
'metric.name': name,
'metric.value': value,
'user.id': userId || 'anonymous',
...attributes
}
});
span.end();
} catch (error) {
// Log error but don't fail - metrics are non-critical
console.warn('Failed to track custom metric:', error);
}
};
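
For illustration only, a sketch (not in this commit) of reporting a page-load duration through `trackCustomMetric`, which records the value as a span attribute rather than via the metrics API; the metric name and the import path are assumptions:

```ts
// Hypothetical call site; './utils/analytics' path assumed as in main.tsx.
import { trackCustomMetric } from './utils/analytics';

const navEntry = performance.getEntriesByType('navigation')[0] as
  PerformanceNavigationTiming | undefined;

if (navEntry) {
  // Duration from navigation start to loadEventEnd, in milliseconds
  trackCustomMetric('page_load_time_ms', Math.round(navEntry.duration), {
    'page.path': window.location.pathname,
  });
}
```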

View File

@@ -51,10 +51,11 @@ export default defineConfig(({ mode }) => {
build: {
outDir: 'dist',
// For production builds: ensure assets have correct paths
// Base path should be '/' for root deployment
// Base path should match the deployment URL
base: process.env.VITE_BASE_URL || '/',
// In development mode: inline source maps for better debugging
// In production mode: external source maps
sourcemap: isDevelopment ? 'inline' : true,
// In production mode: external source maps (can be disabled with VITE_DISABLE_SOURCEMAPS)
sourcemap: process.env.VITE_DISABLE_SOURCEMAPS ? false : (isDevelopment ? 'inline' : true),
// In development mode: disable minification for readable errors
// In production mode: use esbuild minification
minify: isDevelopment ? false : 'esbuild',

View File

@@ -25,7 +25,7 @@ from app.middleware.rate_limiting import APIRateLimitMiddleware
from app.middleware.subscription import SubscriptionMiddleware
from app.middleware.demo_middleware import DemoMiddleware
from app.middleware.read_only_mode import ReadOnlyModeMiddleware
from app.routes import auth, tenant, registration, nominatim, subscription, demo, pos, geocoding, poi_context, webhooks
from app.routes import auth, tenant, registration, nominatim, subscription, demo, pos, geocoding, poi_context, webhooks, telemetry
# Initialize logger
logger = structlog.get_logger()
@@ -169,6 +169,9 @@ app.include_router(demo.router, prefix="/api/v1", tags=["demo"])
# Webhook routes are defined with full /api/v1/webhooks/* paths for consistency
app.include_router(webhooks.router, prefix="", tags=["webhooks"])
# Include telemetry routes for frontend OpenTelemetry data
app.include_router(telemetry.router, prefix="/api/v1", tags=["telemetry"])
# ================================================================
# SERVER-SENT EVENTS (SSE) HELPER FUNCTIONS

View File

@@ -47,7 +47,10 @@ PUBLIC_ROUTES = [
"/api/v1/demo/accounts",
"/api/v1/demo/sessions",
"/api/v1/webhooks/stripe", # Stripe webhook endpoint - bypasses auth for signature verification
"/api/v1/webhooks/generic" # Generic webhook endpoint
"/api/v1/webhooks/generic", # Generic webhook endpoint
"/api/v1/telemetry/v1/traces", # Frontend telemetry traces - no auth for performance
"/api/v1/telemetry/v1/metrics", # Frontend telemetry metrics - no auth for performance
"/api/v1/telemetry/health" # Telemetry health check
]
# Routes accessible with demo session (no JWT required, just demo session header)

View File

@@ -0,0 +1,303 @@
"""
Telemetry routes for API Gateway - Handles frontend telemetry data
This module provides endpoints for:
- Receiving OpenTelemetry traces from frontend
- Proxying traces to Signoz OTel collector
- Providing a secure, authenticated endpoint for frontend telemetry
"""
from fastapi import APIRouter, Request, HTTPException, status
from fastapi.responses import JSONResponse, Response
import httpx
import logging
import os
from typing import Optional
from app.core.config import settings
from app.core.header_manager import header_manager
from shared.monitoring.metrics import MetricsCollector, create_metrics_collector
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/telemetry", tags=["telemetry"])
# Get Signoz OTel collector endpoint from environment or use default
SIGNOZ_OTEL_COLLECTOR = os.getenv(
"SIGNOZ_OTEL_COLLECTOR_URL",
"http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
)
@router.post("/v1/traces")
async def receive_frontend_traces(request: Request):
"""
Receive OpenTelemetry traces from frontend and proxy to Signoz
This endpoint:
- Accepts OTLP trace data from frontend
- Validates the request
- Proxies to Signoz OTel collector
- Handles errors gracefully
"""
# Handle OPTIONS requests for CORS
if request.method == "OPTIONS":
return Response(
status_code=200,
headers={
"Access-Control-Allow-Origin": settings.CORS_ORIGINS_LIST,
"Access-Control-Allow-Methods": "POST, OPTIONS",
"Access-Control-Allow-Headers": "Content-Type, Authorization, X-Tenant-ID",
"Access-Control-Allow-Credentials": "true",
"Access-Control-Max-Age": "86400"
}
)
try:
# Get the trace data from the request
body = await request.body()
if not body:
logger.warning("Received empty trace data from frontend")
return JSONResponse(
status_code=400,
content={"error": "Empty trace data"}
)
# Log the trace reception (without sensitive data)
logger.info(
"Received frontend traces, content_length=%s, content_type=%s, user_agent=%s",
len(body),
request.headers.get("content-type"),
request.headers.get("user-agent")
)
# Forward to Signoz OTel collector
target_url = f"{SIGNOZ_OTEL_COLLECTOR}/v1/traces"
# Set up headers for the Signoz collector
forward_headers = {
"Content-Type": request.headers.get("content-type", "application/json"),
"User-Agent": "bakery-gateway/1.0",
"X-Forwarded-For": request.headers.get("x-forwarded-for", "frontend"),
"X-Tenant-ID": request.headers.get("x-tenant-id", "unknown")
}
# Add authentication if configured
signoz_auth_token = os.getenv("SIGNOZ_AUTH_TOKEN")
if signoz_auth_token:
forward_headers["Authorization"] = f"Bearer {signoz_auth_token}"
# Send to Signoz collector
timeout_config = httpx.Timeout(
connect=5.0,
read=10.0,
write=5.0,
pool=5.0
)
async with httpx.AsyncClient(timeout=timeout_config) as client:
response = await client.post(
url=target_url,
content=body,
headers=forward_headers
)
# Log the response from Signoz
logger.info(
"Forwarded traces to Signoz, signoz_status=%s, signoz_response_time=%s",
response.status_code,
response.elapsed.total_seconds()
)
# Return success response to frontend
return JSONResponse(
status_code=200,
content={
"message": "Traces received and forwarded to Signoz",
"signoz_status": response.status_code,
"trace_count": 1 # We don't know exact count without parsing
}
)
except httpx.HTTPStatusError as e:
logger.error(
"Signoz collector returned error, status_code=%s, error_message=%s",
e.response.status_code,
str(e)
)
return JSONResponse(
status_code=502,
content={
"error": "Signoz collector error",
"details": str(e),
"signoz_status": e.response.status_code
}
)
except httpx.RequestError as e:
logger.error(
"Failed to connect to Signoz collector, error=%s, collector_url=%s",
str(e),
SIGNOZ_OTEL_COLLECTOR
)
return JSONResponse(
status_code=503,
content={
"error": "Signoz collector unavailable",
"details": str(e)
}
)
except Exception as e:
logger.error(
"Unexpected error processing traces, error=%s, error_type=%s",
str(e),
type(e).__name__
)
return JSONResponse(
status_code=500,
content={
"error": "Internal server error",
"details": str(e)
}
)
@router.post("/v1/metrics")
async def receive_frontend_metrics(request: Request):
"""
Receive OpenTelemetry metrics from frontend and proxy to Signoz
"""
# Handle OPTIONS requests for CORS preflight
if request.method == "OPTIONS":
# Header values must be strings: echo the request Origin when it is in the configured allow-list
origin = request.headers.get("origin", "")
allowed_origin = origin if origin in settings.CORS_ORIGINS_LIST else ""
return Response(
status_code=200,
headers={
"Access-Control-Allow-Origin": allowed_origin,
"Access-Control-Allow-Methods": "POST, OPTIONS",
"Access-Control-Allow-Headers": "Content-Type, Authorization, X-Tenant-ID",
"Access-Control-Allow-Credentials": "true",
"Access-Control-Max-Age": "86400"
}
)
try:
body = await request.body()
if not body:
return JSONResponse(
status_code=400,
content={"error": "Empty metrics data"}
)
logger.info(
"Received frontend metrics, content_length=%s, content_type=%s",
len(body),
request.headers.get("content-type")
)
# Forward to Signoz OTel collector
target_url = f"{SIGNOZ_OTEL_COLLECTOR}/v1/metrics"
forward_headers = {
"Content-Type": request.headers.get("content-type", "application/json"),
"User-Agent": "bakery-gateway/1.0",
"X-Forwarded-For": request.headers.get("x-forwarded-for", "frontend"),
"X-Tenant-ID": request.headers.get("x-tenant-id", "unknown")
}
# Add authentication if configured
signoz_auth_token = os.getenv("SIGNOZ_AUTH_TOKEN")
if signoz_auth_token:
forward_headers["Authorization"] = f"Bearer {signoz_auth_token}"
timeout_config = httpx.Timeout(
connect=5.0,
read=10.0,
write=5.0,
pool=5.0
)
async with httpx.AsyncClient(timeout=timeout_config) as client:
response = await client.post(
url=target_url,
content=body,
headers=forward_headers
)
logger.info(
"Forwarded metrics to Signoz, signoz_status=%s",
response.status_code
)
return JSONResponse(
status_code=200,
content={
"message": "Metrics received and forwarded to Signoz",
"signoz_status": response.status_code
}
)
except Exception as e:
logger.error(
"Error processing metrics, error=%s",
str(e)
)
return JSONResponse(
status_code=500,
content={
"error": "Internal server error",
"details": str(e)
}
)
@router.get("/health")
async def telemetry_health():
"""
Health check endpoint for telemetry service
"""
return JSONResponse(
status_code=200,
content={
"status": "healthy",
"service": "telemetry-gateway",
"signoz_collector": SIGNOZ_OTEL_COLLECTOR
}
)
# Initialize metrics for this module
try:
metrics_collector = create_metrics_collector("gateway-telemetry")
except Exception as e:
logger.error("Failed to create metrics collector, error=%s", str(e))
metrics_collector = None
@router.on_event("startup")
async def startup_event():
"""Initialize telemetry metrics on startup"""
try:
if metrics_collector:
# Register telemetry-specific metrics
metrics_collector.register_counter(
"gateway_telemetry_traces_received",
"Number of trace batches received from frontend"
)
metrics_collector.register_counter(
"gateway_telemetry_metrics_received",
"Number of metric batches received from frontend"
)
metrics_collector.register_counter(
"gateway_telemetry_errors",
"Number of telemetry processing errors"
)
logger.info(
"Telemetry gateway initialized, signoz_collector=%s",
SIGNOZ_OTEL_COLLECTOR
)
except Exception as e:
logger.error(
"Failed to initialize telemetry metrics, error=%s",
str(e)
)
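
A minimal sketch for exercising the new proxy from a workstation, assuming the gateway is port-forwarded to http://localhost:8000 (hypothetical URL) and using the /api/v1/telemetry/v1/traces path added to PUBLIC_ROUTES above; the payload follows the OTLP/JSON envelope, and no Authorization header is required because the route is public.

# Sketch: send a tiny OTLP/JSON trace payload through the gateway telemetry proxy.
# The base URL is an assumption; adjust it for your environment.
import httpx

OTLP_PAYLOAD = {
    "resourceSpans": [
        {
            "resource": {
                "attributes": [
                    {"key": "service.name", "value": {"stringValue": "bakery-frontend"}}
                ]
            },
            "scopeSpans": [],
        }
    ]
}

def send_test_traces(base_url: str = "http://localhost:8000") -> None:
    response = httpx.post(
        f"{base_url}/api/v1/telemetry/v1/traces",
        json=OTLP_PAYLOAD,
        headers={"X-Tenant-ID": "demo-tenant"},
        timeout=10.0,
    )
    print(response.status_code, response.json())

if __name__ == "__main__":
    send_test_traces()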

View File

@@ -6,7 +6,7 @@
# Install Command: helm install signoz signoz/signoz -n bakery-ia -f signoz-values-prod.yaml
global:
storageClass: "standard" # For MicroK8s, use "microk8s-hostpath" or custom storage class
storageClass: "microk8s-hostpath" # For MicroK8s, use "microk8s-hostpath" or custom storage class
clusterName: "bakery-ia-prod"
domain: "monitoring.bakewise.ai"
# Docker Hub credentials - applied to all sub-charts (including Zookeeper, ClickHouse, etc)

View File

@@ -140,10 +140,9 @@ spec:
name: pos-integration-secrets
- secretRef:
name: whatsapp-secrets
volumeMounts:
- name: model-storage
mountPath: /app/models
readOnly: true # Forecasting only reads models
- secretRef:
name: minio-secrets
# Model storage now uses MinIO - no local volumeMounts needed
resources:
requests:
memory: "512Mi"
@@ -172,10 +171,7 @@ spec:
secret:
secretName: redis-tls-secret
defaultMode: 0400
- name: model-storage
persistentVolumeClaim:
claimName: model-storage
readOnly: true # Forecasting only reads models
# Model storage migrated to MinIO - PVC no longer needed
---
apiVersion: v1

View File

@@ -56,6 +56,11 @@ spec:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: SIGNOZ_OTEL_COLLECTOR_URL
valueFrom:
configMapKeyRef:
name: bakery-config
key: SIGNOZ_OTEL_COLLECTOR_URL
resources:
requests:
memory: "256Mi"

View File

@@ -0,0 +1,154 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: minio
namespace: bakery-ia
labels:
app.kubernetes.io/name: minio
app.kubernetes.io/component: storage
app.kubernetes.io/part-of: bakery-ia
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: minio
app.kubernetes.io/component: storage
template:
metadata:
labels:
app.kubernetes.io/name: minio
app.kubernetes.io/component: storage
spec:
# Init container to set up TLS certificates with correct permissions
initContainers:
- name: init-certs
image: busybox:1.36
command:
- sh
- -c
- |
mkdir -p /certs/CAs
cp /certs-secret/minio-cert.pem /certs/public.crt
cp /certs-secret/minio-key.pem /certs/private.key
cp /certs-secret/ca-cert.pem /certs/CAs/ca.crt
chmod 600 /certs/private.key
chmod 644 /certs/public.crt /certs/CAs/ca.crt
volumeMounts:
- name: certs-secret
mountPath: /certs-secret
readOnly: true
- name: certs
mountPath: /certs
containers:
- name: minio
image: minio/minio:RELEASE.2024-11-07T00-52-20Z
args:
- server
- /data
- --console-address
- :9001
- --address
- :9000
- --certs-dir
- /certs
env:
- name: MINIO_ROOT_USER
valueFrom:
secretKeyRef:
name: minio-secrets
key: MINIO_ROOT_USER
- name: MINIO_ROOT_PASSWORD
valueFrom:
secretKeyRef:
name: minio-secrets
key: MINIO_ROOT_PASSWORD
# Enable TLS for MinIO
- name: MINIO_SERVER_URL
value: "https://minio.bakery-ia.svc.cluster.local:9000"
- name: MINIO_BROWSER_REDIRECT_URL
value: "https://minio-console.bakery-ia.svc.cluster.local:9001"
ports:
- containerPort: 9000
name: api
- containerPort: 9001
name: console
volumeMounts:
- name: minio-data
mountPath: /data
- name: certs
mountPath: /certs
readOnly: true
resources:
requests:
memory: "512Mi"
cpu: "200m"
limits:
memory: "2Gi"
cpu: "1000m"
livenessProbe:
httpGet:
path: /minio/health/live
port: 9000
scheme: HTTPS
initialDelaySeconds: 30
periodSeconds: 30
readinessProbe:
httpGet:
path: /minio/health/ready
port: 9000
scheme: HTTPS
initialDelaySeconds: 5
periodSeconds: 15
volumes:
- name: minio-data
persistentVolumeClaim:
claimName: minio-data
- name: certs-secret
secret:
secretName: minio-tls
- name: certs
emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
name: minio
namespace: bakery-ia
labels:
app.kubernetes.io/name: minio
app.kubernetes.io/component: storage
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: api
- port: 9001
targetPort: 9001
protocol: TCP
name: console
selector:
app.kubernetes.io/name: minio
app.kubernetes.io/component: storage
---
apiVersion: v1
kind: Service
metadata:
name: minio-console
namespace: bakery-ia
labels:
app.kubernetes.io/name: minio
app.kubernetes.io/component: storage
spec:
type: ClusterIP
ports:
- port: 9001
targetPort: 9001
protocol: TCP
name: console
selector:
app.kubernetes.io/name: minio
app.kubernetes.io/component: storage

View File

@@ -0,0 +1,16 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: minio-data
namespace: bakery-ia
labels:
app.kubernetes.io/name: minio-data
app.kubernetes.io/component: storage
app.kubernetes.io/part-of: bakery-ia
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 100Gi
storageClassName: standard

View File

@@ -0,0 +1,22 @@
apiVersion: v1
kind: Secret
metadata:
name: minio-secrets
namespace: bakery-ia
labels:
app.kubernetes.io/name: minio-secrets
app.kubernetes.io/component: storage
app.kubernetes.io/part-of: bakery-ia
type: Opaque
data:
# MinIO Root Credentials (base64 encoded)
MINIO_ROOT_USER: YWRtaW4= # admin
MINIO_ROOT_PASSWORD: c2VjdXJlLXBhc3N3b3Jk # secure-password
# Service Account Credentials for applications
MINIO_ACCESS_KEY: dHJhaW5pbmctc2VydmljZQ== # training-service
MINIO_SECRET_KEY: dHJhaW5pbmctc2VjcmV0LWtleQ== # training-secret-key
# Forecasting Service Credentials
FORECASTING_MINIO_ACCESS_KEY: Zm9yZWNhc3Rpbmctc2VydmljZQ== # forecasting-service
FORECASTING_MINIO_SECRET_KEY: Zm9yZWNhc3Rpbmctc2VjcmV0LWtleQ== # forecasting-secret-key

View File

@@ -140,11 +140,11 @@ spec:
name: pos-integration-secrets
- secretRef:
name: whatsapp-secrets
- secretRef:
name: minio-secrets
volumeMounts:
- name: tmp-storage
mountPath: /tmp
- name: model-storage
mountPath: /app/models
resources:
requests:
memory: "512Mi"
@@ -176,9 +176,6 @@ spec:
- name: tmp-storage
emptyDir:
sizeLimit: 4Gi # Increased from 2Gi to handle cmdstan temp files during optimization
- name: model-storage
persistentVolumeClaim:
claimName: model-storage
---
apiVersion: v1

View File

@@ -1,16 +0,0 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: model-storage
namespace: bakery-ia
labels:
app.kubernetes.io/name: model-storage
app.kubernetes.io/component: storage
app.kubernetes.io/part-of: bakery-ia
spec:
accessModes:
- ReadWriteOnce # Single node access (works with local Kubernetes)
resources:
requests:
storage: 10Gi # Adjust based on your needs
storageClassName: standard # Use default local-path provisioner

View File

@@ -66,6 +66,17 @@ data:
ALERT_PROCESSOR_DB_HOST: "alert-processor-db-service"
AI_INSIGHTS_DB_HOST: "ai-insights-db-service"
DISTRIBUTION_DB_HOST: "distribution-db-service"
DEMO_SESSION_DB_HOST: "demo-session-db-service"
# MinIO Configuration
MINIO_ENDPOINT: "minio.bakery-ia.svc.cluster.local:9000"
MINIO_USE_SSL: "true"
MINIO_MODEL_BUCKET: "training-models"
MINIO_CONSOLE_PORT: "9001"
MINIO_API_PORT: "9000"
MINIO_REGION: "us-east-1"
MINIO_MODEL_LIFECYCLE_DAYS: "90"
MINIO_CACHE_TTL_SECONDS: "3600"
# Database Configuration
DB_PORT: "5432"
@@ -238,7 +249,8 @@ data:
# ================================================================
# MODEL STORAGE & TRAINING
# ================================================================
MODEL_STORAGE_PATH: "/app/models"
# Model storage is handled by MinIO (see MinIO Configuration section)
MODEL_STORAGE_BACKEND: "minio"
MODEL_BACKUP_ENABLED: "true"
MODEL_VERSIONING_ENABLED: "true"
MAX_TRAINING_TIME_MINUTES: "30"
@@ -416,6 +428,9 @@ data:
# OTEL_EXPORTER_OTLP_METRICS_ENDPOINT: "signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
# OTEL_EXPORTER_OTLP_LOGS_ENDPOINT: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
# Gateway telemetry proxy configuration
SIGNOZ_OTEL_COLLECTOR_URL: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
# Optional: Protocol overrides per signal
# OTEL_EXPORTER_OTLP_TRACES_PROTOCOL: "grpc"
# OTEL_EXPORTER_OTLP_METRICS_PROTOCOL: "grpc"
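
A short sketch of how a service can consume the MinIO keys defined in this ConfigMap, assuming the "minio" Python SDK is available in the service image and that access credentials are injected from the minio-secrets Secret; names mirror the keys added above, everything else is illustrative.

# Sketch: build a MinIO client from the ConfigMap values plus injected credentials.
import os
from minio import Minio

def make_minio_client() -> Minio:
    endpoint = os.getenv("MINIO_ENDPOINT", "minio.bakery-ia.svc.cluster.local:9000")
    use_ssl = os.getenv("MINIO_USE_SSL", "true").lower() == "true"
    # Note: with the self-signed CA from this commit, the pod also needs the CA
    # bundle trusted (e.g. a custom urllib3 PoolManager); omitted here for brevity.
    return Minio(
        endpoint,
        access_key=os.getenv("MINIO_ACCESS_KEY", ""),
        secret_key=os.getenv("MINIO_SECRET_KEY", ""),
        secure=use_ssl,
        region=os.getenv("MINIO_REGION", "us-east-1"),
    )

def model_bucket_exists(client: Minio) -> bool:
    return client.bucket_exists(os.getenv("MINIO_MODEL_BUCKET", "training-models"))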

View File

@@ -0,0 +1,193 @@
apiVersion: batch/v1
kind: Job
metadata:
name: minio-bucket-init
namespace: bakery-ia
labels:
app.kubernetes.io/name: minio-bucket-init
app.kubernetes.io/component: storage-init
app.kubernetes.io/part-of: bakery-ia
spec:
ttlSecondsAfterFinished: 300
backoffLimit: 3
template:
metadata:
labels:
app.kubernetes.io/name: minio-bucket-init
app.kubernetes.io/component: storage-init
spec:
restartPolicy: OnFailure
initContainers:
# Wait for MinIO to be ready
- name: wait-for-minio
image: busybox:1.36
command:
- sh
- -c
- |
echo "Waiting for MinIO to be ready..."
until nc -z minio.bakery-ia.svc.cluster.local 9000; do
echo "MinIO not ready, waiting..."
sleep 5
done
echo "MinIO is ready!"
containers:
- name: bucket-init
image: minio/mc:RELEASE.2024-11-17T19-35-25Z
command:
- /bin/sh
- -c
- |
set -e
echo "Configuring MinIO client..."
# Configure mc alias with TLS (skip cert verification for self-signed)
mc alias set myminio https://minio.bakery-ia.svc.cluster.local:9000 \
${MINIO_ROOT_USER} ${MINIO_ROOT_PASSWORD} --insecure
echo "Creating buckets..."
# Create training-models bucket if not exists
if ! mc ls myminio/training-models --insecure 2>/dev/null; then
mc mb myminio/training-models --insecure
echo "Created bucket: training-models"
else
echo "Bucket already exists: training-models"
fi
# Set bucket policy (private by default)
mc anonymous set none myminio/training-models --insecure
# Enable versioning for model backups
mc version enable myminio/training-models --insecure
echo "Enabled versioning on training-models bucket"
# Set lifecycle policy to expire old versions after 90 days
cat > /tmp/lifecycle.json << 'EOF'
{
"Rules": [
{
"ID": "expire-old-versions",
"Status": "Enabled",
"Filter": {
"Prefix": "models/"
},
"NoncurrentVersionExpiration": {
"NoncurrentDays": 90
}
},
{
"ID": "expire-old-metadata",
"Status": "Enabled",
"Filter": {
"Prefix": "models/"
},
"Expiration": {
"ExpiredObjectDeleteMarker": true
}
}
]
}
EOF
mc ilm import myminio/training-models < /tmp/lifecycle.json --insecure || true
echo "Lifecycle policy configured"
# Create service accounts with limited permissions
echo "Creating service accounts..."
# Training service policy (read/write models)
cat > /tmp/training-policy.json << 'EOF'
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject",
"s3:ListBucket",
"s3:GetBucketLocation",
"s3:ListBucketMultipartUploads"
],
"Resource": [
"arn:aws:s3:::training-models",
"arn:aws:s3:::training-models/*"
]
}
]
}
EOF
# Forecasting service policy (read-only models)
cat > /tmp/forecasting-policy.json << 'EOF'
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:ListBucket"
],
"Resource": [
"arn:aws:s3:::training-models",
"arn:aws:s3:::training-models/*"
]
}
]
}
EOF
# Create service accounts using credentials from secrets
echo "Creating service accounts..."
mc admin user add myminio ${TRAINING_MINIO_USER} ${TRAINING_MINIO_PASSWORD} --insecure 2>/dev/null || true
mc admin user add myminio ${FORECASTING_MINIO_USER} ${FORECASTING_MINIO_PASSWORD} --insecure 2>/dev/null || true
# Apply policies (ignore errors if already exists)
mc admin policy create myminio training-policy /tmp/training-policy.json --insecure 2>/dev/null || true
mc admin policy attach myminio training-policy --user=${TRAINING_MINIO_USER} --insecure 2>/dev/null || true
mc admin policy create myminio forecasting-policy /tmp/forecasting-policy.json --insecure 2>/dev/null || true
mc admin policy attach myminio forecasting-policy --user=${FORECASTING_MINIO_USER} --insecure 2>/dev/null || true
echo "MinIO bucket initialization complete!"
# List buckets for verification
echo "Current buckets:"
mc ls myminio --insecure
env:
- name: MINIO_ROOT_USER
valueFrom:
secretKeyRef:
name: minio-secrets
key: MINIO_ROOT_USER
- name: MINIO_ROOT_PASSWORD
valueFrom:
secretKeyRef:
name: minio-secrets
key: MINIO_ROOT_PASSWORD
# Training service MinIO credentials
- name: TRAINING_MINIO_USER
valueFrom:
secretKeyRef:
name: minio-secrets
key: MINIO_ACCESS_KEY
- name: TRAINING_MINIO_PASSWORD
valueFrom:
secretKeyRef:
name: minio-secrets
key: MINIO_SECRET_KEY
# Forecasting service MinIO credentials
- name: FORECASTING_MINIO_USER
valueFrom:
secretKeyRef:
name: minio-secrets
key: FORECASTING_MINIO_ACCESS_KEY
- name: FORECASTING_MINIO_PASSWORD
valueFrom:
secretKeyRef:
name: minio-secrets
key: FORECASTING_MINIO_SECRET_KEY

View File

@@ -18,6 +18,13 @@ resources:
# Additional configs
- configs/postgres-init-config.yaml
# MinIO Storage (with TLS)
- components/minio/minio-secrets.yaml
- secrets/minio-tls-secret.yaml
- components/minio/minio-pvc.yaml
- components/minio/minio-deployment.yaml
- jobs/minio-bucket-init-job.yaml
# Migration jobs
- migrations/auth-migration-job.yaml
@@ -63,9 +70,6 @@ resources:
- components/nominatim/nominatim.yaml
- jobs/nominatim-init-job.yaml
# Persistent storage
- components/volumes/model-storage-pvc.yaml
# Cert manager cluster issuers
- components/cert-manager/cluster-issuer-staging.yaml
- components/cert-manager/local-ca-issuer.yaml

View File

@@ -0,0 +1,28 @@
apiVersion: v1
kind: Secret
metadata:
name: minio-tls
namespace: bakery-ia
labels:
app.kubernetes.io/name: bakery-ia
app.kubernetes.io/component: minio-tls
app.kubernetes.io/part-of: bakery-ia
type: Opaque
data:
# MinIO TLS certificates (base64 encoded)
# Generated using infrastructure/tls/generate-minio-certificates.sh
# Valid for 3 years from generation date
#
# Certificate details:
# Subject: CN=minio.bakery-ia.svc.cluster.local, O=BakeryIA, OU=Storage
# Issuer: CN=BakeryIA-CA, O=BakeryIA, OU=Security
#
# To regenerate:
# 1. Run: infrastructure/tls/generate-minio-certificates.sh
# 2. Run: scripts/create-tls-secrets.sh
ca-cert.pem: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUZ5ekNDQTdPZ0F3SUJBZ0lVUGdPcU5ZK1pvS0J5UTFNZk84bGtpR2hPbXhJd0RRWUpLb1pJaHZjTkFRRUwKQlFBd2RURUxNQWtHQTFVRUJoTUNWVk14RXpBUkJnTlZCQWdNQ2tOaGJHbG1iM0p1YVdFeEZUQVRCZ05WQkFjTQpERk5oYmtaeVlXNWphWE5qYnpFUk1BOEdBMVVFQ2d3SVFtRnJaWEo1U1VFeEVUQVBCZ05WQkFzTUNGTmxZM1Z5CmFYUjVNUlF3RWdZRFZRUUREQXRDWVd0bGNubEpRUzFEUVRBZUZ3MHlOVEV3TVRneE5ESXlNVFJhRncwek5URXcKTVRZeE5ESXlNVFJhTUhVeEN6QUpCZ05WQkFZVEFsVlRNUk13RVFZRFZRUUlEQXBEWVd4cFptOXlibWxoTVJVdwpFd1lEVlFRSERBeFRZVzVHY21GdVkybHpZMjh4RVRBUEJnTlZCQW9NQ0VKaGEyVnllVWxCTVJFd0R3WURWUVFMCkRBaFRaV04xY21sMGVURVVNQklHQTFVRUF3d0xRbUZyWlhKNVNVRXRRMEV3Z2dJaU1BMEdDU3FHU0liM0RRRUIKQVFVQUE0SUNEd0F3Z2dJS0FvSUNBUURSRDVPMmVna1lnOUhOUlI1U1UwYkxuR0hqcHYvUmFnck03ZGh1c2FXbgpyZkRGNVZwVFo0czkvOXNPRUowTnlqdW9LWGFtb3VUd1IxbncxOUZkSDhmMWVvbWNRNGVLdzJIa3hveHFSMzR0ClJEYUFHejNiV08rcmFUUTRTeU1LN1hGTW92VVVpTGwrR08yM2wxQk5QZmh6a2NEa1o5N200MzRmMVFWbzk5dGIKaFY0YklMYW9GSXFmMDlNMEUxL2ZhQitKQ1I4WWtsN0xvWGd1ejNWUi9CVW5kMHZNc1RNV3VlRC8yblZ1VVpPMAowcFVtVFVCUTJRZDc2NTdrL0hXZC8xd2NFQUw5ZFhOUmJ4aEROZkdnYzNXdFFoZ2djcFlMUWFmTGE4MXRseHljCndEZ042UGRFbFVseGdYL091b1oxeWxNWkU3eHBzTXRwbjFBd2VvZFZibTNRcDVBMXlkeWJFNjF1MXVyWXoxTHQKV05aOWVPZkFxZXdpWVFIVlpXTUM0YTRTYSsyeU02cTVQWC80ZytUYklUaDhoWkp3WFBLNUVEaWc3dkYxNEpQbApsRVJOcHdpYTNuNmEwUDcwM0hQTjZya1FPNWtWVGRpVXNmaWJNdGNVSkhMeVdXUUFSQm15ZVZma0lDYWFlWUVsCkVMa3N3YTlOVkVTS3ZRYUhLU2lIWkZoRUkwYUF2Y3BBam0xRU9oRWEraFNSaE9vRnlVT3ZHK2NNT2ZjQlNtTDAKVW1sRC9sZmFuVFQwems1YXFzcEVrWEdlQnczMXJtWi8wQVpPalYycHBSeFdXZWt6bzlCZjdnNmVMVFk0VUNDNQpNeVB0em14OVRiWHJOQW5YaGlGNkxnNWgyOFI0MkdUZTVBZDZUSGtGOVMvS2hxOHUwZFk1U0EyR1VGMUViUU84Ckt3SURBUUFCbzFNd1VUQWRCZ05WSFE0RUZnUVVBKzZxL2tjOGZUUVUxRURxekdSZktRcHE2bTB3SHdZRFZSMGoKQkJnd0ZvQVVBKzZxL2tjOGZUUVUxRURxekdSZktRcHE2bTB3RHdZRFZSMFRBUUgvQkFVd0F3RUIvekFOQmdrcQpoa2lHOXcwQkFRc0ZBQU9DQWdFQVF1dkZoMitIUUZ5OFZUY1VnYWxFVmlheXQxelFHdjRySVNtaXEzRzZJZVhQClhTNGd3cUhrRnpUd1p2bW9oVHdtT0N3Vy94RjRLZ3htRmJ5V05yRUpKRXFjYmVkcVVXVi8wQkNhRm1KdlVkZEkKK2V4L2lEM0ZlYnU4QUZJK0o4bEJIL0NlbkRpU0xIaGd5c2VZOHV3Um5Yc3NoWDVSbkRpckYxdUtyMUo2MzVhbgpHbHlGSU5Vcm5RbGd1RXZ0cjBlbkdVbHpUNXJXajR5MEFXVWRiWGk4dlJzaldvUThKYTBCeFRyWVloL2tPL0ZJClB0cVg3d3N4b0pNREVRNzF6aHdhN1dMUWMyZGZiMnJBcjF1QmgzcU53aVZCSU5CK3QzSkZ2NzJ4cXNXZ3VySUIKSWYyc29SVEkybk1lNWdURzFEZmQrVjI0amZhL3lJZ0FzTWpDem1HUUsyMHZvYlg0c0FWbm1QVmJaZzlTTEZaaQpNaWRrbjlPOVU2OE1FT2UzSWFzY2xkN2ZwNUprK0hyYkpVNi9zMTZFRVIvQWdEM09vajN3UmdqVENTK0FERCtqCnhvMk84Vlgya1BvMDNBTitpWWEzbkptbE1GekNyelQrOFp4U25QNUZxR2cyRUNFYnFxQTBCLzVuYVZwbWRZYVYKNDFvRkxzd2NGbTJpcUdhd2JzTE45eDN0dklDdUU5M0hZazFqNzJQelhhaVNMdHB2YW1IMWRSWUMrSFVNMUwwTwo0OUNOTVlKZUwvTmx5UXVaSm0yWDBxRE5TWG1STUw4SFU5c093V1g2cFBQSk96dXF0Z2R4Lytsa0dBZDJ3WkpVCklWYm1MNlF2emRidGEvY1NWd3NMdEJ6RzQ4YTFiNEtCYzdXTEhUd2JyZEJSVGcwVGtMWTRrdkNaZTVuTmw0RT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
minio-cert.pem: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUdyVENDQkpXZ0F3SUJBZ0lVRytCME0ycnhucWpHZHRmbzBCaGV2S0N4MGdBd0RRWUpLb1pJaHZjTkFRRUwKQlFBd2RURUxNQWtHQTFVRUJoTUNWVk14RXpBUkJnTlZCQWdNQ2tOaGJHbG1iM0p1YVdFeEZUQVRCZ05WQkFjTQpERk5oYmtaeVlXNWphWE5qYnpFUk1BOEdBMVVFQ2d3SVFtRnJaWEo1U1VFeEVUQVBCZ05WQkFzTUNGTmxZM1Z5CmFYUjVNUlF3RWdZRFZRUUREQXRDWVd0bGNubEpRUzFEUVRBZUZ3MHlOakF4TVRjeE5EVTBORGhhRncweU9UQXgKTVRZeE5EVTBORGhhTUlHS01Rc3dDUVlEVlFRR0V3SlZVekVUTUJFR0ExVUVDQXdLUTJGc2FXWnZjbTVwWVRFVgpNQk1HQTFVRUJ3d01VMkZ1Um5KaGJtTnBjMk52TVJFd0R3WURWUVFLREFoQ1lXdGxjbmxKUVRFUU1BNEdBMVVFCkN3d0hVM1J2Y21GblpURXFNQ2dHQTFVRUF3d2hiV2x1YVc4dVltRnJaWEo1TFdsaExuTjJZeTVqYkhWemRHVnkKTG14dlkyRnNNSUlDSWpBTkJna3Foa2lHOXcwQkFRRUZBQU9DQWc4QU1JSUNDZ0tDQWdFQW5qdTd0cFF3dkYvVgprL011UmhySllWME1KcXRyRkovTlgrMU9MSmFNaEZYL0tZMTBMUCtCNjV3L3BsWkd5SnRidFVkV2o1d1pMclpRCm1KYjNwNFR0dUs0QlQxZ3UzYlNaS0lIUU5lQWc4MUtzTUdxKzV1WE9vUFdOckFoaDRoWU9KNDVtSXNZYmEwRGQKTzJNRnY5V3VXVm4zVDZGenpNN3FMZENKelpOamVhQjdtVEpqZEhHcjg0aVQ4NkFFQStIeXd2c3FPb2paZStVagpLdThYcmp4VUdSL2VQRnZRQ3lNZFdnRmJqd2lqSi9CbjhSQ0FSSXVpRXNzalNMUVdPZ1FncklBVHZFRi9jeVVkClpLR2hhYzMvNEk3MXhEV2hYNzFYV1l3T05FbXJRNmNHelhtdmNVTVY4SHZFV016YjA1UnBPWXp5bUtyYnhOTDQKZVdOYUt2cnZjWnpjTXpwSU00UmVHS3cyTjlzQUdzM1lCVFI3V1hMS1dnbkxZYnNvSHgzZGRadXlRK0hKd0RUWApxcFh1dFloYW9DZmZIMjNuTU1GaUFLMWltZWJCSTFoVWNBaVB2cFN4N2RJM21nTlA0YWZOL29xaE1PUGc4VHhtCndNZWt2cHovN2NXYkNPTmprZDlkcTBWTExTVyt0cUlmZlZRajBMT1VQdlhyTE9tUG1jTDZsU2xSTzg4NVRWdngKSkRidDJYVVJtaHFKenBhcklmTmhGOUVscEhtYnNkc2xtWVBvLzlKV1VtcmtiSjZBYWZkbEpuckNUR3hKcGl3TAowbEpveEl3dnFZdDhEQnVjMWNORktKSVNMWkl5bzZ1WFJ1TlZvTnByeGdmVXZsOENscDNnUyttSVNGZzMzdTJrCkpjYnF6bnZ2YzN0YmxIZTB4ZzJNSE1JVlRkWmlSamNDQXdFQUFhT0NBUjB3Z2dFWk1Bc0dBMVVkRHdRRUF3SUUKTURBZEJnTlZIU1VFRmpBVUJnZ3JCZ0VGQlFjREFRWUlLd1lCQlFVSEF3SXdnYW9HQTFVZEVRU0JvakNCbjRJaApiV2x1YVc4dVltRnJaWEo1TFdsaExuTjJZeTVqYkhWemRHVnlMbXh2WTJGc2dnOXRhVzVwYnk1aVlXdGxjbmt0CmFXR0NLVzFwYm1sdkxXTnZibk52YkdVdVltRnJaWEo1TFdsaExuTjJZeTVqYkhWemRHVnlMbXh2WTJGc2doZHQKYVc1cGJ5MWpiMjV6YjJ4bExtSmhhMlZ5ZVMxcFlZSUZiV2x1YVcrQ0RXMXBibWx2TFdOdmJuTnZiR1dDQ1d4dgpZMkZzYUc5emRJY0Vmd0FBQVRBZEJnTlZIUTRFRmdRVXJXMzNxOWkreE5MdVZjcGUrKzlxUE56dVF4VXdId1lEClZSMGpCQmd3Rm9BVUErNnEva2M4ZlRRVTFFRHF6R1JmS1FwcTZtMHdEUVlKS29aSWh2Y05BUUVMQlFBRGdnSUIKQUlTT0NieFJWd2xtaWdjNldLM3hUaUJxNlJGMGNzdnV5NjJNYnI3N0h0Q3VPNHgxOTI5QjAxMXd1djdnWEhmawpPQm9qa3ZwZnFQUXlRZTk2dGFwRGJqYWZpeStlSHBPSm1lQjFNN2lQKzEzTGJJRjN3alE5SXZ1TWtnN3FQczZXCk15cnBvd1ZwK1BPeDU2SlJRK3lPcm5nakgxRG9FMW45NDBJR0lTZkRmb2g3cTljMkNvSlA2cWo3YWxid1U4RU0KYlB5d3B4WkFTNjYydUtBR0VNcFNLK2NuMXdUU3ZWSDN6NDVrMk9yUmwvQ05PZ0Fad1dyNzdQK1A3bW9FSHlmUQplR0dpclJTWWswUkJtYzdOTGd0Ry9iV0JQTEt4dHIyQmZidDFwZFZXakd4TmlwaDR4c1Z0YldpNnVOeUxYNE1qCllyK0FVUjd1MHlCVWxSc1VUL1dDbkFYdnRmNzRwcWJaNDZ3YjFnajEreU1GWHRNUldVV2NFcU1GVXRJdEsrUngKSlA4bUErbW9qdEdOcGdJZG53b1pPMTBsQkZ2U0ZKL1hGUFlsbHFKOGJpWmJ3RDZtWElzei9WQmdDRHlyQ3kybwpQeVhzR29HNDdTZkovQldvdHUwRkNaZERreCtQU0k2bkdKdyt2empSVzJ3TU9tdzJiZ0xkK3dsVDNpTXp4V3VOCkNidk0wSmpTQ2J3YVMvdE84emtrNGROeVhkWWNQbkJPNVJlM1IrQUV3T0RxV2F4T0ZXYmVUWW10bHlOTXdNT04Kd2lpR3pLWjkwaHM5QSt6M2x0QldNNmxNOFBJaFplcHB1TEZNTDRMSjZ0Ti93anJrOEVVMFBNT2ZlUTVjWXprZAp3QXdiRjVXaVhDd2JtaERCbW4xVVBrMjdPQUV0TzRSM3luaXM0eGNJbmVTQwotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==
minio-key.pem: LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlKS2dJQkFBS0NBZ0VBbmp1N3RwUXd2Ri9Way9NdVJockpZVjBNSnF0ckZKL05YKzFPTEphTWhGWC9LWTEwCkxQK0I2NXcvcGxaR3lKdGJ0VWRXajV3WkxyWlFtSmIzcDRUdHVLNEJUMWd1M2JTWktJSFFOZUFnODFLc01HcSsKNXVYT29QV05yQWhoNGhZT0o0NW1Jc1liYTBEZE8yTUZ2OVd1V1ZuM1Q2Rnp6TTdxTGRDSnpaTmplYUI3bVRKagpkSEdyODRpVDg2QUVBK0h5d3ZzcU9valplK1VqS3U4WHJqeFVHUi9lUEZ2UUN5TWRXZ0ZiandpakovQm44UkNBClJJdWlFc3NqU0xRV09nUWdySUFUdkVGL2N5VWRaS0doYWMzLzRJNzF4RFdoWDcxWFdZd09ORW1yUTZjR3pYbXYKY1VNVjhIdkVXTXpiMDVScE9ZenltS3JieE5MNGVXTmFLdnJ2Y1p6Y016cElNNFJlR0t3Mk45c0FHczNZQlRSNwpXWExLV2duTFlic29IeDNkZFp1eVErSEp3RFRYcXBYdXRZaGFvQ2ZmSDIzbk1NRmlBSzFpbWViQkkxaFVjQWlQCnZwU3g3ZEkzbWdOUDRhZk4vb3FoTU9QZzhUeG13TWVrdnB6LzdjV2JDT05qa2Q5ZHEwVkxMU1crdHFJZmZWUWoKMExPVVB2WHJMT21QbWNMNmxTbFJPODg1VFZ2eEpEYnQyWFVSbWhxSnpwYXJJZk5oRjlFbHBIbWJzZHNsbVlQbwovOUpXVW1ya2JKNkFhZmRsSm5yQ1RHeEpwaXdMMGxKb3hJd3ZxWXQ4REJ1YzFjTkZLSklTTFpJeW82dVhSdU5WCm9OcHJ4Z2ZVdmw4Q2xwM2dTK21JU0ZnMzN1MmtKY2Jxem52dmMzdGJsSGUweGcyTUhNSVZUZFppUmpjQ0F3RUEKQVFLQ0FnQVhHQWE4amdKUzYvWERBeUlFejFJRzZNcW1OaXlKdFEwSGJCNFZ1ZDlHVFRyUmVMaTAvSkdjcnBCSAptWjM1RjF1YUtKQkVvM2ExYjV4eHVNN3FYeWRHNWZhQSt4RFVBTkM5cmJ5U3NHUit2dGtzczllcTRXMTM1bjdICjFlMWJUdmEvNVRPWTdhc0F5MVcrbmlRdnJHTW0zVStRQ3JOWTkvWUx1N3p4Q1FyaXJINTlqSEloZzVtaUVKUHYKWWJKVVVyellva20yZzFTaWxYMjlmV25LWHpteTlRaTliSFQvdXg5RWpLQXRUd2hwQXRoWXdaekc1RTVDU2UyYgpaZFU4b0crWVhaVUR5OWRyR2NhaGNrbVpwSndzelJDbmsyQTdGZXBTd25Nc1JIZy9obmdpc3hqZEFmcUl2N2VYCmNrYS9LWkQxK2xGSjROMzBhd29peFZKYXBZY2VwZk1hMS83dE1vZFFsOXdaOVZLWTZ6YlEwL1U0QndlMGQ0OEYKQ1graVlOZ2t4UWRmdVdwMFU2RkVlUTluR2tPMndZQUJxMCtzSDIxU2puRTQvTXh5anpLZCtjR08zUkdkTktxUwo5QTVubkh4MUwxVDN6Z0hOR2ZHS1F6Tzg5L09sVDBWVE80OEhkamxva0hmc3VTVG03N2tkZkU1TVFwamF2WktaCmo0QXoyWENGWkM2WkJxYm9wZlA1amVNWmI1WDU0aXVtclIwcHpRRGloQ3ZZWmYxTlVDa3hFdFZmaTF1eUtvLzYKMzhQK0pDcEtWSk1mYzhyYTFlWVRTV0ZaZDc1UXVMK1FtblpPVUNqQktXMnNQQTVGbERyTkVTdTQrREhCVVFtOApxdUxDUGdLaHA1TmVJRDVjcm5iVElYclVCb2tQdHpsWm10SEs5TFRYeTNPWkdXUmt5UUtDQVFFQTF0OFRhdWdCCmpMUVI2NXBTbGRXTDdVSnVGVlZUVW9DSlB5cHlOQjkvc1VsTC9Nd1RBbHlhWHoveU15Q2VCdWt3cnBMT1M0NHMKaG5kQlJOL3ZsdkRCaEovVjdYaDBEUWUvMGlqczRJdGNYQ1lpN3hFcWZOd1FQTUJEKzVyWkdKeU1iOEtLV3YwSwpBUnhES0k0YytLUkQwemQ1d1ZtelZSTjdLZlUzT3FXbGV1TjNMTFZqN3R6YU9kT2xSU0E3YWlCTS9odWQ1VFE5CkUwcEF3SDhIaGMxYW1qaUM4dEJsYUZlZ0lodXpJenhNU1hIUkJVcDNsaDMvb2UzNjM4Mm5zRUxjbE4xaFVWRGsKdDNUQVpjdHlYRkIzSEUydHpJdm9xRUpRN0Zkd3MwNUVQZXFIODFOekdjRlRNS1NieVJzNmtYYzhFQ0hPc2lYSAp6TDd5dlI3S1BmVHZhd0tDQVFFQXZJVlZRV3lpcU5ScTdTQkd3czg3WjVjZFlJOGdwSkI4bFlySklqaTRyVUVFCk14MmdVeCtYaHM5QTJSczQxZ1hsYXdvRWNqUDliZXJ2ZTYzMVZOV0M0K3Q5cFR2Vm9qcVhtcnZaNVVEN3V2Q0kKRlFPLy9JSUdqa0tFZkRwSUgvcWxEUlZlbEZTU1JjOVEvY0piZlNwS2JsYnJYZ1FtdG5KOWpsQkpFL1NMSW14UAo3OURVdGlmWmx5cFVRbDl5YzhSZzFSYmpyQWtjQVZhOVBHMXQ3cGhTanJkZHRKbXRVUmtFdGhYWTc3R3c5WHJUCjgwWlJHdkpIS0lsWlBmaHF2WlNGQzg4MVJJZ0lpRitCdWxobm16TUo0dmdYeXEwVCtRY1VGN0FBdFBRU0hyMHIKQm5wN1JlUDF5R201UDd0MjNmRU00Z0R1RENBUHQ0R1lZeUxFY2dpelpRS0NBUUVBaE9MVGJITnR1ZW9IaHpFYQowQ1dRY3p4NVBtSlZ0SmxmeUJ2bEkwMHp1SjMvQzZuZU84Q3ZqQ2JORUVlazA5dFZ5ekZwdWhxRWVPaTZDZkdBCmlGWC9LSmw5UVc4VVBwYkRVQ01WVkUxNzRsV0hsMWlEY1ZMY0MrWlFaUVBBTGROcm14YXlZRkZMNWFIbit1WGgKRHZqd0pXbVN1RHhVaDFJVUFyL3YxeXBvckJhUE5xdzcwSmJ2czRHc0haTXdpNUxNYXY4RGFLUWsvWkFYZWJWVwpIcThBMEk0UWxrREI1b1VDdVBWdWxXVU9QUUhSNWpiR3ZLVnkybCtHbnZEZU8wa3VpRFpkb0YrcUE3ZUY0YTZ2CjNGMjdQRnJpR0xXU1ByVTh2TjNiQ2xsbUpQQ3VBWk5qaE5NbU10Z3FySFpWZzI4OVN6RE5WeW04Wm1qVlVKY0IKTnM0TFh3S0NBUUVBdDRua0tBOFpDZC9NdmxJbk1qREorQit5ZFRqRG9oUWRod1lZcmgybEJ1QitzemxMeHNIQwpKM2lOL1JFNHMzNElEcjh3OXZMUThIdkRicGs5ZWJ0cGRIYm4yNysyVFB4WWIwZ21hc0ZxazJUc1IvRmZyL256CllmczJ1eStPMnJ1T2gzOWZsbkFEL0wx
TGI5TVNlWGg4QUpMVkViSmU4ay9qRjNQb3dlbmFyOGZkeDNCOE4xL3kKd3U1dUhEU0szRlM3cFpwa1REQ09PR3QzVDJhR21iMW8yeE9Bd255L3RXM3pIVWVGN2s4RUp1clBnVkRiVTYyLwpRNkw4NUkxL2RsVXJkd1RrS25WNlFUTWl2UWFtei8zUHlVNmE4ekt3ZUVuQThSTGtqVWYyZ0VEUnE3d0JXbGtICkNIaU41NU9ldFpPaVpFSmRnQ2FTeHFrQWNMdi9uN29DMVFLQ0FRRUFxRkNHVDFWWG4yUGEwdFQ2ZCtvRnZYYTkKSENVMTFEbG9ad1hUOTY4cmhGOEJSazdLRVVvZXpFdjZiTUZsdUwzak9jMDNkUUs1WlF0anZUQkZKYlc3NVZMVgphcnR1U0xiVS9CVytnRGtZWmszQ241Z1B6QzlIbGRDa3MrS0lDOHJBcUNPdW9NRzc3SFlOVys3ckJLS3did2w1CmtDQW1uSmE2NWZZczdDWXpEOThmb0crVmxsc25VWCttMUxMZUtjclBEZWlpcW5kQmFTWi9NRVJnWmE2SXZid2kKMDVtNnFqL3ZXL1ZiV05iNVR4Z2N5MWpOOXpRbWJONFJ0Zmdzc3NKRmZzS3JNS0lxVnp1NkNMcEJ4eXBOUXZHYQo0S3UzVFZGcm9zaFlxWUpMVm1xVklYT1dWZk9IQTRMT2VpNmtDZTlHaTQydjdqS014M0dEK25CK1BWbVFXZz09Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0tCg==

View File

@@ -666,7 +666,7 @@ replicas:
- name: tenant-service
count: 1
- name: training-service
count: 1
count: 2 # Safe with MinIO storage
- name: forecasting-service
count: 1
- name: sales-service

View File

@@ -200,7 +200,7 @@ replicas:
- name: tenant-service
count: 2
- name: training-service
count: 2
count: 3 # Safe with MinIO storage - no PVC conflicts
- name: forecasting-service
count: 3
- name: sales-service

View File

@@ -1,12 +0,0 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: model-storage
namespace: bakery-ia
spec:
storageClassName: microk8s-hostpath # MicroK8s storage class
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 50Gi # Increased for production (adjust based on your needs)

View File

@@ -31,7 +31,7 @@
"y": 3,
"w": 6,
"h": 3,
"i": "api-calls-per-user",
"i": "user-actions",
"moved": false,
"static": false
},
@@ -40,7 +40,16 @@
"y": 3,
"w": 6,
"h": 3,
"i": "session-duration",
"i": "page-views",
"moved": false,
"static": false
},
{
"x": 0,
"y": 6,
"w": 12,
"h": 4,
"i": "geo-visitors",
"moved": false,
"static": false
}
@@ -51,7 +60,7 @@
"name": "service",
"description": "Filter by service name",
"type": "QUERY",
"queryValue": "SELECT DISTINCT(resource_attrs['service.name']) as value FROM signoz_metrics.distributed_time_series_v4_1day WHERE metric_name = 'active_users' AND value != '' ORDER BY value",
"queryValue": "SELECT DISTINCT(serviceName) FROM signoz_traces.distributed_signoz_index_v2 ORDER BY serviceName",
"customValue": "",
"textboxValue": "",
"showALLOption": true,
@@ -59,7 +68,7 @@
"order": 1,
"modificationUUID": "",
"sort": "ASC",
"selectedValue": null
"selectedValue": "bakery-frontend"
}
},
"widgets": [
@@ -75,26 +84,26 @@
"builder": {
"queryData": [
{
"dataSource": "metrics",
"dataSource": "traces",
"queryName": "A",
"aggregateOperator": "sum",
"aggregateOperator": "count_distinct",
"aggregateAttribute": {
"key": "active_users",
"dataType": "int64",
"type": "Gauge",
"isColumn": false
"key": "user.id",
"dataType": "string",
"type": "tag",
"isColumn": true
},
"timeAggregation": "latest",
"timeAggregation": "count_distinct",
"spaceAggregation": "sum",
"functions": [],
"filters": {
"items": [
{
"key": {
"key": "service.name",
"key": "serviceName",
"dataType": "string",
"type": "resource",
"isColumn": false
"type": "tag",
"isColumn": true
},
"op": "=",
"value": "{{.service}}"
@@ -110,13 +119,13 @@
"orderBy": [],
"groupBy": [
{
"key": "service.name",
"key": "serviceName",
"dataType": "string",
"type": "resource",
"isColumn": false
"type": "tag",
"isColumn": true
}
],
"legend": "{{service.name}}",
"legend": "{{serviceName}}",
"reduceTo": "sum"
}
],
@@ -139,16 +148,16 @@
"builder": {
"queryData": [
{
"dataSource": "metrics",
"dataSource": "traces",
"queryName": "A",
"aggregateOperator": "sum",
"aggregateOperator": "count",
"aggregateAttribute": {
"key": "user_sessions_total",
"dataType": "int64",
"type": "Counter",
"isColumn": false
"key": "session.id",
"dataType": "string",
"type": "tag",
"isColumn": true
},
"timeAggregation": "sum",
"timeAggregation": "count",
"spaceAggregation": "sum",
"functions": [],
"filters": {
@@ -162,6 +171,16 @@
},
"op": "=",
"value": "{{.service}}"
},
{
"key": {
"key": "span.name",
"dataType": "string",
"type": "tag",
"isColumn": true
},
"op": "=",
"value": "user_session"
}
],
"op": "AND"
@@ -192,9 +211,9 @@
"yAxisUnit": "none"
},
{
"id": "api-calls-per-user",
"title": "API Calls per User",
"description": "Average API calls per user by service",
"id": "user-actions",
"title": "User Actions",
"description": "Total user actions by service",
"isStacked": false,
"nullZeroValues": "zero",
"opacity": "1",
@@ -203,17 +222,17 @@
"builder": {
"queryData": [
{
"dataSource": "metrics",
"dataSource": "traces",
"queryName": "A",
"aggregateOperator": "avg",
"aggregateOperator": "count",
"aggregateAttribute": {
"key": "api_calls_per_user",
"dataType": "float64",
"type": "Gauge",
"isColumn": false
"key": "user.action",
"dataType": "string",
"type": "tag",
"isColumn": true
},
"timeAggregation": "avg",
"spaceAggregation": "avg",
"timeAggregation": "count",
"spaceAggregation": "sum",
"functions": [],
"filters": {
"items": [
@@ -226,6 +245,16 @@
},
"op": "=",
"value": "{{.service}}"
},
{
"key": {
"key": "span.name",
"dataType": "string",
"type": "tag",
"isColumn": true
},
"op": "=",
"value": "user_action"
}
],
"op": "AND"
@@ -245,7 +274,7 @@
}
],
"legend": "{{serviceName}}",
"reduceTo": "avg"
"reduceTo": "sum"
}
],
"queryFormulas": []
@@ -256,9 +285,9 @@
"yAxisUnit": "none"
},
{
"id": "session-duration",
"title": "Session Duration",
"description": "Average session duration by service",
"id": "page-views",
"title": "Page Views",
"description": "Total page views by service",
"isStacked": false,
"nullZeroValues": "zero",
"opacity": "1",
@@ -267,17 +296,17 @@
"builder": {
"queryData": [
{
"dataSource": "metrics",
"dataSource": "traces",
"queryName": "A",
"aggregateOperator": "avg",
"aggregateOperator": "count",
"aggregateAttribute": {
"key": "session_duration_seconds",
"dataType": "float64",
"type": "Gauge",
"isColumn": false
"key": "page.path",
"dataType": "string",
"type": "tag",
"isColumn": true
},
"timeAggregation": "avg",
"spaceAggregation": "avg",
"timeAggregation": "count",
"spaceAggregation": "sum",
"functions": [],
"filters": {
"items": [
@@ -290,6 +319,16 @@
},
"op": "=",
"value": "{{.service}}"
},
{
"key": {
"key": "span.name",
"dataType": "string",
"type": "tag",
"isColumn": true
},
"op": "=",
"value": "page_view"
}
],
"op": "AND"
@@ -309,7 +348,7 @@
}
],
"legend": "{{serviceName}}",
"reduceTo": "avg"
"reduceTo": "sum"
}
],
"queryFormulas": []
@@ -317,7 +356,74 @@
"queryType": "builder"
},
"fillSpans": false,
"yAxisUnit": "seconds"
"yAxisUnit": "none"
},
{
"id": "geo-visitors",
"title": "Geolocation Visitors",
"description": "Number of visitors who shared location data",
"isStacked": false,
"nullZeroValues": "zero",
"opacity": "1",
"panelTypes": "value",
"query": {
"builder": {
"queryData": [
{
"dataSource": "traces",
"queryName": "A",
"aggregateOperator": "count",
"aggregateAttribute": {
"key": "user.id",
"dataType": "string",
"type": "tag",
"isColumn": true
},
"timeAggregation": "count",
"spaceAggregation": "sum",
"functions": [],
"filters": {
"items": [
{
"key": {
"key": "serviceName",
"dataType": "string",
"type": "tag",
"isColumn": true
},
"op": "=",
"value": "{{.service}}"
},
{
"key": {
"key": "span.name",
"dataType": "string",
"type": "tag",
"isColumn": true
},
"op": "=",
"value": "user_location"
}
],
"op": "AND"
},
"expression": "A",
"disabled": false,
"having": [],
"stepInterval": 60,
"limit": null,
"orderBy": [],
"groupBy": [],
"legend": "Visitors with Location Data (See GEOLOCATION_VISUALIZATION_GUIDE.md for map integration)",
"reduceTo": "sum"
}
],
"queryFormulas": []
},
"queryType": "builder"
},
"fillSpans": false,
"yAxisUnit": "none"
}
]
}
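
The widgets above filter on span.name values "user_action", "page_view" and "user_location" and on tags such as user.id, user.action and page.path. The real spans are emitted by the frontend OpenTelemetry SDK; this Python sketch only illustrates the expected attribute shape, not the production emitter.

# Illustrative only: the shape of spans the analytics dashboard expects.
from opentelemetry import trace

tracer = trace.get_tracer("analytics-example")

def record_page_view(user_id: str, path: str) -> None:
    with tracer.start_as_current_span("page_view") as span:
        span.set_attribute("user.id", user_id)
        span.set_attribute("page.path", path)

def record_user_action(user_id: str, action: str) -> None:
    with tracer.start_as_current_span("user_action") as span:
        span.set_attribute("user.id", user_id)
        span.set_attribute("user.action", action)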

View File

@@ -1 +1 @@
1BE074336AF19EA8C676D7E8D0185EBCA0B1D1FF
1BE074336AF19EA8C676D7E8D0185EBCA0B1D202

View File

@@ -0,0 +1,111 @@
#!/usr/bin/env bash
# Generate MinIO TLS certificates using existing CA
# This script generates certificates for MinIO server
set -e
TLS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CA_DIR="$TLS_DIR/ca"
MINIO_DIR="$TLS_DIR/minio"
mkdir -p "$MINIO_DIR"
echo "Generating MinIO TLS certificates using existing CA..."
echo "CA Directory: $CA_DIR"
echo "MinIO Directory: $MINIO_DIR"
echo ""
# Check if CA exists
if [ ! -f "$CA_DIR/ca-cert.pem" ] || [ ! -f "$CA_DIR/ca-key.pem" ]; then
echo "ERROR: CA certificates not found. Please run generate-certificates.sh first."
exit 1
fi
# Generate MinIO server private key
echo "Step 1: Generating MinIO server private key..."
openssl genrsa -out "$MINIO_DIR/minio-key.pem" 4096
# Convert to traditional RSA format (required by MinIO)
echo "Step 1b: Converting private key to traditional RSA format..."
openssl rsa -in "$MINIO_DIR/minio-key.pem" -traditional -out "$MINIO_DIR/minio-key.pem"
# Create certificate signing request (CSR)
echo "Step 2: Creating MinIO certificate signing request..."
openssl req -new -key "$MINIO_DIR/minio-key.pem" -out "$MINIO_DIR/minio.csr" \
-subj "/C=US/ST=California/L=SanFrancisco/O=BakeryIA/OU=Storage/CN=minio.bakery-ia.svc.cluster.local"
# Create SAN (Subject Alternative Names) configuration for MinIO
cat > "$MINIO_DIR/san.cnf" <<EOF
[req]
distinguished_name = req_distinguished_name
req_extensions = v3_req
prompt = no
[req_distinguished_name]
C = US
ST = California
L = SanFrancisco
O = BakeryIA
OU = Storage
CN = minio.bakery-ia.svc.cluster.local
[v3_req]
keyUsage = keyEncipherment, dataEncipherment
extendedKeyUsage = serverAuth, clientAuth
subjectAltName = @alt_names
[alt_names]
DNS.1 = minio.bakery-ia.svc.cluster.local
DNS.2 = minio.bakery-ia
DNS.3 = minio-console.bakery-ia.svc.cluster.local
DNS.4 = minio-console.bakery-ia
DNS.5 = minio
DNS.6 = minio-console
DNS.7 = localhost
IP.1 = 127.0.0.1
EOF
# Sign the certificate with CA (valid for 3 years)
echo "Step 3: Signing MinIO certificate with CA..."
openssl x509 -req -in "$MINIO_DIR/minio.csr" \
-CA "$CA_DIR/ca-cert.pem" -CAkey "$CA_DIR/ca-key.pem" -CAcreateserial \
-out "$MINIO_DIR/minio-cert.pem" -days 1095 \
-extensions v3_req -extfile "$MINIO_DIR/san.cnf"
# Set proper permissions
chmod 600 "$MINIO_DIR/minio-key.pem"
chmod 644 "$MINIO_DIR/minio-cert.pem"
# Copy CA cert for MinIO
cp "$CA_DIR/ca-cert.pem" "$MINIO_DIR/ca-cert.pem"
echo ""
echo "Step 4: Verifying MinIO certificates..."
# Verify MinIO certificate
echo "MinIO certificate details:"
openssl x509 -in "$MINIO_DIR/minio-cert.pem" -noout -subject -issuer -dates
openssl verify -CAfile "$CA_DIR/ca-cert.pem" "$MINIO_DIR/minio-cert.pem"
echo ""
echo "==================="
echo "✓ MinIO certificates generated successfully!"
echo ""
echo "Generated files:"
echo " MinIO:"
echo " - $MINIO_DIR/minio-cert.pem (Server certificate)"
echo " - $MINIO_DIR/minio-key.pem (Server private key - traditional RSA format)"
echo " - $MINIO_DIR/ca-cert.pem (CA certificate)"
echo ""
echo "Important Notes:"
echo " • Private key is in traditional RSA format (BEGIN RSA PRIVATE KEY)"
echo " • This format is required by MinIO to avoid 'The private key contains additional data' error"
echo " • Certificates follow the standardized Opaque secret structure"
echo ""
echo "Next steps:"
echo " 1. Update Kubernetes minio-tls secret with these certificates"
echo " 2. Apply the updated secret to your cluster"
echo " 3. Restart MinIO pods if necessary"
echo ""
echo "For more details, see: docs/MINIO_TLS_FIX_SUMMARY.md"

View File

@@ -0,0 +1,33 @@
-----BEGIN CERTIFICATE-----
MIIFyzCCA7OgAwIBAgIUPgOqNY+ZoKByQ1MfO8lkiGhOmxIwDQYJKoZIhvcNAQEL
BQAwdTELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNhbGlmb3JuaWExFTATBgNVBAcM
DFNhbkZyYW5jaXNjbzERMA8GA1UECgwIQmFrZXJ5SUExETAPBgNVBAsMCFNlY3Vy
aXR5MRQwEgYDVQQDDAtCYWtlcnlJQS1DQTAeFw0yNTEwMTgxNDIyMTRaFw0zNTEw
MTYxNDIyMTRaMHUxCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRUw
EwYDVQQHDAxTYW5GcmFuY2lzY28xETAPBgNVBAoMCEJha2VyeUlBMREwDwYDVQQL
DAhTZWN1cml0eTEUMBIGA1UEAwwLQmFrZXJ5SUEtQ0EwggIiMA0GCSqGSIb3DQEB
AQUAA4ICDwAwggIKAoICAQDRD5O2egkYg9HNRR5SU0bLnGHjpv/RagrM7dhusaWn
rfDF5VpTZ4s9/9sOEJ0NyjuoKXamouTwR1nw19FdH8f1eomcQ4eKw2HkxoxqR34t
RDaAGz3bWO+raTQ4SyMK7XFMovUUiLl+GO23l1BNPfhzkcDkZ97m434f1QVo99tb
hV4bILaoFIqf09M0E1/faB+JCR8Ykl7LoXguz3VR/BUnd0vMsTMWueD/2nVuUZO0
0pUmTUBQ2Qd7657k/HWd/1wcEAL9dXNRbxhDNfGgc3WtQhggcpYLQafLa81tlxyc
wDgN6PdElUlxgX/OuoZ1ylMZE7xpsMtpn1AweodVbm3Qp5A1ydybE61u1urYz1Lt
WNZ9eOfAqewiYQHVZWMC4a4Sa+2yM6q5PX/4g+TbITh8hZJwXPK5EDig7vF14JPl
lERNpwia3n6a0P703HPN6rkQO5kVTdiUsfibMtcUJHLyWWQARBmyeVfkICaaeYEl
ELkswa9NVESKvQaHKSiHZFhEI0aAvcpAjm1EOhEa+hSRhOoFyUOvG+cMOfcBSmL0
UmlD/lfanTT0zk5aqspEkXGeBw31rmZ/0AZOjV2ppRxWWekzo9Bf7g6eLTY4UCC5
MyPtzmx9TbXrNAnXhiF6Lg5h28R42GTe5Ad6THkF9S/Khq8u0dY5SA2GUF1EbQO8
KwIDAQABo1MwUTAdBgNVHQ4EFgQUA+6q/kc8fTQU1EDqzGRfKQpq6m0wHwYDVR0j
BBgwFoAUA+6q/kc8fTQU1EDqzGRfKQpq6m0wDwYDVR0TAQH/BAUwAwEB/zANBgkq
hkiG9w0BAQsFAAOCAgEAQuvFh2+HQFy8VTcUgalEViayt1zQGv4rISmiq3G6IeXP
XS4gwqHkFzTwZvmohTwmOCwW/xF4KgxmFbyWNrEJJEqcbedqUWV/0BCaFmJvUddI
+ex/iD3Febu8AFI+J8lBH/CenDiSLHhgyseY8uwRnXsshX5RnDirF1uKr1J635an
GlyFINUrnQlguEvtr0enGUlzT5rWj4y0AWUdbXi8vRsjWoQ8Ja0BxTrYYh/kO/FI
PtqX7wsxoJMDEQ71zhwa7WLQc2dfb2rAr1uBh3qNwiVBINB+t3JFv72xqsWgurIB
If2soRTI2nMe5gTG1Dfd+V24jfa/yIgAsMjCzmGQK20vobX4sAVnmPVbZg9SLFZi
Midkn9O9U68MEOe3Iascld7fp5Jk+HrbJU6/s16EER/AgD3Ooj3wRgjTCS+ADD+j
xo2O8VX2kPo03AN+iYa3nJmlMFzCrzT+8ZxSnP5FqGg2ECEbqqA0B/5naVpmdYaV
41oFLswcFm2iqGawbsLN9x3tvICuE93HYk1j72PzXaiSLtpvamH1dRYC+HUM1L0O
49CNMYJeL/NlyQuZJm2X0qDNSXmRML8HU9sOwWX6pPPJOzuqtgdx/+lkGAd2wZJU
IVbmL6Qvzdbta/cSVwsLtBzG48a1b4KBc7WLHTwbrdBRTg0TkLY4kvCZe5nNl4E=
-----END CERTIFICATE-----

View File

@@ -0,0 +1,38 @@
-----BEGIN CERTIFICATE-----
MIIGrTCCBJWgAwIBAgIUG+B0M2rxnqjGdtfo0BhevKCx0gAwDQYJKoZIhvcNAQEL
BQAwdTELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNhbGlmb3JuaWExFTATBgNVBAcM
DFNhbkZyYW5jaXNjbzERMA8GA1UECgwIQmFrZXJ5SUExETAPBgNVBAsMCFNlY3Vy
aXR5MRQwEgYDVQQDDAtCYWtlcnlJQS1DQTAeFw0yNjAxMTcxNDU0NDhaFw0yOTAx
MTYxNDU0NDhaMIGKMQswCQYDVQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEV
MBMGA1UEBwwMU2FuRnJhbmNpc2NvMREwDwYDVQQKDAhCYWtlcnlJQTEQMA4GA1UE
CwwHU3RvcmFnZTEqMCgGA1UEAwwhbWluaW8uYmFrZXJ5LWlhLnN2Yy5jbHVzdGVy
LmxvY2FsMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAnju7tpQwvF/V
k/MuRhrJYV0MJqtrFJ/NX+1OLJaMhFX/KY10LP+B65w/plZGyJtbtUdWj5wZLrZQ
mJb3p4TtuK4BT1gu3bSZKIHQNeAg81KsMGq+5uXOoPWNrAhh4hYOJ45mIsYba0Dd
O2MFv9WuWVn3T6FzzM7qLdCJzZNjeaB7mTJjdHGr84iT86AEA+HywvsqOojZe+Uj
Ku8XrjxUGR/ePFvQCyMdWgFbjwijJ/Bn8RCARIuiEssjSLQWOgQgrIATvEF/cyUd
ZKGhac3/4I71xDWhX71XWYwONEmrQ6cGzXmvcUMV8HvEWMzb05RpOYzymKrbxNL4
eWNaKvrvcZzcMzpIM4ReGKw2N9sAGs3YBTR7WXLKWgnLYbsoHx3ddZuyQ+HJwDTX
qpXutYhaoCffH23nMMFiAK1imebBI1hUcAiPvpSx7dI3mgNP4afN/oqhMOPg8Txm
wMekvpz/7cWbCONjkd9dq0VLLSW+tqIffVQj0LOUPvXrLOmPmcL6lSlRO885TVvx
JDbt2XURmhqJzparIfNhF9ElpHmbsdslmYPo/9JWUmrkbJ6AafdlJnrCTGxJpiwL
0lJoxIwvqYt8DBuc1cNFKJISLZIyo6uXRuNVoNprxgfUvl8Clp3gS+mISFg33u2k
Jcbqznvvc3tblHe0xg2MHMIVTdZiRjcCAwEAAaOCAR0wggEZMAsGA1UdDwQEAwIE
MDAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwgaoGA1UdEQSBojCBn4Ih
bWluaW8uYmFrZXJ5LWlhLnN2Yy5jbHVzdGVyLmxvY2Fsgg9taW5pby5iYWtlcnkt
aWGCKW1pbmlvLWNvbnNvbGUuYmFrZXJ5LWlhLnN2Yy5jbHVzdGVyLmxvY2Fsghdt
aW5pby1jb25zb2xlLmJha2VyeS1pYYIFbWluaW+CDW1pbmlvLWNvbnNvbGWCCWxv
Y2FsaG9zdIcEfwAAATAdBgNVHQ4EFgQUrW33q9i+xNLuVcpe++9qPNzuQxUwHwYD
VR0jBBgwFoAUA+6q/kc8fTQU1EDqzGRfKQpq6m0wDQYJKoZIhvcNAQELBQADggIB
AISOCbxRVwlmigc6WK3xTiBq6RF0csvuy62Mbr77HtCuO4x1929B011wuv7gXHfk
OBojkvpfqPQyQe96tapDbjafiy+eHpOJmeB1M7iP+13LbIF3wjQ9IvuMkg7qPs6W
MyrpowVp+POx56JRQ+yOrngjH1DoE1n940IGISfDfoh7q9c2CoJP6qj7albwU8EM
bPywpxZAS662uKAGEMpSK+cn1wTSvVH3z45k2OrRl/CNOgAZwWr77P+P7moEHyfQ
eGGirRSYk0RBmc7NLgtG/bWBPLKxtr2Bfbt1pdVWjGxNiph4xsVtbWi6uNyLX4Mj
Yr+AUR7u0yBUlRsUT/WCnAXvtf74pqbZ46wb1gj1+yMFXtMRWUWcEqMFUtItK+Rx
JP8mA+mojtGNpgIdnwoZO10lBFvSFJ/XFPYllqJ8biZbwD6mXIsz/VBgCDyrCy2o
PyXsGoG47SfJ/BWotu0FCZdDkx+PSI6nGJw+vzjRW2wMOmw2bgLd+wlT3iMzxWuN
CbvM0JjSCbwaS/tO8zkk4dNyXdYcPnBO5Re3R+AEwODqWaxOFWbeTYmtlyNMwMON
wiiGzKZ90hs9A+z3ltBWM6lM8PIhZeppuLFML4LJ6tN/wjrk8EU0PMOfeQ5cYzkd
wAwbF5WiXCwbmhDBmn1UPk27OAEtO4R3ynis4xcIneSC
-----END CERTIFICATE-----

View File

@@ -0,0 +1,51 @@
-----BEGIN RSA PRIVATE KEY-----
MIIJKgIBAAKCAgEAnju7tpQwvF/Vk/MuRhrJYV0MJqtrFJ/NX+1OLJaMhFX/KY10
LP+B65w/plZGyJtbtUdWj5wZLrZQmJb3p4TtuK4BT1gu3bSZKIHQNeAg81KsMGq+
5uXOoPWNrAhh4hYOJ45mIsYba0DdO2MFv9WuWVn3T6FzzM7qLdCJzZNjeaB7mTJj
dHGr84iT86AEA+HywvsqOojZe+UjKu8XrjxUGR/ePFvQCyMdWgFbjwijJ/Bn8RCA
RIuiEssjSLQWOgQgrIATvEF/cyUdZKGhac3/4I71xDWhX71XWYwONEmrQ6cGzXmv
cUMV8HvEWMzb05RpOYzymKrbxNL4eWNaKvrvcZzcMzpIM4ReGKw2N9sAGs3YBTR7
WXLKWgnLYbsoHx3ddZuyQ+HJwDTXqpXutYhaoCffH23nMMFiAK1imebBI1hUcAiP
vpSx7dI3mgNP4afN/oqhMOPg8TxmwMekvpz/7cWbCONjkd9dq0VLLSW+tqIffVQj
0LOUPvXrLOmPmcL6lSlRO885TVvxJDbt2XURmhqJzparIfNhF9ElpHmbsdslmYPo
/9JWUmrkbJ6AafdlJnrCTGxJpiwL0lJoxIwvqYt8DBuc1cNFKJISLZIyo6uXRuNV
oNprxgfUvl8Clp3gS+mISFg33u2kJcbqznvvc3tblHe0xg2MHMIVTdZiRjcCAwEA
AQKCAgAXGAa8jgJS6/XDAyIEz1IG6MqmNiyJtQ0HbB4Vud9GTTrReLi0/JGcrpBH
mZ35F1uaKJBEo3a1b5xxuM7qXydG5faA+xDUANC9rbySsGR+vtkss9eq4W135n7H
1e1bTva/5TOY7asAy1W+niQvrGMm3U+QCrNY9/YLu7zxCQrirH59jHIhg5miEJPv
YbJUUrzYokm2g1SilX29fWnKXzmy9Qi9bHT/ux9EjKAtTwhpAthYwZzG5E5CSe2b
ZdU8oG+YXZUDy9drGcahckmZpJwszRCnk2A7FepSwnMsRHg/hngisxjdAfqIv7eX
cka/KZD1+lFJ4N30awoixVJapYcepfMa1/7tModQl9wZ9VKY6zbQ0/U4Bwe0d48F
CX+iYNgkxQdfuWp0U6FEeQ9nGkO2wYABq0+sH21SjnE4/MxyjzKd+cGO3RGdNKqS
9A5nnHx1L1T3zgHNGfGKQzO89/OlT0VTO48HdjlokHfsuSTm77kdfE5MQpjavZKZ
j4Az2XCFZC6ZBqbopfP5jeMZb5X54iumrR0pzQDihCvYZf1NUCkxEtVfi1uyKo/6
38P+JCpKVJMfc8ra1eYTSWFZd75QuL+QmnZOUCjBKW2sPA5FlDrNESu4+DHBUQm8
quLCPgKhp5NeID5crnbTIXrUBokPtzlZmtHK9LTXy3OZGWRkyQKCAQEA1t8TaugB
jLQR65pSldWL7UJuFVVTUoCJPypyNB9/sUlL/MwTAlyaXz/yMyCeBukwrpLOS44s
hndBRN/vlvDBhJ/V7Xh0DQe/0ijs4ItcXCYi7xEqfNwQPMBD+5rZGJyMb8KKWv0K
ARxDKI4c+KRD0zd5wVmzVRN7KfU3OqWleuN3LLVj7tzaOdOlRSA7aiBM/hud5TQ9
E0pAwH8Hhc1amjiC8tBlaFegIhuzIzxMSXHRBUp3lh3/oe36382nsELclN1hUVDk
t3TAZctyXFB3HE2tzIvoqEJQ7Fdws05EPeqH81NzGcFTMKSbyRs6kXc8ECHOsiXH
zL7yvR7KPfTvawKCAQEAvIVVQWyiqNRq7SBGws87Z5cdYI8gpJB8lYrJIji4rUEE
Mx2gUx+Xhs9A2Rs41gXlawoEcjP9berve631VNWC4+t9pTvVojqXmrvZ5UD7uvCI
FQO//IIGjkKEfDpIH/qlDRVelFSSRc9Q/cJbfSpKblbrXgQmtnJ9jlBJE/SLImxP
79DUtifZlypUQl9yc8Rg1RbjrAkcAVa9PG1t7phSjrddtJmtURkEthXY77Gw9XrT
80ZRGvJHKIlZPfhqvZSFC881RIgIiF+BulhnmzMJ4vgXyq0T+QcUF7AAtPQSHr0r
Bnp7ReP1yGm5P7t23fEM4gDuDCAPt4GYYyLEcgizZQKCAQEAhOLTbHNtueoHhzEa
0CWQczx5PmJVtJlfyBvlI00zuJ3/C6neO8CvjCbNEEek09tVyzFpuhqEeOi6CfGA
iFX/KJl9QW8UPpbDUCMVVE174lWHl1iDcVLcC+ZQZQPALdNrmxayYFFL5aHn+uXh
DvjwJWmSuDxUh1IUAr/v1yporBaPNqw70Jbvs4GsHZMwi5LMav8DaKQk/ZAXebVW
Hq8A0I4QlkDB5oUCuPVulWUOPQHR5jbGvKVy2l+GnvDeO0kuiDZdoF+qA7eF4a6v
3F27PFriGLWSPrU8vN3bCllmJPCuAZNjhNMmMtgqrHZVg289SzDNVym8ZmjVUJcB
Ns4LXwKCAQEAt4nkKA8ZCd/MvlInMjDJ+B+ydTjDohQdhwYYrh2lBuB+szlLxsHC
J3iN/RE4s34IDr8w9vLQ8HvDbpk9ebtpdHbn27+2TPxYb0gmasFqk2TsR/Ffr/nz
Yfs2uy+O2ruOh39flnAD/L1Lb9MSeXh8AJLVEbJe8k/jF3Powenar8fdx3B8N1/y
wu5uHDSK3FS7pZpkTDCOOGt3T2aGmb1o2xOAwny/tW3zHUeF7k8EJurPgVDbU62/
Q6L85I1/dlUrdwTkKnV6QTMivQamz/3PyU6a8zKweEnA8RLkjUf2gEDRq7wBWlkH
CHiN55OetZOiZEJdgCaSxqkAcLv/n7oC1QKCAQEAqFCGT1VXn2Pa0tT6d+oFvXa9
HCU11DloZwXT968rhF8BRk7KEUoezEv6bMFluL3jOc03dQK5ZQtjvTBFJbW75VLV
artuSLbU/BW+gDkYZk3Cn5gPzC9HldCks+KIC8rAqCOuoMG77HYNW+7rBKKwbwl5
kCAmnJa65fYs7CYzD98foG+VllsnUX+m1LLeKcrPDeiiqndBaSZ/MERgZa6Ivbwi
05m6qj/vW/VbWNb5Txgcy1jN9zQmbN4RtfgsssJFfsKrMKIqVzu6CLpBxypNQvGa
4Ku3TVFroshYqYJLVmqVIXOWVfOHA4LOei6kCe9Gi42v7jKMx3GD+nB+PVmQWg==
-----END RSA PRIVATE KEY-----

View File

@@ -0,0 +1,28 @@
-----BEGIN CERTIFICATE REQUEST-----
MIIE0DCCArgCAQAwgYoxCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlh
MRUwEwYDVQQHDAxTYW5GcmFuY2lzY28xETAPBgNVBAoMCEJha2VyeUlBMRAwDgYD
VQQLDAdTdG9yYWdlMSowKAYDVQQDDCFtaW5pby5iYWtlcnktaWEuc3ZjLmNsdXN0
ZXIubG9jYWwwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCN+XNRDxtM
siHWRzlX5bWLjd2la1w0HeUbzZ8l4mVRQTvgVZ2ilhZ4g749D3hE2HK4PV7yDXyN
ofIz91s5CCIK9iuReukoYeTST0VRrNNUd72oe1oxp4v+iPOOQE8K6IH63ztc8EHZ
0cHxNVrm7HCVLFFG09WGn9th9b51OVhCUFTyQqfvnL3rhvL0vvx7xTuVISGhw8wc
/7DZPR2OFdSS8raVpWqy+vi0lgjQnbdcaI43t+2tfqHi3E3mJ1h3SR4YQJh0FWMI
ULcIW3GcOKxQ2r9mAh1JeAR9BtVRQnFF4ZBlnN4nwd0IdmFLofvFgnylIJ5lm1kT
/aIkAhljKPiWZhDmLayMlxf+YgtzPhoUtGt5tPfmXPDosYv5BNl/7PD3gem+Gqmn
KRb4Sxz+6jDDMCijk/+QSh9ri3rDCjoiwxgi1p7lFDZukbR02XSVUmrTZljmDOPg
tnMXhVNbr0ftWBtraynEGEIKIJrAG/XWmlgL+9rQ/2VHRuPbBplfY1azIvAHcxag
xK4xW0cA9HAj8WSmEt30WplLXCeHv8UY2FKSk3cPmBp7QIAwKxqdeFuUQTcsT8p6
wDCwZuP8irD9JMmbaLn+GyRFJkbXfcmLnWEKO7LqtEP7tfKEb+Vz2kTKv488heQo
AUVPJRBiZsrwcoWlGlQ2iWmM9bW3ZGkzhwIDAQABoAAwDQYJKoZIhvcNAQELBQAD
ggIBABINqJhSOOh+ZnFFKCz0hRIKZdAiDYXtwNDY0nGS6ZUkCTIqbx8m+iGmAyn+
zCtoN5AK73U6QMeMHKebL6Yfhjh5HvVWqRb/dbXwgasVusOQMXmYCvkLOuSKjSUf
3jWhJrA9I1Vg41vfoZmyy3u6g7/uRmOgSAhVB0Dk44GAlzW0jpZIBveQ4H0M1PHV
HGAXaZKLmmnHTpC3ilsaQTKF5yjVXMmJ85VnyMlo/Kxiv+XujKdt1Dp0BRk+IPpW
DFNAY2joOAPOvJImH/7k0YrE3JZl11e4pyI1BO5SpKllWfPMhw0kqsTrtrnTeAgW
eguP+fkXZaCeOyUyvatUMW7+lVcKu2Gqs/tPpn7PaAVyb1dTN5L9E32o6f9dJ7ew
mD+mw+p+dKwhTSsf20irVZmNEYTyE40fnQQeR41fM5a8uGMxegIfKSwtWgDgEjE7
z3L034/g+RQop0nyRhCb52HKlWHniGM/w+/S+2Rn6Ac7R7L3gAuNrdCLD9bWIcXe
jaGQeh0zrp/TtXk/D+81KLdixGbWCzoeCggFzaFZUHf/4AT8lI6qZhp8IouBIL51
b2A9Gz1yro0y6YaJai2HDDu7emVCCGgu45i8yxh09jwIR/MRjBWBDHw29xixbIiN
af6SBracitghXnTxpOjLLfN1amp7i0CUe51HzxLblxNcnZM7
-----END CERTIFICATE REQUEST-----

View File

@@ -0,0 +1,27 @@
[req]
distinguished_name = req_distinguished_name
req_extensions = v3_req
prompt = no
[req_distinguished_name]
C = US
ST = California
L = SanFrancisco
O = BakeryIA
OU = Storage
CN = minio.bakery-ia.svc.cluster.local
[v3_req]
keyUsage = keyEncipherment, dataEncipherment
extendedKeyUsage = serverAuth, clientAuth
subjectAltName = @alt_names
[alt_names]
DNS.1 = minio.bakery-ia.svc.cluster.local
DNS.2 = minio.bakery-ia
DNS.3 = minio-console.bakery-ia.svc.cluster.local
DNS.4 = minio-console.bakery-ia
DNS.5 = minio
DNS.6 = minio-console
DNS.7 = localhost
IP.1 = 127.0.0.1

270
scripts/cleanup_disk_space.py Executable file
View File

@@ -0,0 +1,270 @@
#!/usr/bin/env python3
"""
Bakery IA Disk Space Cleanup Script
===================================
This script performs comprehensive cleanup of Docker and Kubernetes resources
to prevent disk space exhaustion during development.
Features:
- Automatic cleanup based on disk space thresholds
- Manual cleanup on demand
- Comprehensive resource cleanup (images, containers, volumes, etc.)
- Detailed reporting and logging
Usage:
./scripts/cleanup_disk_space.py [--manual] [--threshold GB] [--verbose]
Environment Variables:
TILT_DISK_THRESHOLD_GB - Minimum free space required (default: 10GB)
TILT_CLEANUP_VERBOSE - Set to "true" for verbose output
"""
import subprocess
import sys
import os
import argparse
import time
from datetime import datetime
def get_disk_space():
"""Get available disk space in GB"""
try:
result = subprocess.run(['df', '/', '--output=avail', '-h'],
capture_output=True, text=True, check=True)
# Extract numeric value from output like "15G"
output = result.stdout.strip().split('\n')[-1].strip()
if 'T' in output:
return float(output.replace('T', '')) * 1024
elif 'G' in output:
return float(output.replace('G', ''))
elif 'M' in output:
return float(output.replace('M', '')) / 1024
else:
return 0
except Exception as e:
print(f"⚠️ Could not check disk space: {e}")
return 999 # Assume plenty of space if we can't check
def cleanup_docker_images(verbose=False):
"""Clean up old and unused Docker images"""
if verbose:
print("🧹 Cleaning up Docker images...")
try:
# Remove dangling images
if verbose:
print(" Removing dangling images...")
subprocess.run(['docker', 'image', 'prune', '-f'],
capture_output=True, text=True)
# Remove unused images (not referenced by any container)
if verbose:
print(" Removing unused images...")
subprocess.run(['docker', 'image', 'prune', '-a', '-f'],
capture_output=True, text=True)
# Remove old images (older than 2 hours)
if verbose:
print(" Removing old images (>2 hours)...")
subprocess.run(['docker', 'image', 'prune', '-a', '-f',
'--filter', 'until=2h'],
capture_output=True, text=True)
if verbose:
print("✅ Docker image cleanup completed")
return True
except Exception as e:
print(f"⚠️ Docker image cleanup failed: {e}")
return False
def cleanup_docker_containers(verbose=False):
"""Clean up stopped containers"""
if verbose:
print("🧹 Cleaning up Docker containers...")
try:
# Remove stopped containers
if verbose:
print(" Removing stopped containers...")
subprocess.run(['docker', 'container', 'prune', '-f'],
capture_output=True, text=True)
# Remove old containers (older than 1 hour)
if verbose:
print(" Removing old containers (>1 hour)...")
subprocess.run(['docker', 'container', 'prune', '-f',
'--filter', 'until=1h'],
capture_output=True, text=True)
if verbose:
print("✅ Docker container cleanup completed")
return True
except Exception as e:
print(f"⚠️ Docker container cleanup failed: {e}")
return False
def cleanup_docker_volumes(verbose=False):
"""Clean up unused volumes"""
if verbose:
print("🧹 Cleaning up Docker volumes...")
try:
# Remove unused volumes
if verbose:
print(" Removing unused volumes...")
subprocess.run(['docker', 'volume', 'prune', '-f'],
capture_output=True, text=True)
if verbose:
print("✅ Docker volume cleanup completed")
return True
except Exception as e:
print(f"⚠️ Docker volume cleanup failed: {e}")
return False
def cleanup_docker_system(verbose=False):
"""Clean up Docker system (build cache, networks, etc.)"""
if verbose:
print("🧹 Cleaning up Docker system...")
try:
# Remove build cache
if verbose:
print(" Removing build cache...")
subprocess.run(['docker', 'builder', 'prune', '-f'],
capture_output=True, text=True)
# Remove unused networks
if verbose:
print(" Removing unused networks...")
subprocess.run(['docker', 'network', 'prune', '-f'],
capture_output=True, text=True)
if verbose:
print("✅ Docker system cleanup completed")
return True
except Exception as e:
print(f"⚠️ Docker system cleanup failed: {e}")
return False
def cleanup_kubernetes_resources(verbose=False):
"""Clean up Kubernetes resources"""
if verbose:
print("🧹 Cleaning up Kubernetes resources...")
try:
# Remove completed jobs (Job field selectors support status.successful; no age filter is applied)
if verbose:
print(" Removing completed jobs...")
subprocess.run(['kubectl', 'delete', 'jobs', '-n', 'bakery-ia',
'--field-selector=status.successful=1'],
capture_output=True, text=True)
# Remove failed jobs; field selectors only support equality checks,
# so this filter may be rejected on some clusters and is best-effort
if verbose:
print(" Removing failed jobs...")
subprocess.run(['kubectl', 'delete', 'jobs', '-n', 'bakery-ia',
'--field-selector=status.failed>0'],
capture_output=True, text=True)
if verbose:
print("✅ Kubernetes resource cleanup completed")
return True
except Exception as e:
print(f"⚠️ Kubernetes resource cleanup failed: {e}")
return False
def perform_cleanup(manual=False, threshold_gb=10, verbose=False):
"""Perform comprehensive cleanup"""
print("\n" + "="*60)
print("🚀 STARTING COMPREHENSIVE CLEANUP")
print("="*60)
if manual:
print("🎛️ Mode: MANUAL (forced cleanup)")
else:
print("🎛️ Mode: AUTOMATIC (threshold-based)")
print(f"📊 Threshold: {threshold_gb}GB free space")
# Check disk space before cleanup
free_space_before = get_disk_space()
print(f"📊 Disk space before cleanup: {free_space_before:.1f}GB free")
# Check if cleanup is needed (unless manual)
if not manual and free_space_before >= threshold_gb:
print("✅ Sufficient disk space available, skipping cleanup")
return True
cleanup_results = []
# Perform all cleanup operations
cleanup_results.append(("Docker Images", cleanup_docker_images(verbose)))
cleanup_results.append(("Docker Containers", cleanup_docker_containers(verbose)))
cleanup_results.append(("Docker Volumes", cleanup_docker_volumes(verbose)))
cleanup_results.append(("Docker System", cleanup_docker_system(verbose)))
cleanup_results.append(("Kubernetes Resources", cleanup_kubernetes_resources(verbose)))
# Check disk space after cleanup
free_space_after = get_disk_space()
space_reclaimed = free_space_after - free_space_before
print(f"\n📊 Disk space after cleanup: {free_space_after:.1f}GB free")
print(f"🎯 Space reclaimed: {space_reclaimed:.1f}GB")
# Summary
print("\n📋 CLEANUP SUMMARY:")
for name, success in cleanup_results:
status = "✅ SUCCESS" if success else "❌ FAILED"
print(f" {name}: {status}")
print("="*60)
print("✅ CLEANUP COMPLETED")
print("="*60 + "\n")
return True
def main():
parser = argparse.ArgumentParser(
description='Bakery IA Disk Space Cleanup Script',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
./cleanup_disk_space.py # Automatic cleanup (checks threshold)
./cleanup_disk_space.py --manual # Force cleanup regardless of threshold
./cleanup_disk_space.py --threshold 5 # Use 5GB threshold
./cleanup_disk_space.py --verbose # Verbose output
"""
)
parser.add_argument('--manual', action='store_true',
help='Force cleanup regardless of disk space threshold')
parser.add_argument('--threshold', type=int, default=10,
help='Minimum free space required in GB (default: 10)')
parser.add_argument('--verbose', action='store_true',
help='Enable verbose output')
args = parser.parse_args()
# Get threshold from environment variable if set
env_threshold = os.getenv('TILT_DISK_THRESHOLD_GB')
if env_threshold:
try:
args.threshold = int(env_threshold)
except ValueError:
pass
# Get verbose from environment variable if set
env_verbose = os.getenv('TILT_CLEANUP_VERBOSE', 'false').lower()
if env_verbose == 'true':
args.verbose = True
return perform_cleanup(
manual=args.manual,
threshold_gb=args.threshold,
verbose=args.verbose
)
if __name__ == '__main__':
success = main()
sys.exit(0 if success else 1)

View File

@@ -1,10 +1,10 @@
# Forecasting Dockerfile
# Add this stage at the top of each service Dockerfile
# Forecasting Service Dockerfile with MinIO Support
# Multi-stage build for optimized production image
FROM python:3.11-slim AS shared
WORKDIR /shared
COPY shared/ /shared/
# Then your main service stage
# Main service stage
FROM python:3.11-slim
WORKDIR /app

View File

@@ -49,6 +49,18 @@ class ForecastingSettings(BaseServiceSettings):
PREDICTION_CACHE_TTL_HOURS: int = int(os.getenv("PREDICTION_CACHE_TTL_HOURS", "6"))
FORECAST_BATCH_SIZE: int = int(os.getenv("FORECAST_BATCH_SIZE", "100"))
# MinIO Configuration
MINIO_ENDPOINT: str = os.getenv("MINIO_ENDPOINT", "minio.bakery-ia.svc.cluster.local:9000")
MINIO_ACCESS_KEY: str = os.getenv("FORECASTING_MINIO_ACCESS_KEY", "forecasting-service")
MINIO_SECRET_KEY: str = os.getenv("FORECASTING_MINIO_SECRET_KEY", "forecasting-secret-key")
MINIO_USE_SSL: bool = os.getenv("MINIO_USE_SSL", "true").lower() == "true"
MINIO_MODEL_BUCKET: str = os.getenv("MINIO_MODEL_BUCKET", "training-models")
MINIO_CONSOLE_PORT: str = os.getenv("MINIO_CONSOLE_PORT", "9001")
MINIO_API_PORT: str = os.getenv("MINIO_API_PORT", "9000")
MINIO_REGION: str = os.getenv("MINIO_REGION", "us-east-1")
MINIO_MODEL_LIFECYCLE_DAYS: int = int(os.getenv("MINIO_MODEL_LIFECYCLE_DAYS", "90"))
MINIO_CACHE_TTL_SECONDS: int = int(os.getenv("MINIO_CACHE_TTL_SECONDS", "3600"))
# Real-time Forecasting
REALTIME_FORECASTING_ENABLED: bool = os.getenv("REALTIME_FORECASTING_ENABLED", "true").lower() == "true"
FORECAST_UPDATE_INTERVAL_HOURS: int = int(os.getenv("FORECAST_UPDATE_INTERVAL_HOURS", "6"))

View File

@@ -16,6 +16,7 @@ import httpx
from pathlib import Path
import os
import joblib
import io
from app.core.config import settings
from shared.monitoring.metrics import MetricsCollector
@@ -578,118 +579,114 @@ class PredictionService:
return adjusted
async def _load_model(self, model_id: str, model_path: str):
"""Load model from file with improved validation and error handling"""
# Enhanced model file validation
if not await self._validate_model_file(model_path):
logger.error(f"Model file not valid: {model_path}")
return None
"""Load model from MinIO with improved validation and error handling"""
# Check cache first
if model_id in self.model_cache:
cached_model, cached_time = self.model_cache[model_id]
if (datetime.now() - cached_time).seconds < self.cache_ttl:
logger.debug(f"Model loaded from cache: {model_id}")
return cached_model
# Validate MinIO path format
if not await self._validate_model_file(model_path):
logger.error(f"Model path not valid: {model_path}")
return None
try:
if os.path.exists(model_path):
# Try multiple loading methods for compatibility
model = await self._load_model_safely(model_path)
if model is None:
logger.error(f"Failed to load model from: {model_path}")
return None
# Cache the model
self.model_cache[model_id] = (model, datetime.now())
logger.info(f"Model loaded successfully: {model_path}")
return model
else:
logger.error(f"Model file not found: {model_path}")
# Load from MinIO
model = await self._load_model_safely(model_path)
if model is None:
logger.error(f"Failed to load model from MinIO: {model_path}")
return None
# Cache the model
self.model_cache[model_id] = (model, datetime.now())
logger.info(f"Model loaded successfully from MinIO: {model_path}")
return model
except Exception as e:
logger.error(f"Error loading model: {e}")
logger.error(f"Error loading model from MinIO: {e}")
return None
async def _load_model_safely(self, model_path: str):
"""Safely load model with multiple fallback methods"""
# Method 1: Try joblib first (recommended for sklearn/Prophet models)
"""Load model from MinIO storage (clean implementation - MinIO only)"""
try:
logger.debug(f"Attempting to load model with joblib: {model_path}")
model = joblib.load(model_path)
logger.info(f"Model loaded successfully with joblib")
return model
# Parse MinIO path: minio://bucket_name/object_path
_, bucket_and_path = model_path.split("://", 1)
bucket_name, object_name = bucket_and_path.split("/", 1)
logger.debug(f"Loading model from MinIO: {bucket_name}/{object_name}")
# Use MinIO client
from shared.clients.minio_client import minio_client
# Download model data
model_data = minio_client.get_object(bucket_name, object_name)
if not model_data:
logger.error(f"Failed to download model from MinIO: {model_path}")
return None
# Try joblib first (using BytesIO since joblib.load reads from file-like objects)
try:
buffer = io.BytesIO(model_data)
model = joblib.load(buffer)
logger.info(f"Model loaded successfully from MinIO with joblib")
return model
except Exception as e:
logger.warning(f"Joblib loading from MinIO failed: {e}")
# Try pickle as fallback
try:
model = pickle.loads(model_data)
logger.info(f"Model loaded successfully from MinIO with pickle")
return model
except Exception as e:
logger.warning(f"Pickle loading from MinIO failed: {e}")
logger.error(f"All loading methods failed for MinIO object: {model_path}")
return None
except Exception as e:
logger.warning(f"Joblib loading failed: {e}")
# Method 2: Try pickle as fallback
try:
logger.debug(f"Attempting to load model with pickle: {model_path}")
with open(model_path, 'rb') as f:
model = pickle.load(f)
logger.info(f"Model loaded successfully with pickle")
return model
except Exception as e:
logger.warning(f"Pickle loading failed: {e}")
# Method 3: Try pandas pickle (for Prophet models saved with pandas)
try:
logger.debug(f"Attempting to load model with pandas: {model_path}")
import pandas as pd
model = pd.read_pickle(model_path)
logger.info(f"Model loaded successfully with pandas")
return model
except Exception as e:
logger.warning(f"Pandas loading failed: {e}")
logger.error(f"All loading methods failed for: {model_path}")
return None
logger.error(f"Failed to load model from MinIO: {model_path}, error: {e}")
return None
async def _validate_model_file(self, model_path: str) -> bool:
"""Enhanced model file validation"""
"""Validate MinIO model path and check object exists"""
try:
if not os.path.exists(model_path):
logger.error(f"Model file not found: {model_path}")
# Validate MinIO path format
if not model_path.startswith("minio://"):
logger.error(f"Invalid model path format (expected minio://): {model_path}")
return False
# Check file size (should be > 1KB for a trained model)
file_size = os.path.getsize(model_path)
if file_size < 1024:
logger.warning(f"Model file too small ({file_size} bytes): {model_path}")
return False
# More comprehensive file format detection
# Parse MinIO path
try:
with open(model_path, 'rb') as f:
header = f.read(16) # Read more bytes for better detection
# Check for various pickle/joblib signatures
valid_signatures = [
b']\x93PICKLE', # Joblib
b'\x80\x03', # Pickle protocol 3
b'\x80\x04', # Pickle protocol 4
b'\x80\x05', # Pickle protocol 5
b'}\x94', # Newer joblib format
b'}\x93', # Alternative joblib format
]
is_valid_format = any(header.startswith(sig) for sig in valid_signatures)
if not is_valid_format:
# Log header for debugging but don't fail validation
logger.warning(f"Unrecognized file header: {header[:8]} for {model_path}")
logger.info("Proceeding with loading attempt despite unrecognized header")
# Return True to allow loading attempt - some valid files may have different headers
return True
return True
except Exception as e:
logger.error(f"Error reading model file header: {e}")
_, bucket_and_path = model_path.split("://", 1)
bucket_name, object_name = bucket_and_path.split("/", 1)
except ValueError:
logger.error(f"Cannot parse MinIO path: {model_path}")
return False
# Check if object exists in MinIO
from shared.clients.minio_client import minio_client
if not minio_client.object_exists(bucket_name, object_name):
logger.error(f"Model object not found in MinIO: {bucket_name}/{object_name}")
return False
# Check object metadata for size validation
metadata = minio_client.get_object_metadata(bucket_name, object_name)
if metadata:
file_size = metadata.get("size", 0)
if file_size < 1024:
logger.warning(f"Model object too small ({file_size} bytes): {model_path}")
return False
logger.debug(f"Model validated in MinIO: {bucket_name}/{object_name}, size={file_size}")
return True
except Exception as e:
logger.error(f"Model validation error: {e}")
return False

View File

@@ -31,6 +31,7 @@ scikit-learn==1.6.1
pandas==2.2.3
numpy==2.2.2
joblib==1.4.2
minio==7.2.2
# Messaging
aio-pika==9.4.3

View File

@@ -1,10 +1,10 @@
# Training Dockerfile
# Add this stage at the top of each service Dockerfile
# Training Service Dockerfile with MinIO Support
# Multi-stage build for optimized production image
FROM python:3.11-slim AS shared
WORKDIR /shared
COPY shared/ /shared/
# Then your main service stage
# Main service stage
FROM python:3.11-slim
WORKDIR /app

View File

@@ -116,29 +116,51 @@ async def broadcast_training_progress(job_id: str, progress: dict):
await websocket_manager.broadcast(job_id, message)
```
### Model Artifact Management
### Model Artifact Management (MinIO Storage)
```python
# Model storage and retrieval
# Model storage and retrieval using MinIO
import joblib
from pathlib import Path
from shared.clients.minio_client import minio_client
# Save trained model
# Save trained model to MinIO
def save_model_artifact(model: Prophet, tenant_id: str, product_id: str) -> str:
"""Serialize and store model"""
model_dir = Path(f"/models/{tenant_id}/{product_id}")
model_dir.mkdir(parents=True, exist_ok=True)
"""Serialize and store model in MinIO"""
import io
version = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
model_path = model_dir / f"model_v{version}.pkl"
model_id = str(uuid.uuid4())
object_name = f"models/{tenant_id}/{product_id}/{model_id}.pkl"
joblib.dump(model, model_path)
return str(model_path)
# Serialize model (joblib.dump writes to file-like objects)
buffer = io.BytesIO()
joblib.dump(model, buffer)
model_data = buffer.getvalue()
# Load trained model
# Upload to MinIO
minio_client.put_object(
bucket_name="training-models",
object_name=object_name,
data=model_data,
content_type="application/octet-stream"
)
# Return MinIO path
return f"minio://training-models/{object_name}"
# Load trained model from MinIO
def load_model_artifact(model_path: str) -> Prophet:
"""Load serialized model"""
return joblib.load(model_path)
"""Load serialized model from MinIO"""
import io
# Parse MinIO path: minio://bucket_name/object_path
_, bucket_and_path = model_path.split("://", 1)
bucket_name, object_name = bucket_and_path.split("/", 1)
# Download from MinIO
model_data = minio_client.get_object(bucket_name, object_name)
# Deserialize (joblib.load reads from file-like objects)
buffer = io.BytesIO(model_data)
return joblib.load(buffer)
```
### Performance Metrics Calculation
@@ -194,8 +216,8 @@ def calculate_performance_metrics(model: Prophet, actual_data: pd.DataFrame) ->
- **Framework**: FastAPI (Python 3.11+) - Async web framework with WebSocket support
- **Database**: PostgreSQL 17 - Training logs, model metadata, job queue
- **ML Library**: Prophet (fbprophet) - Time series forecasting
- **Model Storage**: Joblib - Model serialization
- **File System**: Persistent volumes - Model artifact storage
- **Model Storage**: MinIO (S3-compatible) - Distributed object storage with TLS
- **Serialization**: Joblib - Model serialization
- **WebSocket**: FastAPI WebSocket - Real-time progress updates
- **Messaging**: RabbitMQ 4.1 - Training completion events
- **ORM**: SQLAlchemy 2.0 (async) - Database abstraction
@@ -442,7 +464,13 @@ websocket_messages_sent = Counter(
- `PORT` - Service port (default: 8004)
- `DATABASE_URL` - PostgreSQL connection string
- `RABBITMQ_URL` - RabbitMQ connection string
- `MODEL_STORAGE_PATH` - Path for model artifacts (default: /models)
**MinIO Configuration:**
- `MINIO_ENDPOINT` - MinIO server endpoint (default: minio.bakery-ia.svc.cluster.local:9000)
- `MINIO_ACCESS_KEY` - MinIO access key
- `MINIO_SECRET_KEY` - MinIO secret key
- `MINIO_USE_SSL` - Enable TLS (default: true)
- `MINIO_MODEL_BUCKET` - Bucket for models (default: training-models)
**Training Configuration:**
- `MAX_CONCURRENT_JOBS` - Maximum parallel training jobs (default: 3)
@@ -462,10 +490,9 @@ websocket_messages_sent = Counter(
- `WEBSOCKET_MAX_CONNECTIONS` - Max connections per tenant (default: 10)
- `WEBSOCKET_MESSAGE_QUEUE_SIZE` - Message buffer size (default: 100)
**Storage Configuration:**
- `MODEL_RETENTION_DAYS` - Days to keep old models (default: 90)
- `MAX_MODEL_VERSIONS_PER_PRODUCT` - Version limit (default: 10)
- `ENABLE_MODEL_COMPRESSION` - Compress model files (default: true)
**Storage Configuration (MinIO):**
- `MINIO_MODEL_LIFECYCLE_DAYS` - Days to keep old model versions (default: 90)
- `MINIO_CACHE_TTL_SECONDS` - Model cache TTL in seconds (default: 3600)
## Development Setup
@@ -473,7 +500,7 @@ websocket_messages_sent = Counter(
- Python 3.11+
- PostgreSQL 17
- RabbitMQ 4.1
- Persistent storage for model artifacts
- MinIO (S3-compatible object storage)
### Local Development
```bash
@@ -488,10 +515,13 @@ pip install -r requirements.txt
# Set environment variables
export DATABASE_URL=postgresql://user:pass@localhost:5432/training
export RABBITMQ_URL=amqp://guest:guest@localhost:5672/
export MODEL_STORAGE_PATH=/tmp/models
export MINIO_ENDPOINT=localhost:9000
export MINIO_ACCESS_KEY=minioadmin
export MINIO_SECRET_KEY=minioadmin
export MINIO_USE_SSL=false # Use true in production
# Create model storage directory
mkdir -p /tmp/models
# Start MinIO locally (if not using K8s)
docker run -p 9000:9000 -p 9001:9001 minio/minio server /data --console-address ":9001"
# Run database migrations
alembic upgrade head
@@ -590,7 +620,7 @@ for feature_name in poi_features.keys():
- **External Service** - Fetch weather, traffic, holiday, and POI feature data
- **PostgreSQL** - Store job queue, models, metrics, logs
- **RabbitMQ** - Publish training completion events
- **File System** - Store model artifacts
- **MinIO** - Store model artifacts (S3-compatible object storage with TLS)
### Dependents (Services That Call This)
- **Forecasting Service** - Load trained models for predictions
@@ -627,11 +657,11 @@ for feature_name in poi_features.keys():
4. **Resource Limits** - CPU/memory limits per training job
5. **Priority Queue** - Prioritize important products first
### Storage Optimization
1. **Model Compression** - Compress model artifacts (gzip)
2. **Old Model Cleanup** - Automatic deletion after retention period
3. **Version Limits** - Keep only N most recent versions
4. **Deduplication** - Avoid storing identical models
### Storage Optimization (MinIO)
1. **Object Versioning** - MinIO maintains version history automatically
2. **Lifecycle Policies** - Auto-cleanup old versions after 90 days (see the sketch after this list)
3. **TLS Encryption** - Secure communication with MinIO
4. **Distributed Storage** - MinIO handles replication and availability
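A minimal, hedged sketch of how such a 90-day expiration rule could be attached with the `minio` Python SDK; the endpoint, credentials, and `models/` prefix are assumptions taken from the defaults documented above, and this is illustrative rather than the service's actual provisioning code:

```python
# Hypothetical sketch: attach a 90-day expiration rule to the model bucket.
# Assumes minio>=7.x and credentials permitted to manage bucket lifecycle.
import os

from minio import Minio
from minio.commonconfig import ENABLED, Filter
from minio.lifecycleconfig import Expiration, LifecycleConfig, Rule

client = Minio(
    os.getenv("MINIO_ENDPOINT", "minio.bakery-ia.svc.cluster.local:9000"),
    access_key=os.getenv("MINIO_ACCESS_KEY", "minioadmin"),
    secret_key=os.getenv("MINIO_SECRET_KEY", "minioadmin"),
    secure=os.getenv("MINIO_USE_SSL", "true").lower() == "true",
)

lifecycle = LifecycleConfig([
    Rule(
        ENABLED,
        rule_filter=Filter(prefix="models/"),
        rule_id="expire-old-model-versions",
        expiration=Expiration(days=90),  # mirrors MINIO_MODEL_LIFECYCLE_DAYS default
    )
])
client.set_bucket_lifecycle("training-models", lifecycle)
```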
### WebSocket Optimization
1. **Message Batching** - Batch progress updates (every 2 seconds)

View File

@@ -96,48 +96,48 @@ def check_system_resources() -> Dict[str, Any]:
def check_model_storage() -> Dict[str, Any]:
"""Check model storage health"""
"""Check MinIO model storage health"""
try:
storage_path = settings.MODEL_STORAGE_PATH
from shared.clients.minio_client import minio_client
if not os.path.exists(storage_path):
# Check MinIO connectivity
if not minio_client.health_check():
return {
"status": "warning",
"message": f"Model storage path does not exist: {storage_path}"
"status": "unhealthy",
"message": "MinIO service is not reachable",
"storage_type": "minio"
}
# Check if writable
test_file = os.path.join(storage_path, ".health_check")
try:
with open(test_file, 'w') as f:
f.write("test")
os.remove(test_file)
writable = True
except Exception:
writable = False
bucket_name = settings.MINIO_MODEL_BUCKET
# Count model files
model_files = 0
total_size = 0
for root, dirs, files in os.walk(storage_path):
for file in files:
if file.endswith('.pkl'):
model_files += 1
file_path = os.path.join(root, file)
total_size += os.path.getsize(file_path)
# Check if bucket exists
bucket_exists = minio_client.bucket_exists(bucket_name)
if not bucket_exists:
return {
"status": "warning",
"message": f"MinIO bucket does not exist: {bucket_name}",
"storage_type": "minio"
}
# Count model files in MinIO
model_objects = minio_client.list_objects(bucket_name, prefix="models/")
model_files = [obj for obj in model_objects if obj.endswith('.pkl')]
return {
"status": "healthy" if writable else "degraded",
"path": storage_path,
"writable": writable,
"model_files": model_files,
"total_size_mb": round(total_size / 1024 / 1024, 2)
"status": "healthy",
"storage_type": "minio",
"endpoint": settings.MINIO_ENDPOINT,
"bucket": bucket_name,
"use_ssl": settings.MINIO_USE_SSL,
"model_files": len(model_files),
"bucket_exists": bucket_exists
}
except Exception as e:
logger.error(f"Model storage check failed: {e}")
logger.error(f"MinIO storage check failed: {e}")
return {
"status": "error",
"storage_type": "minio",
"error": str(e)
}

View File

@@ -14,7 +14,6 @@ from app.services.training_service import EnhancedTrainingService
from datetime import datetime, timezone
from sqlalchemy import select, delete, func
import uuid
import shutil
from shared.auth.decorators import (
get_current_user_dep,
@@ -304,10 +303,9 @@ async def delete_tenant_models_complete(
"jobs_cancelled": 0,
"models_deleted": 0,
"artifacts_deleted": 0,
"artifacts_files_deleted": 0,
"minio_objects_deleted": 0,
"training_logs_deleted": 0,
"performance_metrics_deleted": 0,
"storage_freed_bytes": 0,
"errors": []
}
@@ -336,51 +334,35 @@ async def delete_tenant_models_complete(
deletion_stats["errors"].append(error_msg)
logger.error(error_msg)
# Step 2: Delete model artifact files from storage
# Step 2: Delete model artifact files from MinIO storage
try:
artifacts_query = select(ModelArtifact).where(
ModelArtifact.tenant_id == tenant_uuid
)
artifacts_result = await db.execute(artifacts_query)
artifacts = artifacts_result.scalars().all()
storage_freed = 0
from shared.clients.minio_client import minio_client
bucket_name = settings.MINIO_MODEL_BUCKET
prefix = f"models/{tenant_id}/"
# List all objects for this tenant
objects_to_delete = minio_client.list_objects(bucket_name, prefix=prefix)
files_deleted = 0
for artifact in artifacts:
for obj_name in objects_to_delete:
try:
file_path = Path(artifact.file_path)
if file_path.exists():
file_size = file_path.stat().st_size
file_path.unlink() # Delete file
storage_freed += file_size
files_deleted += 1
logger.debug("Deleted artifact file",
file_path=str(file_path),
size_bytes=file_size)
# Also try to delete parent directories if empty
try:
if file_path.parent.exists() and not any(file_path.parent.iterdir()):
file_path.parent.rmdir()
except:
pass # Ignore errors cleaning up directories
minio_client.delete_object(bucket_name, obj_name)
files_deleted += 1
logger.debug("Deleted MinIO object", object_name=obj_name)
except Exception as e:
error_msg = f"Error deleting artifact file {artifact.file_path}: {str(e)}"
error_msg = f"Error deleting MinIO object {obj_name}: {str(e)}"
deletion_stats["errors"].append(error_msg)
logger.warning(error_msg)
deletion_stats["artifacts_files_deleted"] = files_deleted
deletion_stats["storage_freed_bytes"] = storage_freed
logger.info("Deleted artifact files",
deletion_stats["minio_objects_deleted"] = files_deleted
logger.info("Deleted MinIO objects",
tenant_id=tenant_id,
files_deleted=files_deleted,
storage_freed_mb=storage_freed / (1024 * 1024))
files_deleted=files_deleted)
except Exception as e:
error_msg = f"Error processing artifact files: {str(e)}"
error_msg = f"Error processing MinIO objects: {str(e)}"
deletion_stats["errors"].append(error_msg)
logger.error(error_msg)
@@ -463,19 +445,7 @@ async def delete_tenant_models_complete(
detail=error_msg
)
# Step 4: Clean up tenant model directory
try:
tenant_model_dir = Path(settings.MODEL_STORAGE_PATH) / tenant_id
if tenant_model_dir.exists():
shutil.rmtree(tenant_model_dir)
logger.info("Deleted tenant model directory",
directory=str(tenant_model_dir))
except Exception as e:
error_msg = f"Error deleting model directory: {str(e)}"
deletion_stats["errors"].append(error_msg)
logger.warning(error_msg)
# Models deleted successfully
# Step 4: Models deleted successfully (MinIO cleanup already done in Step 2)
return {
"success": True,
"message": f"All training data for tenant {tenant_id} deleted successfully",

View File

@@ -44,6 +44,18 @@ class TrainingSettings(BaseServiceSettings):
MODEL_BACKUP_ENABLED: bool = os.getenv("MODEL_BACKUP_ENABLED", "true").lower() == "true"
MODEL_VERSIONING_ENABLED: bool = os.getenv("MODEL_VERSIONING_ENABLED", "true").lower() == "true"
# MinIO Configuration
MINIO_ENDPOINT: str = os.getenv("MINIO_ENDPOINT", "minio.bakery-ia.svc.cluster.local:9000")
MINIO_ACCESS_KEY: str = os.getenv("MINIO_ACCESS_KEY", "training-service")
MINIO_SECRET_KEY: str = os.getenv("MINIO_SECRET_KEY", "training-secret-key")
MINIO_USE_SSL: bool = os.getenv("MINIO_USE_SSL", "true").lower() == "true"
MINIO_MODEL_BUCKET: str = os.getenv("MINIO_MODEL_BUCKET", "training-models")
MINIO_CONSOLE_PORT: str = os.getenv("MINIO_CONSOLE_PORT", "9001")
MINIO_API_PORT: str = os.getenv("MINIO_API_PORT", "9000")
MINIO_REGION: str = os.getenv("MINIO_REGION", "us-east-1")
MINIO_MODEL_LIFECYCLE_DAYS: int = int(os.getenv("MINIO_MODEL_LIFECYCLE_DAYS", "90"))
MINIO_CACHE_TTL_SECONDS: int = int(os.getenv("MINIO_CACHE_TTL_SECONDS", "3600"))
# Training Configuration
MAX_CONCURRENT_TRAINING_JOBS: int = int(os.getenv("MAX_CONCURRENT_TRAINING_JOBS", "3"))

View File

@@ -5,6 +5,7 @@ Combines Prophet's seasonality modeling with XGBoost's pattern learning
import pandas as pd
import numpy as np
import io
from typing import Dict, List, Any, Optional, Tuple
import structlog
from datetime import datetime, timezone
@@ -110,8 +111,8 @@ class HybridProphetXGBoost:
# Step 4: Get Prophet predictions on training data
logger.info("Step 3: Generating Prophet predictions for residual calculation")
train_prophet_pred = self._get_prophet_predictions(prophet_result, train_df)
val_prophet_pred = self._get_prophet_predictions(prophet_result, val_df)
train_prophet_pred = await self._get_prophet_predictions(prophet_result, train_df)
val_prophet_pred = await self._get_prophet_predictions(prophet_result, val_df)
# Step 5: Calculate residuals (actual - prophet_prediction)
train_residuals = train_df['y'].values - train_prophet_pred
@@ -207,7 +208,7 @@ class HybridProphetXGBoost:
return df_enhanced
def _get_prophet_predictions(
async def _get_prophet_predictions(
self,
prophet_result: Dict[str, Any],
df: pd.DataFrame
@@ -230,8 +231,13 @@ class HybridProphetXGBoost:
# Load the actual Prophet model from the stored path
try:
import joblib
prophet_model = joblib.load(model_path)
if model_path.startswith("minio://"):
# Use prophet_manager to load from MinIO
prophet_model = await self.prophet_manager._load_model_from_minio(model_path)
else:
# Fallback to direct loading for local paths
import joblib
prophet_model = joblib.load(model_path)
except Exception as e:
raise ValueError(f"Failed to load Prophet model from path {model_path}: {str(e)}")
@@ -417,8 +423,13 @@ class HybridProphetXGBoost:
# Load the Prophet model from the stored path
try:
import joblib
prophet_model = joblib.load(prophet_model_path)
if prophet_model_path.startswith("minio://"):
# Use prophet_manager to load from MinIO
prophet_model = await self.prophet_manager._load_model_from_minio(prophet_model_path)
else:
# Fallback to direct loading for local paths
import joblib
prophet_model = joblib.load(prophet_model_path)
except Exception as e:
raise ValueError(f"Failed to load Prophet model from path {prophet_model_path}: {str(e)}")

View File

@@ -13,6 +13,7 @@ from datetime import datetime, timedelta
import uuid
import os
import joblib
import io
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
import json
@@ -85,9 +86,24 @@ class BakeryProphetManager:
self.database_manager = database_manager or create_database_manager(settings.DATABASE_URL, "training-service")
self.db_session = None # Will be set when session is available
# Ensure model storage directory exists
os.makedirs(settings.MODEL_STORAGE_PATH, exist_ok=True)
# Initialize MinIO client and ensure bucket exists
from shared.clients.minio_client import minio_client
self.minio_client = minio_client
self._ensure_minio_bucket()
def _ensure_minio_bucket(self):
"""Ensure the training-models bucket exists in MinIO"""
try:
bucket_name = settings.MINIO_MODEL_BUCKET
if not self.minio_client.bucket_exists(bucket_name):
self.minio_client.create_bucket(bucket_name)
logger.info(f"Created MinIO bucket: {bucket_name}")
else:
logger.debug(f"MinIO bucket already exists: {bucket_name}")
except Exception as e:
logger.error(f"Failed to ensure MinIO bucket exists: {e}")
# Don't raise - bucket might be created by init job
async def train_bakery_model(self,
tenant_id: str,
inventory_product_id: str,
@@ -706,18 +722,40 @@ class BakeryProphetManager:
session = None) -> str:
"""Store model with database integration"""
# Create model directory
model_dir = Path(settings.MODEL_STORAGE_PATH) / tenant_id
model_dir.mkdir(parents=True, exist_ok=True)
# Store model in MinIO (clean implementation - MinIO only)
# Use BytesIO buffer since joblib.dump() writes to file-like objects
buffer = io.BytesIO()
joblib.dump(model, buffer)
model_data = buffer.getvalue()
object_name = f"models/{tenant_id}/{inventory_product_id}/{model_id}.pkl"
# Use MinIO client
from shared.clients.minio_client import minio_client
# Upload model to MinIO
success = minio_client.put_object(
bucket_name="training-models",
object_name=object_name,
data=model_data,
content_type="application/octet-stream",
metadata={
"model_id": model_id,
"tenant_id": tenant_id,
"inventory_product_id": inventory_product_id,
"model_type": "prophet_optimized"
}
)
if not success:
raise Exception("Failed to upload model to MinIO")
# Return MinIO object path
model_path = f"minio://training-models/{object_name}"
# Calculate checksum for model data
import hashlib
model_checksum = hashlib.sha256(model_data).hexdigest()
# Store model file
model_path = model_dir / f"{model_id}.pkl"
joblib.dump(model, model_path)
# Calculate checksum for model file integrity
checksummed_file = ChecksummedFile(str(model_path))
model_checksum = checksummed_file.calculate_and_save_checksum()
# Enhanced metadata with checksum
metadata = {
"model_id": model_id,
@@ -733,14 +771,23 @@ class BakeryProphetManager:
"optimized_parameters": optimized_params or {},
"created_at": datetime.now().isoformat(),
"model_type": "prophet_optimized",
"file_path": str(model_path),
"minio_path": model_path,
"checksum": model_checksum,
"checksum_algorithm": "sha256"
}
# Store metadata in MinIO as well
metadata_json = json.dumps(metadata, indent=2, default=str)
metadata_object_name = f"models/{tenant_id}/{inventory_product_id}/{model_id}.json"
minio_client.put_object(
bucket_name="training-models",
object_name=metadata_object_name,
data=metadata_json,
content_type="application/json"
)
metadata_path = model_path.with_suffix('.json')
with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2, default=str)
# Define metadata_path for database record
metadata_path = f"minio://training-models/{metadata_object_name}"
# Store in memory
model_key = f"{tenant_id}:{inventory_product_id}"
@@ -854,16 +901,10 @@ class BakeryProphetManager:
model_path: str,
future_dates: pd.DataFrame,
regressor_columns: List[str]) -> pd.DataFrame:
"""Generate forecast using stored model with checksum verification"""
"""Generate forecast using stored model from MinIO"""
try:
# Verify model file integrity before loading
checksummed_file = ChecksummedFile(model_path)
if not checksummed_file.load_and_verify_checksum():
logger.warning(f"Checksum verification failed for model: {model_path}")
# Still load the model but log warning
# In production, you might want to raise an exception instead
model = joblib.load(model_path)
# Load model from MinIO
model = await self._load_model_from_minio(model_path)
for regressor in regressor_columns:
if regressor not in future_dates.columns:
@@ -876,6 +917,33 @@ class BakeryProphetManager:
except Exception as e:
logger.error(f"Failed to generate forecast: {str(e)}")
raise
async def _load_model_from_minio(self, model_path: str):
"""Load model from MinIO storage"""
try:
# Parse MinIO path: minio://bucket_name/object_path
if not model_path.startswith("minio://"):
raise ValueError(f"Invalid MinIO path: {model_path}")
_, bucket_and_path = model_path.split("://", 1)
bucket_name, object_name = bucket_and_path.split("/", 1)
logger.debug(f"Loading model from MinIO: {bucket_name}/{object_name}")
# Download model data from MinIO
model_data = self.minio_client.get_object(bucket_name, object_name)
if not model_data:
raise ValueError(f"Failed to download model from MinIO: {model_path}")
# Deserialize model (using BytesIO since joblib.load reads from file-like objects)
buffer = io.BytesIO(model_data)
model = joblib.load(buffer)
logger.info(f"Model loaded successfully from MinIO: {model_path}")
return model
except Exception as e:
logger.error(f"Failed to load model from MinIO: {model_path}, error: {e}")
raise
async def _validate_training_data(self, df: pd.DataFrame, inventory_product_id: str):
"""Validate training data quality (unchanged)"""

View File

@@ -17,6 +17,7 @@ scikit-learn==1.6.1
pandas==2.2.3
numpy==2.2.2
joblib==1.4.2
minio==7.2.2
xgboost==2.1.3
# HTTP client

View File

@@ -0,0 +1,418 @@
"""
MinIO Client Library
Shared client for MinIO object storage operations with TLS support
"""
import os
import io
import ssl
import time
import urllib3
from datetime import timedelta
from typing import Optional, Dict, Any, Union
from pathlib import Path
from functools import wraps
from minio import Minio
from minio.error import S3Error
import structlog
# Configure logger
logger = structlog.get_logger()
def with_retry(max_retries: int = 3, base_delay: float = 1.0, max_delay: float = 30.0):
"""Decorator for retrying operations with exponential backoff
Args:
max_retries: Maximum number of retry attempts
base_delay: Initial delay between retries in seconds
max_delay: Maximum delay between retries in seconds
"""
def decorator(func):
@wraps(func)
def wrapper(*args, **kwargs):
last_exception = None
for attempt in range(max_retries + 1):
try:
return func(*args, **kwargs)
except (S3Error, urllib3.exceptions.HTTPError, ConnectionError, TimeoutError) as e:
last_exception = e
if attempt < max_retries:
# Exponential backoff with jitter
delay = min(base_delay * (2 ** attempt), max_delay)
logger.warning(
f"MinIO operation failed, retrying in {delay:.1f}s",
attempt=attempt + 1,
max_retries=max_retries,
error=str(e)
)
time.sleep(delay)
else:
logger.error(
"MinIO operation failed after all retries",
attempts=max_retries + 1,
error=str(e)
)
raise last_exception
return wrapper
return decorator
class MinIOClient:
"""Client for MinIO object storage operations with TLS support"""
def __init__(self):
"""Initialize MinIO client with configuration"""
self._client = None
self._initialize_client()
def _initialize_client(self) -> None:
"""Initialize MinIO client from environment variables with SSL/TLS support"""
try:
# Get configuration from environment
endpoint = os.getenv("MINIO_ENDPOINT", "minio.bakery-ia.svc.cluster.local:9000")
access_key = os.getenv("MINIO_ACCESS_KEY", os.getenv("MINIO_ROOT_USER", "admin"))
secret_key = os.getenv("MINIO_SECRET_KEY", os.getenv("MINIO_ROOT_PASSWORD", "secure-password"))
use_ssl = os.getenv("MINIO_USE_SSL", "true").lower() == "true"
# TLS certificate paths (optional - for cert verification)
ca_cert_path = os.getenv("MINIO_CA_CERT_PATH", "/etc/ssl/certs/minio-ca.crt")
# SSL verification is disabled by default for internal cluster with self-signed certs
# Set MINIO_VERIFY_SSL=true and provide CA cert path for production with proper certs
verify_ssl = os.getenv("MINIO_VERIFY_SSL", "false").lower() == "true"
# Try to get settings from service configuration if available
try:
from app.core.config import settings
if hasattr(settings, 'MINIO_ENDPOINT'):
endpoint = settings.MINIO_ENDPOINT
access_key = settings.MINIO_ACCESS_KEY
secret_key = settings.MINIO_SECRET_KEY
use_ssl = settings.MINIO_USE_SSL
except ImportError:
# Fallback to environment variables (for shared client usage)
pass
# Configure HTTP client with TLS settings
http_client = None
if use_ssl:
# Create custom HTTP client for TLS
if verify_ssl and os.path.exists(ca_cert_path):
# Verify certificates against CA
http_client = urllib3.PoolManager(
timeout=urllib3.Timeout(connect=10.0, read=60.0),
maxsize=10,
cert_reqs='CERT_REQUIRED',
ca_certs=ca_cert_path,
retries=urllib3.Retry(
total=5,
backoff_factor=0.2,
status_forcelist=[500, 502, 503, 504]
)
)
logger.info("MinIO TLS with certificate verification enabled",
ca_cert_path=ca_cert_path)
else:
# TLS without certificate verification (for self-signed certs in internal cluster)
# Still encrypted, just skips cert validation
http_client = urllib3.PoolManager(
timeout=urllib3.Timeout(connect=10.0, read=60.0),
maxsize=10,
cert_reqs='CERT_NONE',
retries=urllib3.Retry(
total=5,
backoff_factor=0.2,
status_forcelist=[500, 502, 503, 504]
)
)
# Suppress insecure request warnings for internal cluster
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
logger.info("MinIO TLS enabled without certificate verification (internal cluster)")
# Initialize client with SSL/TLS
self._client = Minio(
endpoint,
access_key=access_key,
secret_key=secret_key,
secure=use_ssl,
http_client=http_client
)
logger.info("MinIO client initialized successfully",
endpoint=endpoint,
use_ssl=use_ssl,
verify_ssl=verify_ssl if use_ssl else False)
except Exception as e:
logger.error("Failed to initialize MinIO client", error=str(e))
raise
def reconnect(self) -> bool:
"""Reconnect to MinIO server
Useful when connection is lost or credentials have changed.
Returns:
True if reconnection succeeded, False otherwise
"""
try:
logger.info("Attempting to reconnect to MinIO...")
self._initialize_client()
return True
except Exception as e:
logger.error("Failed to reconnect to MinIO", error=str(e))
return False
@with_retry(max_retries=3, base_delay=1.0)
def bucket_exists(self, bucket_name: str) -> bool:
"""Check if bucket exists - handles limited permissions gracefully"""
try:
# First try the standard method
return self._client.bucket_exists(bucket_name)
except S3Error as e:
# If we get AccessDenied, try alternative method for limited-permission users
if e.code == "AccessDenied":
logger.debug("Access denied for bucket_exists, trying alternative method",
bucket_name=bucket_name)
try:
# Try to list objects - this works with ListBucket permission
# If bucket doesn't exist, this will raise NoSuchBucket error
# If bucket exists but user has no permission, this will raise AccessDenied
objects = list(self._client.list_objects(bucket_name, recursive=False))
logger.debug("Bucket exists (verified via list_objects)", bucket_name=bucket_name)
return True
except S3Error as list_error:
if list_error.code == "NoSuchBucket":
logger.debug("Bucket does not exist", bucket_name=bucket_name)
return False
else:
logger.error("Failed to check bucket existence (alternative method)",
bucket_name=bucket_name,
error=str(list_error))
return False
else:
logger.error("Failed to check bucket existence",
bucket_name=bucket_name,
error=str(e))
return False
def create_bucket(self, bucket_name: str, region: str = "us-east-1") -> bool:
"""Create a new bucket if it doesn't exist"""
try:
if not self.bucket_exists(bucket_name):
self._client.make_bucket(bucket_name, region)
logger.info("Created MinIO bucket", bucket_name=bucket_name)
return True
return False
except S3Error as e:
logger.error("Failed to create bucket",
bucket_name=bucket_name,
error=str(e))
return False
@with_retry(max_retries=3, base_delay=1.0)
def put_object(
self,
bucket_name: str,
object_name: str,
data: Union[bytes, io.BytesIO, str, Path],
length: Optional[int] = None,
content_type: str = "application/octet-stream",
metadata: Optional[Dict[str, str]] = None
) -> bool:
"""Upload an object to MinIO
Args:
bucket_name: Target bucket name
object_name: Object key/path in the bucket
data: Data to upload (bytes, BytesIO, string, or Path)
length: Optional data length (calculated automatically if not provided)
content_type: MIME type of the object
metadata: Optional metadata dictionary
Returns:
True if upload succeeded, False otherwise
"""
try:
# Ensure bucket exists
self.create_bucket(bucket_name)
# Convert data to bytes if needed
if isinstance(data, str):
data = data.encode('utf-8')
elif isinstance(data, Path):
with open(data, 'rb') as f:
data = f.read()
elif isinstance(data, io.BytesIO):
data = data.getvalue()
# Calculate length if not provided
data_length = length if length is not None else len(data)
# MinIO SDK requires BytesIO stream and explicit length
data_stream = io.BytesIO(data)
# Upload object with proper stream and length
self._client.put_object(
bucket_name,
object_name,
data_stream,
length=data_length,
content_type=content_type,
metadata=metadata
)
logger.info("Uploaded object to MinIO",
bucket_name=bucket_name,
object_name=object_name,
size=data_length)
return True
except S3Error as e:
logger.error("Failed to upload object",
bucket_name=bucket_name,
object_name=object_name,
error=str(e))
return False
@with_retry(max_retries=3, base_delay=1.0)
def get_object(self, bucket_name: str, object_name: str) -> Optional[bytes]:
"""Download an object from MinIO"""
try:
# Get object data
response = self._client.get_object(bucket_name, object_name)
data = response.read()
logger.info("Downloaded object from MinIO",
bucket_name=bucket_name,
object_name=object_name,
size=len(data))
return data
except S3Error as e:
logger.error("Failed to download object",
bucket_name=bucket_name,
object_name=object_name,
error=str(e))
return None
def object_exists(self, bucket_name: str, object_name: str) -> bool:
"""Check if object exists"""
try:
self._client.stat_object(bucket_name, object_name)
return True
except S3Error:
return False
def list_objects(self, bucket_name: str, prefix: str = "") -> list:
"""List objects in bucket with optional prefix"""
try:
objects = self._client.list_objects(bucket_name, prefix=prefix, recursive=True)
return [obj.object_name for obj in objects]
except S3Error as e:
logger.error("Failed to list objects",
bucket_name=bucket_name,
prefix=prefix,
error=str(e))
return []
def delete_object(self, bucket_name: str, object_name: str) -> bool:
"""Delete an object from MinIO"""
try:
self._client.remove_object(bucket_name, object_name)
logger.info("Deleted object from MinIO",
bucket_name=bucket_name,
object_name=object_name)
return True
except S3Error as e:
logger.error("Failed to delete object",
bucket_name=bucket_name,
object_name=object_name,
error=str(e))
return False
def get_presigned_url(
self,
bucket_name: str,
object_name: str,
expires: int = 3600
) -> Optional[str]:
"""Generate presigned URL for object access"""
try:
url = self._client.presigned_get_object(
bucket_name,
object_name,
expires=expires
)
return url
except S3Error as e:
logger.error("Failed to generate presigned URL",
bucket_name=bucket_name,
object_name=object_name,
error=str(e))
return None
def copy_object(
self,
source_bucket: str,
source_object: str,
dest_bucket: str,
dest_object: str
) -> bool:
"""Copy object within MinIO"""
try:
# Ensure destination bucket exists
self.create_bucket(dest_bucket)
# Copy object (minio-py 7.x expects a CopySource rather than a "bucket/object" string)
from minio.commonconfig import CopySource
self._client.copy_object(dest_bucket, dest_object,
CopySource(source_bucket, source_object))
logger.info("Copied object in MinIO",
source_bucket=source_bucket,
source_object=source_object,
dest_bucket=dest_bucket,
dest_object=dest_object)
return True
except S3Error as e:
logger.error("Failed to copy object",
source_bucket=source_bucket,
source_object=source_object,
dest_bucket=dest_bucket,
dest_object=dest_object,
error=str(e))
return False
def get_object_metadata(self, bucket_name: str, object_name: str) -> Optional[Dict[str, Any]]:
"""Get object metadata"""
try:
stat = self._client.stat_object(bucket_name, object_name)
return {
"size": stat.size,
"last_modified": stat.last_modified,
"content_type": stat.content_type,
"metadata": stat.metadata or {}
}
except S3Error as e:
logger.error("Failed to get object metadata",
bucket_name=bucket_name,
object_name=object_name,
error=str(e))
return None
def health_check(self) -> bool:
"""Check MinIO service health"""
try:
# Simple bucket list to check connectivity
self._client.list_buckets()
return True
except Exception as e:
logger.error("MinIO health check failed", error=str(e))
return False
# Singleton instance for convenience
minio_client = MinIOClient()
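For quick reference, a hedged usage sketch of this shared singleton (the object name below is an illustrative example, not a value any service is known to use):

```python
# Illustrative round-trip with the shared MinIO client (names are examples only)
from shared.clients.minio_client import minio_client

bucket = "training-models"
object_name = "models/example-tenant/example-product/example.pkl"

if minio_client.put_object(bucket, object_name, b"model-bytes"):
    data = minio_client.get_object(bucket, object_name)
    assert data == b"model-bytes"
    minio_client.delete_object(bucket, object_name)
```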

View File

@@ -315,10 +315,9 @@ class BaseServiceSettings(BaseSettings):
# ================================================================
# ML & AI CONFIGURATION
# ================================================================
# Model Storage
MODEL_STORAGE_PATH: str = os.getenv("MODEL_STORAGE_PATH", "/app/models")
MODEL_STORAGE_BACKEND: str = os.getenv("MODEL_STORAGE_BACKEND", "local") # local, s3, gcs
# Model Storage Backend (MinIO is the primary storage)
MODEL_STORAGE_BACKEND: str = os.getenv("MODEL_STORAGE_BACKEND", "minio")
# Training Configuration
MAX_TRAINING_TIME_MINUTES: int = int(os.getenv("MAX_TRAINING_TIME_MINUTES", "30"))

View File

@@ -308,6 +308,47 @@ def add_metrics_middleware(app, metrics_collector: MetricsCollector):
return metrics_collector
def track_user_activity(user_id: str, action: str, service_name: str = "unknown-service", metadata: dict = None):
"""Track user activity metrics using the appropriate metrics collector"""
if metadata is None:
metadata = {}
# Add user-specific attributes
attributes = {
"user.id": user_id,
"action": action,
**metadata
}
# Get the metrics collector for the specified service
metrics_collector = get_metrics_collector(service_name)
if metrics_collector:
# Use the collector's counter registration system
counter_name = "user_activity_total"
# Check if counter already exists, if not register it
if counter_name not in metrics_collector._counters:
metrics_collector.register_counter(
name=counter_name,
documentation="Total user activity events"
)
# Increment the counter with attributes
metrics_collector.increment_counter(counter_name, value=1, labels=attributes)
else:
# Fallback: create a temporary counter if no collector exists
from opentelemetry import metrics
meter = metrics.get_meter(__name__)
user_activity_counter = meter.create_counter(
name="user_activity_total",
description="User activity events",
unit="events"
)
user_activity_counter.add(1, attributes)
def setup_metrics_early(
app,
service_name: str = None,