Add MinIO support and frontend analytics
This commit is contained in:
@@ -116,29 +116,51 @@ async def broadcast_training_progress(job_id: str, progress: dict):
|
||||
await websocket_manager.broadcast(job_id, message)
|
||||
```
|
||||
|
||||
### Model Artifact Management
|
||||
### Model Artifact Management (MinIO Storage)
|
||||
|
||||
```python
|
||||
# Model storage and retrieval
|
||||
# Model storage and retrieval using MinIO
|
||||
import joblib
|
||||
from pathlib import Path
|
||||
from shared.clients.minio_client import minio_client
|
||||
|
||||
# Save trained model
|
||||
# Save trained model to MinIO
|
||||
def save_model_artifact(model: Prophet, tenant_id: str, product_id: str) -> str:
|
||||
"""Serialize and store model"""
|
||||
model_dir = Path(f"/models/{tenant_id}/{product_id}")
|
||||
model_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
"""Serialize and store model in MinIO"""
|
||||
import io
|
||||
version = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
|
||||
model_path = model_dir / f"model_v{version}.pkl"
|
||||
model_id = str(uuid.uuid4())
|
||||
object_name = f"models/{tenant_id}/{product_id}/{model_id}.pkl"
|
||||
|
||||
joblib.dump(model, model_path)
|
||||
return str(model_path)
|
||||
# Serialize model (joblib.dump writes to file-like objects)
|
||||
buffer = io.BytesIO()
|
||||
joblib.dump(model, buffer)
|
||||
model_data = buffer.getvalue()
|
||||
|
||||
# Load trained model
|
||||
# Upload to MinIO
|
||||
minio_client.put_object(
|
||||
bucket_name="training-models",
|
||||
object_name=object_name,
|
||||
data=model_data,
|
||||
content_type="application/octet-stream"
|
||||
)
|
||||
|
||||
# Return MinIO path
|
||||
return f"minio://training-models/{object_name}"
|
||||
|
||||
# Load trained model from MinIO
|
||||
def load_model_artifact(model_path: str) -> Prophet:
|
||||
"""Load serialized model"""
|
||||
return joblib.load(model_path)
|
||||
"""Load serialized model from MinIO"""
|
||||
import io
|
||||
# Parse MinIO path: minio://bucket_name/object_path
|
||||
_, bucket_and_path = model_path.split("://", 1)
|
||||
bucket_name, object_name = bucket_and_path.split("/", 1)
|
||||
|
||||
# Download from MinIO
|
||||
model_data = minio_client.get_object(bucket_name, object_name)
|
||||
|
||||
# Deserialize (joblib.load reads from file-like objects)
|
||||
buffer = io.BytesIO(model_data)
|
||||
return joblib.load(buffer)
|
||||
```
|
||||
|
||||
### Performance Metrics Calculation
|
||||
@@ -194,8 +216,8 @@ def calculate_performance_metrics(model: Prophet, actual_data: pd.DataFrame) ->
|
||||
- **Framework**: FastAPI (Python 3.11+) - Async web framework with WebSocket support
|
||||
- **Database**: PostgreSQL 17 - Training logs, model metadata, job queue
|
||||
- **ML Library**: Prophet (fbprophet) - Time series forecasting
|
||||
- **Model Storage**: Joblib - Model serialization
|
||||
- **File System**: Persistent volumes - Model artifact storage
|
||||
- **Model Storage**: MinIO (S3-compatible) - Distributed object storage with TLS
|
||||
- **Serialization**: Joblib - Model serialization
|
||||
- **WebSocket**: FastAPI WebSocket - Real-time progress updates
|
||||
- **Messaging**: RabbitMQ 4.1 - Training completion events
|
||||
- **ORM**: SQLAlchemy 2.0 (async) - Database abstraction
|
||||
@@ -442,7 +464,13 @@ websocket_messages_sent = Counter(
|
||||
- `PORT` - Service port (default: 8004)
|
||||
- `DATABASE_URL` - PostgreSQL connection string
|
||||
- `RABBITMQ_URL` - RabbitMQ connection string
|
||||
- `MODEL_STORAGE_PATH` - Path for model artifacts (default: /models)
|
||||
|
||||
**MinIO Configuration:**
|
||||
- `MINIO_ENDPOINT` - MinIO server endpoint (default: minio.bakery-ia.svc.cluster.local:9000)
|
||||
- `MINIO_ACCESS_KEY` - MinIO access key
|
||||
- `MINIO_SECRET_KEY` - MinIO secret key
|
||||
- `MINIO_USE_SSL` - Enable TLS (default: true)
|
||||
- `MINIO_MODEL_BUCKET` - Bucket for models (default: training-models)
|
||||
|
||||
**Training Configuration:**
|
||||
- `MAX_CONCURRENT_JOBS` - Maximum parallel training jobs (default: 3)
|
||||
@@ -462,10 +490,9 @@ websocket_messages_sent = Counter(
|
||||
- `WEBSOCKET_MAX_CONNECTIONS` - Max connections per tenant (default: 10)
|
||||
- `WEBSOCKET_MESSAGE_QUEUE_SIZE` - Message buffer size (default: 100)
|
||||
|
||||
**Storage Configuration:**
|
||||
- `MODEL_RETENTION_DAYS` - Days to keep old models (default: 90)
|
||||
- `MAX_MODEL_VERSIONS_PER_PRODUCT` - Version limit (default: 10)
|
||||
- `ENABLE_MODEL_COMPRESSION` - Compress model files (default: true)
|
||||
**Storage Configuration (MinIO):**
|
||||
- `MINIO_MODEL_LIFECYCLE_DAYS` - Days to keep old model versions (default: 90)
|
||||
- `MINIO_CACHE_TTL_SECONDS` - Model cache TTL in seconds (default: 3600)
|
||||
|
||||
## Development Setup
|
||||
|
||||
@@ -473,7 +500,7 @@ websocket_messages_sent = Counter(
|
||||
- Python 3.11+
|
||||
- PostgreSQL 17
|
||||
- RabbitMQ 4.1
|
||||
- Persistent storage for model artifacts
|
||||
- MinIO (S3-compatible object storage)
|
||||
|
||||
### Local Development
|
||||
```bash
|
||||
@@ -488,10 +515,13 @@ pip install -r requirements.txt
|
||||
# Set environment variables
|
||||
export DATABASE_URL=postgresql://user:pass@localhost:5432/training
|
||||
export RABBITMQ_URL=amqp://guest:guest@localhost:5672/
|
||||
export MODEL_STORAGE_PATH=/tmp/models
|
||||
export MINIO_ENDPOINT=localhost:9000
|
||||
export MINIO_ACCESS_KEY=minioadmin
|
||||
export MINIO_SECRET_KEY=minioadmin
|
||||
export MINIO_USE_SSL=false # Use true in production
|
||||
|
||||
# Create model storage directory
|
||||
mkdir -p /tmp/models
|
||||
# Start MinIO locally (if not using K8s)
|
||||
docker run -p 9000:9000 -p 9001:9001 minio/minio server /data --console-address ":9001"
|
||||
|
||||
# Run database migrations
|
||||
alembic upgrade head
|
||||
@@ -590,7 +620,7 @@ for feature_name in poi_features.keys():
|
||||
- **External Service** - Fetch weather, traffic, holiday, and POI feature data
|
||||
- **PostgreSQL** - Store job queue, models, metrics, logs
|
||||
- **RabbitMQ** - Publish training completion events
|
||||
- **File System** - Store model artifacts
|
||||
- **MinIO** - Store model artifacts (S3-compatible object storage with TLS)
|
||||
|
||||
### Dependents (Services That Call This)
|
||||
- **Forecasting Service** - Load trained models for predictions
|
||||
@@ -627,11 +657,11 @@ for feature_name in poi_features.keys():
|
||||
4. **Resource Limits** - CPU/memory limits per training job
|
||||
5. **Priority Queue** - Prioritize important products first
|
||||
|
||||
### Storage Optimization
|
||||
1. **Model Compression** - Compress model artifacts (gzip)
|
||||
2. **Old Model Cleanup** - Automatic deletion after retention period
|
||||
3. **Version Limits** - Keep only N most recent versions
|
||||
4. **Deduplication** - Avoid storing identical models
|
||||
### Storage Optimization (MinIO)
|
||||
1. **Object Versioning** - MinIO maintains version history once versioning is enabled on the bucket
|
||||
2. **Lifecycle Policies** - Auto-cleanup old versions after 90 days by default (configurable via `MINIO_MODEL_LIFECYCLE_DAYS`)
|
||||
3. **TLS Encryption** - Secure communication with MinIO
|
||||
4. **Distributed Storage** - MinIO handles replication and availability
|
||||
|
||||
### WebSocket Optimization
|
||||
1. **Message Batching** - Batch progress updates (every 2 seconds)
|
||||
|
||||
Reference in New Issue
Block a user