New alert service
This commit is contained in:
247
services/tenant/app/jobs/usage_tracking_scheduler.py
Normal file
247
services/tenant/app/jobs/usage_tracking_scheduler.py
Normal file
@@ -0,0 +1,247 @@
|
||||
"""
|
||||
Usage Tracking Scheduler
|
||||
Tracks daily usage snapshots for all active tenants
|
||||
"""
|
||||
import asyncio
|
||||
import structlog
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional
|
||||
from sqlalchemy import select, func
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class UsageTrackingScheduler:
|
||||
"""Scheduler for daily usage tracking"""
|
||||
|
||||
def __init__(self, db_manager, redis_client, config):
|
||||
self.db_manager = db_manager
|
||||
self.redis = redis_client
|
||||
self.config = config
|
||||
self._running = False
|
||||
self._task: Optional[asyncio.Task] = None
|
||||
|
||||
def seconds_until_target_time(self) -> float:
|
||||
"""Calculate seconds until next target time (default 2am UTC)"""
|
||||
now = datetime.now(timezone.utc)
|
||||
target = now.replace(
|
||||
hour=self.config.USAGE_TRACKING_HOUR,
|
||||
minute=self.config.USAGE_TRACKING_MINUTE,
|
||||
second=0,
|
||||
microsecond=0
|
||||
)
|
||||
|
||||
if target <= now:
|
||||
target += timedelta(days=1)
|
||||
|
||||
return (target - now).total_seconds()
|
||||
|
||||
async def _get_tenant_usage(self, session, tenant_id: str) -> dict:
|
||||
"""Get current usage counts for a tenant"""
|
||||
usage = {}
|
||||
|
||||
try:
|
||||
# Import models here to avoid circular imports
|
||||
from app.models.tenants import TenantMember
|
||||
|
||||
# Users count
|
||||
result = await session.execute(
|
||||
select(func.count()).select_from(TenantMember).where(TenantMember.tenant_id == tenant_id)
|
||||
)
|
||||
usage['users'] = result.scalar() or 0
|
||||
|
||||
# Get counts from other services via their databases
|
||||
# For now, we'll track basic metrics. More metrics can be added by querying other service databases
|
||||
|
||||
# Training jobs today (from Redis quota tracking)
|
||||
today_key = f"quota:training_jobs:{tenant_id}:{datetime.now(timezone.utc).strftime('%Y-%m-%d')}"
|
||||
training_count = await self.redis.get(today_key)
|
||||
usage['training_jobs'] = int(training_count) if training_count else 0
|
||||
|
||||
# Forecasts today (from Redis quota tracking)
|
||||
forecast_key = f"quota:forecasts:{tenant_id}:{datetime.now(timezone.utc).strftime('%Y-%m-%d')}"
|
||||
forecast_count = await self.redis.get(forecast_key)
|
||||
usage['forecasts'] = int(forecast_count) if forecast_count else 0
|
||||
|
||||
# API calls this hour (from Redis quota tracking)
|
||||
hour_key = f"quota:api_calls:{tenant_id}:{datetime.now(timezone.utc).strftime('%Y-%m-%d-%H')}"
|
||||
api_count = await self.redis.get(hour_key)
|
||||
usage['api_calls'] = int(api_count) if api_count else 0
|
||||
|
||||
# Storage (placeholder - implement based on file storage system)
|
||||
usage['storage'] = 0.0
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error getting usage for tenant", tenant_id=tenant_id, error=str(e), exc_info=True)
|
||||
return {}
|
||||
|
||||
return usage
|
||||
|
||||
async def _track_metrics(self, tenant_id: str, usage: dict):
|
||||
"""Track metrics to Redis"""
|
||||
from app.api.usage_forecast import track_usage_snapshot
|
||||
|
||||
for metric_name, value in usage.items():
|
||||
try:
|
||||
await track_usage_snapshot(tenant_id, metric_name, value)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to track metric",
|
||||
tenant_id=tenant_id,
|
||||
metric=metric_name,
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
async def _run_cycle(self):
|
||||
"""Execute one tracking cycle"""
|
||||
start_time = datetime.now(timezone.utc)
|
||||
logger.info("Starting daily usage tracking cycle")
|
||||
|
||||
try:
|
||||
async with self.db_manager.get_session() as session:
|
||||
# Import models here to avoid circular imports
|
||||
from app.models.tenants import Tenant, Subscription
|
||||
from sqlalchemy import select
|
||||
|
||||
# Get all active tenants
|
||||
result = await session.execute(
|
||||
select(Tenant, Subscription)
|
||||
.join(Subscription, Tenant.id == Subscription.tenant_id)
|
||||
.where(Tenant.is_active == True)
|
||||
.where(Subscription.status.in_(['active', 'trialing', 'cancelled']))
|
||||
)
|
||||
|
||||
tenants_data = result.all()
|
||||
total_tenants = len(tenants_data)
|
||||
success_count = 0
|
||||
error_count = 0
|
||||
|
||||
logger.info(f"Found {total_tenants} active tenants to track")
|
||||
|
||||
# Process each tenant
|
||||
for tenant, subscription in tenants_data:
|
||||
try:
|
||||
usage = await self._get_tenant_usage(session, tenant.id)
|
||||
|
||||
if usage:
|
||||
await self._track_metrics(tenant.id, usage)
|
||||
success_count += 1
|
||||
else:
|
||||
logger.warning(
|
||||
"No usage data available for tenant",
|
||||
tenant_id=tenant.id
|
||||
)
|
||||
error_count += 1
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Error tracking tenant usage",
|
||||
tenant_id=tenant.id,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
error_count += 1
|
||||
|
||||
end_time = datetime.now(timezone.utc)
|
||||
duration = (end_time - start_time).total_seconds()
|
||||
|
||||
logger.info(
|
||||
"Daily usage tracking completed",
|
||||
total_tenants=total_tenants,
|
||||
success=success_count,
|
||||
errors=error_count,
|
||||
duration_seconds=duration
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Usage tracking cycle failed", error=str(e), exc_info=True)
|
||||
|
||||
async def _run_scheduler(self):
|
||||
"""Main scheduler loop"""
|
||||
logger.info(
|
||||
"Usage tracking scheduler loop started",
|
||||
target_hour=self.config.USAGE_TRACKING_HOUR,
|
||||
target_minute=self.config.USAGE_TRACKING_MINUTE
|
||||
)
|
||||
|
||||
# Initial delay to target time
|
||||
delay = self.seconds_until_target_time()
|
||||
logger.info(f"Waiting {delay/3600:.2f} hours until next run at {self.config.USAGE_TRACKING_HOUR:02d}:{self.config.USAGE_TRACKING_MINUTE:02d} UTC")
|
||||
|
||||
try:
|
||||
await asyncio.sleep(delay)
|
||||
except asyncio.CancelledError:
|
||||
logger.info("Scheduler cancelled during initial delay")
|
||||
return
|
||||
|
||||
while self._running:
|
||||
try:
|
||||
await self._run_cycle()
|
||||
except Exception as e:
|
||||
logger.error("Scheduler cycle error", error=str(e), exc_info=True)
|
||||
|
||||
# Wait 24 hours until next run
|
||||
try:
|
||||
await asyncio.sleep(86400)
|
||||
except asyncio.CancelledError:
|
||||
logger.info("Scheduler cancelled during sleep")
|
||||
break
|
||||
|
||||
def start(self):
|
||||
"""Start the scheduler"""
|
||||
if not self.config.USAGE_TRACKING_ENABLED:
|
||||
logger.info("Usage tracking scheduler disabled by configuration")
|
||||
return
|
||||
|
||||
if self._running:
|
||||
logger.warning("Usage tracking scheduler already running")
|
||||
return
|
||||
|
||||
self._running = True
|
||||
self._task = asyncio.create_task(self._run_scheduler())
|
||||
logger.info("Usage tracking scheduler started successfully")
|
||||
|
||||
async def stop(self):
|
||||
"""Stop the scheduler gracefully"""
|
||||
if not self._running:
|
||||
logger.debug("Scheduler not running, nothing to stop")
|
||||
return
|
||||
|
||||
logger.info("Stopping usage tracking scheduler")
|
||||
self._running = False
|
||||
|
||||
if self._task:
|
||||
self._task.cancel()
|
||||
try:
|
||||
await self._task
|
||||
except asyncio.CancelledError:
|
||||
logger.info("Scheduler task cancelled successfully")
|
||||
|
||||
logger.info("Usage tracking scheduler stopped")
|
||||
|
||||
|
||||
# Global instance
|
||||
_scheduler: Optional[UsageTrackingScheduler] = None
|
||||
|
||||
|
||||
async def start_scheduler(db_manager, redis_client, config):
|
||||
"""Start the usage tracking scheduler"""
|
||||
global _scheduler
|
||||
|
||||
try:
|
||||
_scheduler = UsageTrackingScheduler(db_manager, redis_client, config)
|
||||
_scheduler.start()
|
||||
logger.info("Usage tracking scheduler module initialized")
|
||||
except Exception as e:
|
||||
logger.error("Failed to start usage tracking scheduler", error=str(e), exc_info=True)
|
||||
raise
|
||||
|
||||
|
||||
async def stop_scheduler():
|
||||
"""Stop the usage tracking scheduler"""
|
||||
global _scheduler
|
||||
|
||||
if _scheduler:
|
||||
await _scheduler.stop()
|
||||
_scheduler = None
|
||||
logger.info("Usage tracking scheduler module stopped")
|
||||
Reference in New Issue
Block a user