Add new infra architecture
This commit is contained in:
@@ -20,28 +20,12 @@ from shared.service_base import StandardFastAPIService
|
||||
class POSService(StandardFastAPIService):
|
||||
"""POS Integration Service with standardized setup"""
|
||||
|
||||
expected_migration_version = "00001"
|
||||
|
||||
async def on_startup(self, app):
    """Run the POS-specific startup steps, then the standard service startup.

    The migration check is awaited first, so an exception raised by it
    prevents the base-class startup from running.

    Args:
        app: Application object forwarded unchanged to the parent
            ``on_startup``.
    """
    # Schema check first: an error here propagates to the caller
    await self.verify_migrations()
    # Hand over to the startup sequence of the base class
    await super().on_startup(app)
|
||||
|
||||
async def verify_migrations(self):
    """Verify database schema matches the latest migrations.

    Reads ``version_num`` from the ``alembic_version`` table and compares it
    with ``self.expected_migration_version``.

    Raises:
        RuntimeError: If the stored revision differs from the expected one
            (this includes the case where the query yields no value).
        Exception: Any error raised while opening the session or running
            the query is logged and re-raised unchanged.
    """
    try:
        async with self.database_manager.get_session() as session:
            result = await session.execute(text("SELECT version_num FROM alembic_version"))
            version = result.scalar()
    except Exception as e:
        self.logger.error(f"Migration verification failed: {e}")
        raise

    # The comparison lives outside the try block so that a mismatch is
    # logged once, instead of being caught and logged a second time as a
    # generic verification failure.
    if version != self.expected_migration_version:
        message = f"Migration version mismatch: expected {self.expected_migration_version}, got {version}"
        self.logger.error(message)
        raise RuntimeError(message)

    self.logger.info(f"Migration verification successful: {version}")
|
||||
expected_migration_version = "e9976ec9fe9e"
|
||||
|
||||
def __init__(self):
|
||||
# Initialize scheduler reference
|
||||
self.pos_scheduler = None
|
||||
|
||||
# Define expected database tables for health checks
|
||||
pos_expected_tables = [
|
||||
'pos_configurations', 'pos_transactions', 'pos_transaction_items',
|
||||
@@ -87,15 +71,42 @@ class POSService(StandardFastAPIService):
|
||||
custom_metrics=pos_custom_metrics
|
||||
)
|
||||
|
||||
async def verify_migrations(self):
    """Verify database schema matches the latest migrations.

    Reads ``version_num`` from the ``alembic_version`` table and compares it
    with ``self.expected_migration_version``.

    Raises:
        RuntimeError: If the stored revision differs from the expected one
            (this includes the case where the query yields no value).
        Exception: Any error raised while opening the session or running
            the query is logged and re-raised unchanged.
    """
    try:
        async with self.database_manager.get_session() as session:
            result = await session.execute(text("SELECT version_num FROM alembic_version"))
            version = result.scalar()
    except Exception as e:
        self.logger.error(f"Migration verification failed: {e}")
        raise

    # The comparison lives outside the try block so that a mismatch is
    # logged once, instead of being caught and logged a second time as a
    # generic verification failure.
    if version != self.expected_migration_version:
        message = f"Migration version mismatch: expected {self.expected_migration_version}, got {version}"
        self.logger.error(message)
        raise RuntimeError(message)

    self.logger.info(f"Migration verification successful: {version}")
|
||||
|
||||
async def on_startup(self, app: FastAPI):
|
||||
"""Custom startup logic for POS service"""
|
||||
# Start background scheduler for POS-to-Sales sync
|
||||
# Verify migrations first
|
||||
await self.verify_migrations()
|
||||
|
||||
# Call parent startup
|
||||
await super().on_startup(app)
|
||||
|
||||
# Start background scheduler for POS-to-Sales sync with leader election
|
||||
try:
|
||||
from app.scheduler import start_scheduler
|
||||
start_scheduler()
|
||||
self.logger.info("Background scheduler started successfully")
|
||||
from app.scheduler import POSScheduler
|
||||
self.pos_scheduler = POSScheduler(
|
||||
redis_url=settings.REDIS_URL, # Pass Redis URL for leader election
|
||||
sync_interval_minutes=settings.SYNC_INTERVAL_SECONDS // 60 if settings.SYNC_INTERVAL_SECONDS >= 60 else 5
|
||||
)
|
||||
await self.pos_scheduler.start()
|
||||
self.logger.info("POS scheduler started successfully with leader election")
|
||||
|
||||
# Store scheduler in app state for status checks
|
||||
app.state.pos_scheduler = self.pos_scheduler
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to start background scheduler: {e}", exc_info=True)
|
||||
self.logger.error(f"Failed to start POS scheduler: {e}", exc_info=True)
|
||||
# Don't fail startup if scheduler fails
|
||||
|
||||
# Custom startup completed
|
||||
@@ -103,13 +114,13 @@ class POSService(StandardFastAPIService):
|
||||
|
||||
async def on_shutdown(self, app: FastAPI):
    """Custom shutdown logic for POS service.

    Stops the scheduler held in ``self.pos_scheduler`` (if any). A failure
    while stopping is logged and swallowed so it does not propagate out of
    this method.

    Args:
        app: The FastAPI application being shut down (not used here).
    """
    # getattr keeps shutdown safe even if the attribute was never assigned
    pos_scheduler = getattr(self, "pos_scheduler", None)
    if pos_scheduler is not None:
        try:
            await pos_scheduler.stop()
            self.logger.info("POS scheduler stopped successfully")
        except Exception as e:
            self.logger.error(f"Failed to stop POS scheduler: {e}", exc_info=True)
        finally:
            # Drop the reference so a repeated shutdown does not stop it twice
            self.pos_scheduler = None

    # Database cleanup is handled by the base class
|
||||
|
||||
@@ -5,17 +5,19 @@ Sets up periodic background jobs for:
|
||||
- Syncing POS transactions to sales service
|
||||
- Other maintenance tasks as needed
|
||||
|
||||
To enable scheduling, add to main.py startup:
|
||||
Uses Redis-based leader election to ensure only one pod runs scheduled tasks
|
||||
when running with multiple replicas.
|
||||
|
||||
Usage in main.py:
|
||||
```python
|
||||
from app.scheduler import start_scheduler, shutdown_scheduler
|
||||
from app.scheduler import POSScheduler
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
start_scheduler()
|
||||
# On startup
|
||||
scheduler = POSScheduler(redis_url=settings.REDIS_URL)
|
||||
await scheduler.start()
|
||||
|
||||
@app.on_event("shutdown")
|
||||
async def shutdown_event():
|
||||
shutdown_scheduler()
|
||||
# On shutdown
|
||||
await scheduler.stop()
|
||||
```
|
||||
"""
|
||||
|
||||
@@ -23,65 +25,307 @@ import structlog
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.interval import IntervalTrigger
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
# Global scheduler instance
|
||||
scheduler = None
|
||||
|
||||
class POSScheduler:
    """
    POS Scheduler service that manages background sync jobs.

    Uses Redis-based leader election to ensure only one pod runs
    scheduled jobs in a multi-replica deployment. Without a Redis URL, or
    when leader election cannot be started, the scheduler runs in standalone
    mode and reports itself as leader.
    """

    # Identifier of the POS-to-sales sync job inside APScheduler
    SYNC_JOB_ID = 'pos_to_sales_sync'

    def __init__(self, redis_url: Optional[str] = None, sync_interval_minutes: int = 5):
        """
        Initialize POS scheduler.

        Args:
            redis_url: Redis connection URL for leader election
            sync_interval_minutes: Interval for POS-to-sales sync job
        """
        self.scheduler = None
        self.sync_interval_minutes = sync_interval_minutes

        # Leader election
        self._redis_url = redis_url
        self._leader_election = None
        self._redis_client = None
        self._scheduler_started = False

    async def start(self):
        """Start the POS scheduler with leader election"""
        if self._redis_url:
            await self._start_with_leader_election()
        else:
            # Fallback to standalone mode (for local development or single-pod deployments)
            logger.warning("Redis URL not provided, starting POS scheduler in standalone mode")
            await self._start_standalone()

    async def _start_with_leader_election(self):
        """Start with Redis-based leader election for horizontal scaling.

        Any failure while connecting to Redis or starting the election is
        logged; the partially built election state is then discarded and the
        scheduler is started in standalone mode instead.
        """
        import redis.asyncio as redis
        from shared.leader_election import LeaderElectionService

        try:
            # Create Redis connection
            self._redis_client = redis.from_url(self._redis_url, decode_responses=False)
            await self._redis_client.ping()

            # Create scheduler (but don't start it yet)
            self.scheduler = AsyncIOScheduler()

            # Create leader election
            self._leader_election = LeaderElectionService(
                self._redis_client,
                service_name="pos-scheduler"
            )

            # Start leader election with callbacks
            await self._leader_election.start(
                on_become_leader=self._on_become_leader,
                on_lose_leader=self._on_lose_leader
            )

            logger.info("POS scheduler started with leader election",
                        is_leader=self._leader_election.is_leader,
                        instance_id=self._leader_election.instance_id)

        except Exception as e:
            logger.error("Failed to start with leader election, falling back to standalone",
                         error=str(e))
            # Without this reset, is_leader / trigger_job_now would keep
            # consulting the dead election object while the standalone
            # scheduler is the one actually running.
            await self._discard_leader_election()
            await self._start_standalone()

    async def _discard_leader_election(self):
        """Tear down partially initialised leader-election state (best effort)."""
        election, self._leader_election = self._leader_election, None
        if election is not None:
            try:
                await election.stop()
            except Exception as e:
                logger.warning("Failed to stop leader election during fallback", error=str(e))

        # A leadership callback may already have started the scheduler
        await self._stop_scheduler()

        client, self._redis_client = self._redis_client, None
        if client is not None:
            try:
                await client.close()
            except Exception as e:
                logger.warning("Failed to close Redis client during fallback", error=str(e))

    async def _on_become_leader(self):
        """Called when this instance becomes the leader"""
        logger.info("POS scheduler became leader, starting scheduled jobs")
        await self._start_scheduler()

    async def _on_lose_leader(self):
        """Called when this instance loses leadership"""
        logger.warning("POS scheduler lost leadership, stopping scheduled jobs")
        await self._stop_scheduler()

    def _register_sync_job(self):
        """Register the POS-to-sales sync job on ``self.scheduler``."""
        # Imported lazily, as in the original code, at the point of use
        from app.jobs.sync_pos_to_sales import run_pos_to_sales_sync

        self.scheduler.add_job(
            run_pos_to_sales_sync,
            trigger=IntervalTrigger(minutes=self.sync_interval_minutes),
            id=self.SYNC_JOB_ID,
            name='Sync POS Transactions to Sales',
            replace_existing=True,
            max_instances=1,  # Prevent concurrent runs
            coalesce=True,  # Combine multiple missed runs into one
            misfire_grace_time=60  # Allow 60 seconds grace for missed runs
        )

    async def _start_scheduler(self):
        """Start the APScheduler with POS jobs.

        Does nothing (beyond a warning) when the scheduler is already
        started. Errors are logged and swallowed.
        """
        if self._scheduler_started:
            logger.warning("POS scheduler already started")
            return

        try:
            # Job 1: Sync POS transactions to sales service
            self._register_sync_job()

            # Start scheduler
            if not self.scheduler.running:
                self.scheduler.start()
            self._scheduler_started = True

            jobs = self.scheduler.get_jobs()
            logger.info("POS scheduler jobs started",
                        sync_interval_minutes=self.sync_interval_minutes,
                        job_count=len(jobs),
                        next_run=jobs[0].next_run_time if jobs else None)

        except Exception as e:
            logger.error("Failed to start POS scheduler", error=str(e))

    async def _stop_scheduler(self):
        """Stop the APScheduler; a no-op when it was never started."""
        if not self._scheduler_started:
            return

        try:
            if self.scheduler and self.scheduler.running:
                self.scheduler.shutdown(wait=False)
            self._scheduler_started = False
            logger.info("POS scheduler jobs stopped")

        except Exception as e:
            logger.error("Failed to stop POS scheduler", error=str(e))

    async def _start_standalone(self):
        """Start scheduler without leader election (fallback mode)"""
        logger.warning("Starting POS scheduler in standalone mode (no leader election)")

        if self._scheduler_started:
            # Replacing self.scheduler here would orphan a running scheduler
            logger.warning("POS scheduler already started")
            return

        self.scheduler = AsyncIOScheduler()

        try:
            self._register_sync_job()

            if not self.scheduler.running:
                self.scheduler.start()
            self._scheduler_started = True

            jobs = self.scheduler.get_jobs()
            logger.info("POS scheduler started (standalone mode)",
                        sync_interval_minutes=self.sync_interval_minutes,
                        next_run=jobs[0].next_run_time if jobs else None)

        except Exception as e:
            logger.error("Failed to start POS scheduler in standalone mode", error=str(e))

    async def stop(self):
        """Stop the POS scheduler and leader election.

        The scheduler is stopped and the Redis client closed even when
        stopping the leader election raises; that exception still propagates
        to the caller afterwards.
        """
        try:
            # Stop leader election
            if self._leader_election:
                await self._leader_election.stop()
        finally:
            # Stop scheduler (_stop_scheduler logs and swallows its own errors)
            await self._stop_scheduler()

            # Close Redis
            if self._redis_client:
                await self._redis_client.close()
                self._redis_client = None

        logger.info("POS scheduler stopped")

    @property
    def is_leader(self) -> bool:
        """Check if this instance is the leader (always True without leader election)"""
        return self._leader_election.is_leader if self._leader_election else True

    def get_leader_status(self) -> dict:
        """Get leader election status"""
        if self._leader_election:
            return self._leader_election.get_status()
        return {"is_leader": True, "mode": "standalone"}

    def get_scheduler_status(self) -> dict:
        """
        Get current scheduler status

        Returns:
            Dict with scheduler info and job statuses
        """
        if self.scheduler is None or not self._scheduler_started:
            return {
                "running": False,
                "is_leader": self.is_leader,
                "jobs": []
            }

        jobs = [
            {
                "id": job.id,
                "name": job.name,
                "next_run": job.next_run_time.isoformat() if job.next_run_time else None,
                "trigger": str(job.trigger)
            }
            for job in self.scheduler.get_jobs()
        ]

        return {
            "running": True,
            "is_leader": self.is_leader,
            "jobs": jobs,
            "state": self.scheduler.state
        }

    def trigger_job_now(self, job_id: str) -> bool:
        """
        Manually trigger a scheduled job immediately

        Args:
            job_id: Job identifier (e.g., 'pos_to_sales_sync')

        Returns:
            True if job was triggered, False otherwise
        """
        if self.scheduler is None or not self._scheduler_started:
            logger.error("Cannot trigger job, scheduler not running")
            return False

        if not self.is_leader:
            logger.warning("Cannot trigger job, this instance is not the leader")
            return False

        try:
            if self.scheduler.get_job(job_id) is None:
                logger.warning("Job not found", job_id=job_id)
                return False

            # Moving next_run_time to "now" makes the scheduler run the job
            # on its next wake-up
            self.scheduler.modify_job(job_id, next_run_time=datetime.now())
            logger.info("Job triggered manually", job_id=job_id)
            return True

        except Exception as e:
            logger.error("Failed to trigger job", job_id=job_id, error=str(e))
            return False
|
||||
|
||||
|
||||
# ================================================================
|
||||
# Legacy compatibility functions (deprecated - use POSScheduler class)
|
||||
# ================================================================
|
||||
|
||||
# Global scheduler instance for backward compatibility
|
||||
_scheduler_instance: Optional[POSScheduler] = None
|
||||
|
||||
|
||||
def start_scheduler():
    """
    Initialize and start the background scheduler (legacy function).

    DEPRECATED: Use POSScheduler class directly for better leader election support.

    Creates a ``POSScheduler`` without a Redis URL and schedules its
    standalone start on the running event loop, so no leader election takes
    place. When no event loop is running, the failure is logged and no
    scheduler instance is registered.
    """
    global _scheduler_instance

    if _scheduler_instance is not None:
        logger.warning("Scheduler already running")
        return

    logger.warning("Using deprecated start_scheduler function. "
                   "Consider migrating to POSScheduler class for leader election support.")

    try:
        import asyncio

        # Look the loop up first: it raises RuntimeError outside a running
        # loop, before a coroutine object is created and left un-awaited.
        loop = asyncio.get_running_loop()

        instance = POSScheduler()
        # Note: This is synchronous fallback, no leader election
        task = loop.create_task(instance._start_standalone())
        # The event loop only keeps weak references to tasks; hold a strong
        # one so the start-up coroutine cannot be garbage-collected mid-run.
        instance._startup_task = task
        _scheduler_instance = instance

    except Exception as e:
        logger.error("Failed to start scheduler", error=str(e), exc_info=True)
        _scheduler_instance = None
|
||||
|
||||
|
||||
def shutdown_scheduler():
    """
    Gracefully shutdown the scheduler (legacy function).

    DEPRECATED: Use POSScheduler class directly.

    Schedules ``POSScheduler.stop()`` of the module-level instance on the
    running event loop and clears the module-level reference. Errors
    (including the absence of a running loop) are logged and swallowed; the
    reference is then left in place.
    """
    global _scheduler_instance

    if _scheduler_instance is None:
        logger.warning("Scheduler not running")
        return

    try:
        import asyncio

        # Raises RuntimeError outside a running loop, before the stop()
        # coroutine is created and left un-awaited.
        loop = asyncio.get_running_loop()

        instance = _scheduler_instance
        task = loop.create_task(instance.stop())
        # Keep a strong reference; the loop only holds tasks weakly
        instance._shutdown_task = task
        _scheduler_instance = None

    except Exception as e:
        logger.error("Failed to shutdown scheduler", error=str(e), exc_info=True)
|
||||
@@ -89,57 +333,25 @@ def shutdown_scheduler():
|
||||
|
||||
def get_scheduler_status():
    """
    Get current scheduler status (legacy function).

    DEPRECATED: Use POSScheduler class directly.

    Returns:
        ``{"running": False, "jobs": []}`` when no legacy scheduler instance
        exists; otherwise the dict returned by
        ``POSScheduler.get_scheduler_status()`` of that instance.
    """
    if _scheduler_instance is None:
        return {
            "running": False,
            "jobs": []
        }

    # All status assembly lives in the class; the legacy wrapper only delegates
    return _scheduler_instance.get_scheduler_status()
|
||||
|
||||
|
||||
def trigger_job_now(job_id: str):
    """
    Manually trigger a scheduled job immediately (legacy function).

    DEPRECATED: Use POSScheduler class directly.

    Args:
        job_id: Job identifier (e.g., 'pos_to_sales_sync')

    Returns:
        False when no legacy scheduler instance exists; otherwise the result
        of ``POSScheduler.trigger_job_now(job_id)`` on that instance.
    """
    if _scheduler_instance is None:
        logger.error("Cannot trigger job, scheduler not running")
        return False

    # Job lookup and error handling are implemented once, in the class
    return _scheduler_instance.trigger_job_now(job_id)
|
||||
|
||||
Reference in New Issue
Block a user