"""
POI Refresh Service

Manages periodic POI context refresh jobs.
Detects changes in the POI landscape between refresh runs and records them
on the refresh job.
"""
|
|
|
|
import asyncio
from datetime import datetime, timezone, timedelta
from typing import Optional, Dict, Any, List

from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_, or_
import structlog

from app.models.poi_refresh_job import POIRefreshJob
from app.models.poi_context import TenantPOIContext
from app.services.poi_detection_service import POIDetectionService
from app.core.database import database_manager

# Module-level structured logger shared by everything in this file.
logger = structlog.get_logger()
|
|
|
|
|
|
class POIRefreshService:
    """
    POI Refresh Service

    Manages background jobs for periodic POI context refresh.
    Default refresh cycle: 180 days (6 months).

    Job statuses written by this class: ``pending`` (created by
    ``schedule_refresh_job``), ``running``, ``completed`` and ``failed``
    (all set by ``execute_refresh_job``).
    """

    DEFAULT_REFRESH_INTERVAL_DAYS = 180
    DEFAULT_MAX_ATTEMPTS = 3
    # Not referenced inside this class; kept because other modules may read it.
    STALE_THRESHOLD_DAYS = 180
    # Delay before a failed job that still has attempts left becomes due again.
    RETRY_DELAY_HOURS = 1

    def __init__(
        self,
        poi_detection_service: Optional[POIDetectionService] = None,
        refresh_interval_days: int = DEFAULT_REFRESH_INTERVAL_DAYS
    ):
        """
        Initialize POI refresh service.

        Args:
            poi_detection_service: POI detection service instance; a default
                ``POIDetectionService()`` is created when omitted
            refresh_interval_days: Days between POI refreshes (default: 180)
        """
        self.poi_detection_service = poi_detection_service or POIDetectionService()
        self.refresh_interval_days = refresh_interval_days

        logger.info(
            "POI Refresh Service initialized",
            refresh_interval_days=refresh_interval_days
        )

    async def _run_with_session(self, operation, session: Optional[AsyncSession] = None):
        """
        Await ``operation(db_session)`` on ``session`` when one is given,
        otherwise on a session obtained from ``database_manager``.
        """
        if session is not None:
            return await operation(session)

        async with database_manager.get_session() as db_session:
            return await operation(db_session)

    @staticmethod
    def _poi_count(category_data: Any) -> int:
        """Return the ``count`` of one category entry, or 0 when it is missing or malformed."""
        if isinstance(category_data, dict):
            return category_data.get("count") or 0
        return 0

    async def schedule_refresh_job(
        self,
        tenant_id: str,
        latitude: float,
        longitude: float,
        scheduled_at: Optional[datetime] = None,
        session: Optional[AsyncSession] = None
    ) -> POIRefreshJob:
        """
        Schedule a POI refresh job for a tenant.

        If the tenant already has a job in ``pending`` or ``running`` state,
        that job is returned and nothing new is created.

        Args:
            tenant_id: Tenant UUID
            latitude: Bakery latitude
            longitude: Bakery longitude
            scheduled_at: When to run the job (default: now + refresh_interval)
            session: Database session; a new one is opened when omitted

        Returns:
            The existing active job, or the newly created POIRefreshJob
        """
        if scheduled_at is None:
            scheduled_at = datetime.now(timezone.utc) + timedelta(
                days=self.refresh_interval_days
            )

        async def _create_job(db_session: AsyncSession) -> POIRefreshJob:
            # Look for an active job first. ``first()`` is used instead of
            # ``scalar_one_or_none()`` so that a tenant which somehow ended up
            # with several active jobs does not raise MultipleResultsFound.
            result = await db_session.execute(
                select(POIRefreshJob)
                .where(
                    and_(
                        POIRefreshJob.tenant_id == tenant_id,
                        POIRefreshJob.status.in_(["pending", "running"])
                    )
                )
                .order_by(POIRefreshJob.scheduled_at)
                .limit(1)
            )
            existing_job = result.scalars().first()

            if existing_job:
                logger.info(
                    "POI refresh job already scheduled",
                    tenant_id=tenant_id,
                    job_id=str(existing_job.id),
                    scheduled_at=existing_job.scheduled_at
                )
                return existing_job

            job = POIRefreshJob(
                tenant_id=tenant_id,
                latitude=latitude,
                longitude=longitude,
                scheduled_at=scheduled_at,
                status="pending",
                max_attempts=self.DEFAULT_MAX_ATTEMPTS
            )

            db_session.add(job)
            await db_session.commit()
            await db_session.refresh(job)

            logger.info(
                "POI refresh job scheduled",
                tenant_id=tenant_id,
                job_id=str(job.id),
                scheduled_at=scheduled_at
            )

            return job

        return await self._run_with_session(_create_job, session)

    async def execute_refresh_job(
        self,
        job_id: str,
        session: Optional[AsyncSession] = None
    ) -> Dict[str, Any]:
        """
        Execute a POI refresh job.

        Marks the job ``running``, runs POI detection for the job's
        coordinates, compares the result with the tenant's stored POI context
        and records the outcome on the job. After a successful run the next
        periodic job is scheduled.

        Args:
            job_id: Job UUID
            session: Database session; a new one is opened when omitted

        Returns:
            Execution result with status and details. ``status`` is one of
            ``success``, ``failed``, ``already_running``, ``already_completed``
            or ``max_attempts_reached``. A ``success`` result also carries
            ``next_job_scheduled`` telling whether the follow-up job could be
            created.

        Raises:
            ValueError: If no job with ``job_id`` exists
        """
        async def _execute(db_session: AsyncSession) -> Dict[str, Any]:
            result = await db_session.execute(
                select(POIRefreshJob).where(POIRefreshJob.id == job_id)
            )
            job = result.scalar_one_or_none()

            if not job:
                raise ValueError(f"Job not found: {job_id}")

            if job.status == "running":
                return {
                    "status": "already_running",
                    "job_id": str(job.id),
                    "message": "Job is already running"
                }

            if job.status == "completed":
                return {
                    "status": "already_completed",
                    "job_id": str(job.id),
                    "message": "Job already completed"
                }

            if not job.can_retry:
                return {
                    "status": "max_attempts_reached",
                    "job_id": str(job.id),
                    "message": f"Max attempts ({job.max_attempts}) reached"
                }

            # Persist the "running" marker before the slow detection call.
            # NOTE(review): attributes are read after commit throughout this
            # method, which presumably relies on the session factory using
            # expire_on_commit=False - confirm.
            job.status = "running"
            job.started_at = datetime.now(timezone.utc)
            job.attempt_count += 1
            await db_session.commit()

            logger.info(
                "Executing POI refresh job",
                job_id=str(job.id),
                tenant_id=str(job.tenant_id),
                attempt=job.attempt_count
            )

            try:
                # Existing context is the baseline for change analysis.
                poi_result = await db_session.execute(
                    select(TenantPOIContext).where(
                        TenantPOIContext.tenant_id == job.tenant_id
                    )
                )
                existing_context = poi_result.scalar_one_or_none()

                detection_result = await self.poi_detection_service.detect_pois_for_bakery(
                    latitude=job.latitude,
                    longitude=job.longitude,
                    tenant_id=str(job.tenant_id),
                    force_refresh=True
                )

                # NOTE(review): detection_result is not written back to
                # TenantPOIContext here; presumably the detection service or
                # another component persists it - verify.
                changes = self._analyze_changes(
                    existing_context.poi_detection_results if existing_context else {},
                    detection_result
                )

                job.status = "completed"
                job.completed_at = datetime.now(timezone.utc)
                job.pois_detected = sum(
                    self._poi_count(data) for data in detection_result.values()
                )
                job.changes_detected = changes["has_significant_changes"]
                job.change_summary = changes
                job.next_scheduled_at = datetime.now(timezone.utc) + timedelta(
                    days=self.refresh_interval_days
                )

                await db_session.commit()

            except Exception as e:
                # The failed attempt may have left the transaction unusable
                # (for example after a database error). Roll back, then reload
                # the job, because rollback expires every loaded attribute.
                await db_session.rollback()
                await db_session.refresh(job)

                job.status = "failed"
                job.completed_at = datetime.now(timezone.utc)
                job.error_message = str(e)
                job.error_details = {
                    "error_type": type(e).__name__,
                    "error_message": str(e),
                    "attempt": job.attempt_count
                }

                if job.can_retry:
                    # get_pending_jobs() picks the job up again once this
                    # timestamp has passed.
                    job.next_scheduled_at = datetime.now(timezone.utc) + timedelta(
                        hours=self.RETRY_DELAY_HOURS
                    )
                    logger.warning(
                        "POI refresh job failed, will retry",
                        job_id=str(job.id),
                        tenant_id=str(job.tenant_id),
                        attempt=job.attempt_count,
                        max_attempts=job.max_attempts,
                        error=str(e)
                    )
                else:
                    logger.error(
                        "POI refresh job failed permanently",
                        job_id=str(job.id),
                        tenant_id=str(job.tenant_id),
                        attempt=job.attempt_count,
                        error=str(e),
                        exc_info=True
                    )

                await db_session.commit()

                return {
                    "status": "failed",
                    "job_id": str(job.id),
                    "error": str(e),
                    "attempt": job.attempt_count,
                    "can_retry": job.can_retry
                }

            # Snapshot everything needed below so that neither the result nor
            # the follow-up scheduling depends on the job's state afterwards.
            completed_job_id = str(job.id)
            tenant_id = str(job.tenant_id)
            latitude = job.latitude
            longitude = job.longitude
            next_scheduled_at = job.next_scheduled_at
            outcome = {
                "status": "success",
                "job_id": completed_job_id,
                "pois_detected": job.pois_detected,
                "changes_detected": job.changes_detected,
                "change_summary": changes,
                "duration_seconds": job.duration_seconds,
                "next_scheduled_at": next_scheduled_at.isoformat(),
                "next_job_scheduled": True
            }

            logger.info(
                "POI refresh job completed",
                job_id=completed_job_id,
                tenant_id=tenant_id,
                pois_detected=outcome["pois_detected"],
                changes_detected=outcome["changes_detected"],
                duration_seconds=outcome["duration_seconds"]
            )

            # Scheduling the follow-up job is deliberately outside the try
            # block above: the refresh itself is already committed as
            # completed, so a scheduling problem must not mark it as failed.
            try:
                await self.schedule_refresh_job(
                    tenant_id=tenant_id,
                    latitude=latitude,
                    longitude=longitude,
                    scheduled_at=next_scheduled_at,
                    session=db_session
                )
            except Exception as e:
                outcome["next_job_scheduled"] = False
                logger.error(
                    "Failed to schedule next POI refresh job",
                    job_id=completed_job_id,
                    tenant_id=tenant_id,
                    error=str(e),
                    exc_info=True
                )
                # Leave the session usable for whoever owns it.
                await db_session.rollback()

            return outcome

        return await self._run_with_session(_execute, session)

    def _analyze_changes(
        self,
        old_results: Dict[str, Any],
        new_results: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Analyze changes between old and new POI detection results.

        Both arguments map a category name to a dict that carries a ``count``
        entry; ``None`` is treated as "no results". Categories present on
        either side are compared, so POIs of a category that disappeared are
        counted as a decrease.

        Changes are significant when the previous total was non-zero and any
        of these holds: 10+ POIs changed, 20%+ of the previous total changed,
        or a category was added or removed. When the previous total was zero,
        any POI change is significant.

        Args:
            old_results: Previous POI detection results
            new_results: New POI detection results

        Returns:
            Change analysis with the keys ``has_significant_changes``,
            ``category_changes``, ``total_poi_change``, ``new_categories``,
            ``removed_categories`` and, when the previous total was non-zero,
            ``total_change_percent``
        """
        # A stored context may carry no results at all.
        old_results = old_results or {}
        new_results = new_results or {}

        old_categories = set(old_results)
        new_categories = set(new_results)

        changes = {
            "has_significant_changes": False,
            "category_changes": {},
            "total_poi_change": 0,
            # Sorted so the stored summary is stable between runs.
            "new_categories": sorted(new_categories - old_categories, key=str),
            "removed_categories": sorted(old_categories - new_categories, key=str)
        }

        # Walk the union: iterating only the new categories would ignore the
        # POIs lost with a removed category.
        for category in sorted(old_categories | new_categories, key=str):
            old_count = self._poi_count(old_results.get(category))
            new_count = self._poi_count(new_results.get(category))
            change = new_count - old_count

            if change == 0:
                continue

            changes["category_changes"][category] = {
                "old_count": old_count,
                "new_count": new_count,
                "change": change,
                # A category growing from zero is reported as +100%.
                "change_percent": (change / old_count * 100) if old_count > 0 else 100
            }
            changes["total_poi_change"] += abs(change)

        total_old_pois = sum(self._poi_count(data) for data in old_results.values())

        if total_old_pois > 0:
            change_percent = (changes["total_poi_change"] / total_old_pois) * 100
            changes["total_change_percent"] = change_percent

            changes["has_significant_changes"] = (
                changes["total_poi_change"] >= 10
                or change_percent >= 20
                or len(changes["new_categories"]) > 0
                or len(changes["removed_categories"]) > 0
            )
        else:
            # No baseline to compute a percentage from.
            changes["has_significant_changes"] = changes["total_poi_change"] > 0

        return changes

    async def get_pending_jobs(
        self,
        limit: int = 100,
        session: Optional[AsyncSession] = None
    ) -> List[POIRefreshJob]:
        """
        Get jobs that are due for execution.

        A job is due when it is ``pending`` and its ``scheduled_at`` has
        passed, or when it ``failed``, still has attempts left and its retry
        time (``next_scheduled_at``) has passed.

        Args:
            limit: Maximum number of jobs to return
            session: Database session; a new one is opened when omitted

        Returns:
            List of due jobs, oldest ``scheduled_at`` first
        """
        async def _get_jobs(db_session: AsyncSession) -> List[POIRefreshJob]:
            now = datetime.now(timezone.utc)

            due_pending = and_(
                POIRefreshJob.status == "pending",
                POIRefreshJob.scheduled_at <= now
            )
            # Without this branch a failed job is never executed again, even
            # though execute_refresh_job() gives it a retry time.
            # NOTE(review): assumes attempt_count, max_attempts and
            # next_scheduled_at are mapped columns - confirm against the
            # model. execute_refresh_job() re-checks job.can_retry anyway.
            due_retry = and_(
                POIRefreshJob.status == "failed",
                POIRefreshJob.attempt_count < POIRefreshJob.max_attempts,
                POIRefreshJob.next_scheduled_at <= now
            )

            result = await db_session.execute(
                select(POIRefreshJob)
                .where(or_(due_pending, due_retry))
                .order_by(POIRefreshJob.scheduled_at)
                .limit(limit)
            )
            return list(result.scalars().all())

        return await self._run_with_session(_get_jobs, session)

    async def process_pending_jobs(
        self,
        max_concurrent: int = 5,
        session: Optional[AsyncSession] = None
    ) -> Dict[str, Any]:
        """
        Process due jobs concurrently.

        Only one batch is handled per call: the jobs returned by
        ``get_pending_jobs`` with its default limit.

        Args:
            max_concurrent: Maximum concurrent job executions (values below 1
                are treated as 1)
            session: Database session used to look up the due jobs. Each job
                is executed on its own session, because the jobs run
                concurrently.

        Returns:
            Processing summary with ``total_jobs``, ``successful``, ``failed``
            (failed results plus raised exceptions), ``skipped`` (any other
            outcome, e.g. ``already_running``) and the per-job ``results``
        """
        pending_jobs = await self.get_pending_jobs(session=session)

        if not pending_jobs:
            logger.info("No pending POI refresh jobs")
            return {
                "total_jobs": 0,
                "successful": 0,
                "failed": 0,
                "skipped": 0,
                "results": []
            }

        logger.info(
            "Processing pending POI refresh jobs",
            count=len(pending_jobs),
            max_concurrent=max_concurrent
        )

        # Semaphore(0) would block every job forever and a negative value
        # raises ValueError, so clamp to at least one slot.
        semaphore = asyncio.Semaphore(max(1, max_concurrent))
        job_ids = [str(job.id) for job in pending_jobs]

        async def process_job(job_id: str) -> Dict[str, Any]:
            async with semaphore:
                return await self.execute_refresh_job(job_id)

        raw_results = await asyncio.gather(
            *(process_job(job_id) for job_id in job_ids),
            return_exceptions=True
        )

        successful = failed = skipped = 0
        results = []
        for outcome in raw_results:
            if isinstance(outcome, BaseException):
                failed += 1
                results.append({"status": "error", "error": str(outcome)})
                continue

            results.append(outcome)
            status = outcome.get("status") if isinstance(outcome, dict) else None
            if status == "success":
                successful += 1
            elif status == "failed":
                failed += 1
            else:
                skipped += 1

        summary = {
            "total_jobs": len(pending_jobs),
            "successful": successful,
            "failed": failed,
            "skipped": skipped,
            "results": results
        }

        logger.info(
            "POI refresh jobs processing completed",
            **summary
        )

        return summary
|