Files
bakery-ia/services/external/app/services/poi_refresh_service.py

469 lines
15 KiB
Python

"""
POI Refresh Service
Manages periodic POI context refresh jobs.
Detects changes in POI landscape and updates tenant POI contexts.
"""
import asyncio
from datetime import datetime, timezone, timedelta
from typing import Optional, Dict, Any, List
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_, or_
import structlog
from app.models.poi_refresh_job import POIRefreshJob
from app.models.poi_context import TenantPOIContext
from app.services.poi_detection_service import POIDetectionService
from app.core.database import database_manager
logger = structlog.get_logger()
class POIRefreshService:
"""
POI Refresh Service
Manages background jobs for periodic POI context refresh.
Default refresh cycle: 180 days (6 months).
"""
DEFAULT_REFRESH_INTERVAL_DAYS = 180
DEFAULT_MAX_ATTEMPTS = 3
STALE_THRESHOLD_DAYS = 180
def __init__(
    self,
    poi_detection_service: Optional[POIDetectionService] = None,
    refresh_interval_days: int = DEFAULT_REFRESH_INTERVAL_DAYS
):
    """
    Set up the refresh service and its detection backend.

    Args:
        poi_detection_service: Detection service to use; when omitted (or
            falsy) a fresh POIDetectionService is constructed.
        refresh_interval_days: Number of days between scheduled refreshes
            (defaults to DEFAULT_REFRESH_INTERVAL_DAYS).
    """
    # Fall back to a default detection service when none is injected.
    if not poi_detection_service:
        poi_detection_service = POIDetectionService()

    self.poi_detection_service = poi_detection_service
    self.refresh_interval_days = refresh_interval_days

    logger.info("POI Refresh Service initialized", refresh_interval_days=refresh_interval_days)
async def schedule_refresh_job(
    self,
    tenant_id: str,
    latitude: float,
    longitude: float,
    scheduled_at: Optional[datetime] = None,
    session: Optional[AsyncSession] = None
) -> POIRefreshJob:
    """
    Schedule a POI refresh job for a tenant.

    If the tenant already has a job in "pending" or "running" state, that job
    is returned and no new row is created.

    Args:
        tenant_id: Tenant UUID
        latitude: Bakery latitude
        longitude: Bakery longitude
        scheduled_at: When to run the job (default: now + refresh_interval)
        session: Database session; when omitted a session is obtained from
            database_manager for the duration of the call

    Returns:
        The existing active job, or the newly created POIRefreshJob
    """
    if scheduled_at is None:
        scheduled_at = datetime.now(timezone.utc) + timedelta(
            days=self.refresh_interval_days
        )

    async def _create_job(db_session: AsyncSession) -> POIRefreshJob:
        # Look for an active job. Take the earliest match with LIMIT 1 rather
        # than scalar_one_or_none(): the latter raises MultipleResultsFound
        # when a tenant has ended up with more than one pending/running row.
        result = await db_session.execute(
            select(POIRefreshJob)
            .where(
                and_(
                    POIRefreshJob.tenant_id == tenant_id,
                    POIRefreshJob.status.in_(["pending", "running"])
                )
            )
            .order_by(POIRefreshJob.scheduled_at)
            .limit(1)
        )
        existing_job = result.scalars().first()
        if existing_job is not None:
            logger.info(
                "POI refresh job already scheduled",
                tenant_id=tenant_id,
                job_id=str(existing_job.id),
                scheduled_at=existing_job.scheduled_at
            )
            return existing_job

        # Create new job
        job = POIRefreshJob(
            tenant_id=tenant_id,
            latitude=latitude,
            longitude=longitude,
            scheduled_at=scheduled_at,
            status="pending",
            max_attempts=self.DEFAULT_MAX_ATTEMPTS
        )
        db_session.add(job)
        await db_session.commit()
        # Reload so server-generated values are populated on the instance.
        await db_session.refresh(job)
        logger.info(
            "POI refresh job scheduled",
            tenant_id=tenant_id,
            job_id=str(job.id),
            scheduled_at=scheduled_at
        )
        return job

    if session is not None:
        return await _create_job(session)
    async with database_manager.get_session() as db_session:
        return await _create_job(db_session)
async def execute_refresh_job(
    self,
    job_id: str,
    session: Optional[AsyncSession] = None
) -> Dict[str, Any]:
    """
    Execute a POI refresh job.

    Loads the job, marks it "running", runs POI detection for the job's
    coordinates, compares the outcome with the tenant's stored POI context
    and records the result on the job. After a successful run a follow-up
    job is scheduled; a failure while scheduling that follow-up is logged
    but does not change the outcome of the job that just completed.

    Args:
        job_id: Job UUID
        session: Database session; when omitted a session is obtained from
            database_manager for the duration of the call

    Returns:
        Execution result with status and details. "status" is one of
        "success", "failed", "already_running", "already_completed" or
        "max_attempts_reached".

    Raises:
        ValueError: If no job with the given id exists
    """
    async def _execute(db_session: AsyncSession) -> Dict[str, Any]:
        # Load job
        result = await db_session.execute(
            select(POIRefreshJob).where(POIRefreshJob.id == job_id)
        )
        job = result.scalar_one_or_none()
        if not job:
            raise ValueError(f"Job not found: {job_id}")

        # Guard clauses: nothing to do for jobs that are in flight, done,
        # or out of attempts.
        if job.status == "running":
            return {
                "status": "already_running",
                "job_id": str(job.id),
                "message": "Job is already running"
            }
        if job.status == "completed":
            return {
                "status": "already_completed",
                "job_id": str(job.id),
                "message": "Job already completed"
            }
        if not job.can_retry:
            return {
                "status": "max_attempts_reached",
                "job_id": str(job.id),
                "message": f"Max attempts ({job.max_attempts}) reached"
            }

        # Update job status; committed immediately so the "running" state
        # is visible before the (potentially slow) detection call.
        job.status = "running"
        job.started_at = datetime.now(timezone.utc)
        job.attempt_count += 1
        await db_session.commit()
        logger.info(
            "Executing POI refresh job",
            job_id=str(job.id),
            tenant_id=str(job.tenant_id),
            attempt=job.attempt_count
        )

        try:
            # Get existing POI context
            poi_result = await db_session.execute(
                select(TenantPOIContext).where(
                    TenantPOIContext.tenant_id == job.tenant_id
                )
            )
            existing_context = poi_result.scalar_one_or_none()

            # Perform POI detection
            detection_result = await self.poi_detection_service.detect_pois_for_bakery(
                latitude=job.latitude,
                longitude=job.longitude,
                tenant_id=str(job.tenant_id),
                force_refresh=True
            )

            # Analyze changes. A context row whose stored results are None
            # is treated the same as having no previous results.
            previous_results = (
                existing_context.poi_detection_results if existing_context else None
            ) or {}
            changes = self._analyze_changes(previous_results, detection_result)

            # Update job with results
            job.status = "completed"
            job.completed_at = datetime.now(timezone.utc)
            job.pois_detected = sum(
                data.get("count", 0)
                for data in detection_result.values()
            )
            job.changes_detected = changes["has_significant_changes"]
            job.change_summary = changes
            # Schedule next refresh
            job.next_scheduled_at = datetime.now(timezone.utc) + timedelta(
                days=self.refresh_interval_days
            )
            await db_session.commit()
        except Exception as e:
            # Job failed
            job.status = "failed"
            job.completed_at = datetime.now(timezone.utc)
            job.error_message = str(e)
            job.error_details = {
                "error_type": type(e).__name__,
                "error_message": str(e),
                "attempt": job.attempt_count
            }
            # Schedule retry if attempts remaining
            # NOTE(review): the job is left in status "failed" with
            # next_scheduled_at set; get_pending_jobs in this file selects
            # only status "pending" — confirm that something re-queues
            # retriable failed jobs.
            if job.can_retry:
                job.next_scheduled_at = datetime.now(timezone.utc) + timedelta(hours=1)
                logger.warning(
                    "POI refresh job failed, will retry",
                    job_id=str(job.id),
                    tenant_id=str(job.tenant_id),
                    attempt=job.attempt_count,
                    max_attempts=job.max_attempts,
                    error=str(e)
                )
            else:
                logger.error(
                    "POI refresh job failed permanently",
                    job_id=str(job.id),
                    tenant_id=str(job.tenant_id),
                    attempt=job.attempt_count,
                    error=str(e),
                    exc_info=True
                )
            await db_session.commit()
            return {
                "status": "failed",
                "job_id": str(job.id),
                "error": str(e),
                "attempt": job.attempt_count,
                "can_retry": job.can_retry
            }

        logger.info(
            "POI refresh job completed",
            job_id=str(job.id),
            tenant_id=str(job.tenant_id),
            pois_detected=job.pois_detected,
            changes_detected=job.changes_detected,
            duration_seconds=job.duration_seconds
        )

        # Capture everything needed from the job before touching the
        # session again, so the result does not depend on the state the
        # follow-up scheduling leaves the session in.
        tenant_id = str(job.tenant_id)
        latitude = job.latitude
        longitude = job.longitude
        next_scheduled_at = job.next_scheduled_at
        success_result = {
            "status": "success",
            "job_id": str(job.id),
            "pois_detected": job.pois_detected,
            "changes_detected": job.changes_detected,
            "change_summary": changes,
            "duration_seconds": job.duration_seconds,
            "next_scheduled_at": next_scheduled_at.isoformat()
        }

        # Schedule next job. This runs outside the try block above on
        # purpose: the refresh itself is already committed as "completed",
        # so an error here must not flip the job to "failed".
        try:
            await self.schedule_refresh_job(
                tenant_id=tenant_id,
                latitude=latitude,
                longitude=longitude,
                scheduled_at=next_scheduled_at,
                session=db_session
            )
        except Exception as e:
            # Discard whatever the failed scheduling attempt left pending.
            await db_session.rollback()
            logger.error(
                "Failed to schedule next POI refresh job",
                job_id=success_result["job_id"],
                tenant_id=tenant_id,
                error=str(e),
                exc_info=True
            )

        return success_result

    if session is not None:
        return await _execute(session)
    async with database_manager.get_session() as db_session:
        return await _execute(db_session)
def _analyze_changes(
self,
old_results: Dict[str, Any],
new_results: Dict[str, Any]
) -> Dict[str, Any]:
"""
Analyze changes between old and new POI detection results.
Args:
old_results: Previous POI detection results
new_results: New POI detection results
Returns:
Change analysis with significance flag
"""
changes = {
"has_significant_changes": False,
"category_changes": {},
"total_poi_change": 0,
"new_categories": [],
"removed_categories": []
}
old_categories = set(old_results.keys())
new_categories = set(new_results.keys())
# New categories
changes["new_categories"] = list(new_categories - old_categories)
# Removed categories
changes["removed_categories"] = list(old_categories - new_categories)
# Analyze changes per category
for category in new_categories:
old_count = old_results.get(category, {}).get("count", 0)
new_count = new_results.get(category, {}).get("count", 0)
change = new_count - old_count
if abs(change) > 0:
changes["category_changes"][category] = {
"old_count": old_count,
"new_count": new_count,
"change": change,
"change_percent": (change / old_count * 100) if old_count > 0 else 100
}
changes["total_poi_change"] += abs(change)
# Determine if changes are significant
# Significant if: 10+ POIs changed OR 20%+ change OR new/removed categories
total_old_pois = sum(data.get("count", 0) for data in old_results.values())
if total_old_pois > 0:
change_percent = (changes["total_poi_change"] / total_old_pois) * 100
changes["total_change_percent"] = change_percent
changes["has_significant_changes"] = (
changes["total_poi_change"] >= 10
or change_percent >= 20
or len(changes["new_categories"]) > 0
or len(changes["removed_categories"]) > 0
)
else:
changes["has_significant_changes"] = changes["total_poi_change"] > 0
return changes
async def get_pending_jobs(
    self,
    limit: int = 100,
    session: Optional[AsyncSession] = None
) -> List[POIRefreshJob]:
    """
    Fetch jobs in "pending" status whose scheduled time has been reached.

    Results are ordered by scheduled time, earliest first.

    Args:
        limit: Upper bound on the number of jobs returned
        session: Database session; when not supplied one is obtained from
            database_manager for the duration of the query

    Returns:
        Pending jobs that are due, at most `limit` of them
    """
    async def _fetch_due(db: AsyncSession):
        # Build the two filter conditions separately, then combine them.
        is_pending = POIRefreshJob.status == "pending"
        is_due = POIRefreshJob.scheduled_at <= datetime.now(timezone.utc)
        query = (
            select(POIRefreshJob)
            .where(and_(is_pending, is_due))
            .order_by(POIRefreshJob.scheduled_at)
            .limit(limit)
        )
        rows = await db.execute(query)
        return rows.scalars().all()

    if not session:
        async with database_manager.get_session() as own_session:
            return await _fetch_due(own_session)
    return await _fetch_due(session)
async def process_pending_jobs(
    self,
    max_concurrent: int = 5,
    session: Optional[AsyncSession] = None
) -> Dict[str, Any]:
    """
    Process all pending jobs concurrently.

    Due jobs are fetched via get_pending_jobs and each one is run through
    execute_refresh_job. The supplied session is used only for the fetch;
    every job execution is called without a session so concurrent jobs do
    not share one.

    Args:
        max_concurrent: Maximum concurrent job executions (must be >= 1)
        session: Database session used to load the pending jobs

    Returns:
        Processing summary with "total_jobs", "successful", "failed"
        (failed results plus raised exceptions), "skipped" (any other
        outcome, e.g. "already_running") and "results".

    Raises:
        ValueError: If max_concurrent is less than 1
    """
    # A semaphore created with 0 permits would block every job forever.
    if max_concurrent < 1:
        raise ValueError(f"max_concurrent must be >= 1, got {max_concurrent}")

    pending_jobs = await self.get_pending_jobs(session=session)
    if not pending_jobs:
        logger.info("No pending POI refresh jobs")
        return {
            "total_jobs": 0,
            "successful": 0,
            "failed": 0,
            "skipped": 0,
            "results": []
        }
    logger.info(
        "Processing pending POI refresh jobs",
        count=len(pending_jobs),
        max_concurrent=max_concurrent
    )

    # Process jobs with concurrency limit
    semaphore = asyncio.Semaphore(max_concurrent)

    async def process_job(job: POIRefreshJob):
        async with semaphore:
            return await self.execute_refresh_job(str(job.id))

    results = await asyncio.gather(
        *(process_job(job) for job in pending_jobs),
        return_exceptions=True
    )

    # Summarize results. gather(return_exceptions=True) can hand back
    # BaseException instances (e.g. CancelledError), so test against
    # BaseException rather than Exception.
    successful = 0
    failed = 0
    skipped = 0
    normalized_results = []
    for outcome in results:
        if isinstance(outcome, BaseException):
            failed += 1
            normalized_results.append({"status": "error", "error": str(outcome)})
            continue
        normalized_results.append(outcome)
        status = outcome.get("status") if isinstance(outcome, dict) else None
        if status == "success":
            successful += 1
        elif status == "failed":
            failed += 1
        else:
            skipped += 1

    summary = {
        "total_jobs": len(pending_jobs),
        "successful": successful,
        "failed": failed,
        "skipped": skipped,
        "results": normalized_results
    }
    logger.info(
        "POI refresh jobs processing completed",
        **summary
    )
    return summary