Add POI feature and improve the overall backend implementation

This commit is contained in:
Urtzi Alfaro
2025-11-12 15:34:10 +01:00
parent e8096cd979
commit 5783c7ed05
173 changed files with 16862 additions and 9078 deletions

View File

@@ -0,0 +1,269 @@
"""
Competitor Analyzer
Specialized analysis for competitor bakeries with competitive pressure modeling.
Treats competitor proximity differently than other POIs, considering market dynamics.
"""
from typing import Any, Dict, List, Optional, Tuple
import structlog
from math import radians, sin, cos, sqrt, atan2
from app.core.poi_config import COMPETITOR_ZONES
logger = structlog.get_logger()
class CompetitorAnalyzer:
"""
Competitive landscape analyzer for bakery locations.
Models competitive pressure considering:
- Direct competition (<100m): Strong negative impact
- Nearby competition (100-500m): Moderate negative impact
- Market saturation (500-1000m): Can be positive (bakery district)
or negative (competitive market)
"""
def analyze_competitive_landscape(
self,
competitor_pois: List[Dict[str, Any]],
bakery_location: Tuple[float, float],
tenant_id: Optional[str] = None
) -> Dict[str, Any]:
"""
Analyze competitive pressure from nearby bakeries.
Args:
competitor_pois: List of detected competitor POIs
bakery_location: Tuple of (latitude, longitude)
tenant_id: Optional tenant ID for logging
Returns:
Competitive analysis with pressure scores and market classification
"""
if not competitor_pois:
logger.info(
"No competitors detected - underserved market",
tenant_id=tenant_id
)
return {
"competitive_pressure_score": 0.0,
"direct_competitors_count": 0,
"nearby_competitors_count": 0,
"market_competitors_count": 0,
"competitive_zone": "low_competition",
"market_type": "underserved",
"competitive_advantage": "first_mover",
"ml_feature_competitive_pressure": 0.0,
"ml_feature_has_direct_competitor": 0,
"ml_feature_competitor_density_500m": 0,
"competitor_details": []
}
# Categorize competitors by distance
direct_competitors = [] # <100m
nearby_competitors = [] # 100-500m
market_competitors = [] # 500-1000m
competitor_details = []
for poi in competitor_pois:
distance_m = self._calculate_distance(
bakery_location, (poi["lat"], poi["lon"])
) * 1000
competitor_info = {
"name": poi.get("name", "Unnamed"),
"osm_id": poi.get("osm_id"),
"distance_m": round(distance_m, 1),
"lat": poi["lat"],
"lon": poi["lon"]
}
if distance_m < COMPETITOR_ZONES["direct"]["max_distance_m"]:
direct_competitors.append(poi)
competitor_info["zone"] = "direct"
elif distance_m < COMPETITOR_ZONES["nearby"]["max_distance_m"]:
nearby_competitors.append(poi)
competitor_info["zone"] = "nearby"
elif distance_m < COMPETITOR_ZONES["market"]["max_distance_m"]:
market_competitors.append(poi)
competitor_info["zone"] = "market"
competitor_details.append(competitor_info)
# Calculate competitive pressure score
direct_pressure = (
len(direct_competitors) *
COMPETITOR_ZONES["direct"]["pressure_multiplier"]
)
nearby_pressure = (
len(nearby_competitors) *
COMPETITOR_ZONES["nearby"]["pressure_multiplier"]
)
# Market saturation analysis
min_for_district = COMPETITOR_ZONES["market"]["min_count_for_district"]
if len(market_competitors) >= min_for_district:
# Many bakeries = destination area (bakery district)
market_pressure = COMPETITOR_ZONES["market"]["district_multiplier"]
market_type = "bakery_district"
elif len(market_competitors) > 2:
market_pressure = COMPETITOR_ZONES["market"]["normal_multiplier"]
market_type = "competitive_market"
else:
market_pressure = 0.0
market_type = "normal_market"
competitive_pressure_score = (
direct_pressure + nearby_pressure + market_pressure
)
# Determine competitive zone classification
if len(direct_competitors) > 0:
competitive_zone = "high_competition"
competitive_advantage = "differentiation_required"
elif len(nearby_competitors) > 2:
competitive_zone = "moderate_competition"
competitive_advantage = "quality_focused"
else:
competitive_zone = "low_competition"
competitive_advantage = "local_leader"
# Sort competitors by distance
competitor_details.sort(key=lambda x: x["distance_m"])
logger.info(
"Competitive analysis complete",
tenant_id=tenant_id,
competitive_zone=competitive_zone,
market_type=market_type,
total_competitors=len(competitor_pois),
direct=len(direct_competitors),
nearby=len(nearby_competitors),
market=len(market_competitors),
pressure_score=competitive_pressure_score
)
return {
# Summary scores
"competitive_pressure_score": round(competitive_pressure_score, 2),
# Competitor counts by zone
"direct_competitors_count": len(direct_competitors),
"nearby_competitors_count": len(nearby_competitors),
"market_competitors_count": len(market_competitors),
"total_competitors_count": len(competitor_pois),
# Market classification
"competitive_zone": competitive_zone,
"market_type": market_type,
"competitive_advantage": competitive_advantage,
# ML features (for model integration)
"ml_feature_competitive_pressure": round(competitive_pressure_score, 2),
"ml_feature_has_direct_competitor": 1 if len(direct_competitors) > 0 else 0,
"ml_feature_competitor_density_500m": (
len(direct_competitors) + len(nearby_competitors)
),
# Detailed competitor information
"competitor_details": competitor_details,
# Nearest competitor
"nearest_competitor": competitor_details[0] if competitor_details else None
}
def _calculate_distance(
self,
coord1: Tuple[float, float],
coord2: Tuple[float, float]
) -> float:
"""
Calculate Haversine distance in kilometers.
Args:
coord1: Tuple of (latitude, longitude)
coord2: Tuple of (latitude, longitude)
Returns:
Distance in kilometers
"""
lat1, lon1 = coord1
lat2, lon2 = coord2
R = 6371 # Earth radius in km
dlat = radians(lat2 - lat1)
dlon = radians(lon2 - lon1)
a = (sin(dlat/2)**2 +
cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2)
c = 2 * atan2(sqrt(a), sqrt(1-a))
return R * c
def get_competitive_insights(
self,
analysis_result: Dict[str, Any]
) -> List[str]:
"""
Generate human-readable competitive insights.
Args:
analysis_result: Result from analyze_competitive_landscape
Returns:
List of insight strings for business intelligence
"""
insights = []
zone = analysis_result["competitive_zone"]
market = analysis_result["market_type"]
pressure = analysis_result["competitive_pressure_score"]
direct = analysis_result["direct_competitors_count"]
nearby = analysis_result["nearby_competitors_count"]
# Zone-specific insights
if zone == "high_competition":
insights.append(
f"⚠️ High competition: {direct} direct competitor(s) within 100m. "
"Focus on differentiation and quality."
)
elif zone == "moderate_competition":
insights.append(
f"Moderate competition: {nearby} nearby competitor(s) within 500m. "
"Good opportunity for market share."
)
else:
insights.append(
"✅ Low competition: Local market leader opportunity."
)
# Market type insights
if market == "bakery_district":
insights.append(
"📍 Bakery district: High foot traffic area with multiple bakeries. "
"Customers actively seek bakery products here."
)
elif market == "competitive_market":
insights.append(
"Market has multiple bakeries. Quality and customer service critical."
)
elif market == "underserved":
insights.append(
"🎯 Underserved market: Potential for strong customer base growth."
)
# Pressure score insight
if pressure < -1.5:
insights.append(
"Strong competitive pressure expected to impact demand. "
"Marketing and differentiation essential."
)
elif pressure > 0:
insights.append(
"Positive market dynamics: Location benefits from bakery destination traffic."
)
return insights
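
A minimal usage sketch for the analyzer above. The module path, tenant ID, coordinates, and competitor entries are illustrative assumptions; the zone thresholds come from COMPETITOR_ZONES in app.core.poi_config.

# Usage sketch (module path and all literals below are illustrative assumptions)
from app.services.competitor_analyzer import CompetitorAnalyzer

analyzer = CompetitorAnalyzer()
result = analyzer.analyze_competitive_landscape(
    competitor_pois=[
        {"name": "Panaderia Sol", "osm_id": "111", "lat": 43.2632, "lon": -2.9350},
        {"name": "Horno Luna", "osm_id": "222", "lat": 43.2650, "lon": -2.9401},
    ],
    bakery_location=(43.2630, -2.9349),
    tenant_id="tenant-demo",
)
print(result["competitive_zone"], result["competitive_pressure_score"])
for insight in analyzer.get_competitive_insights(result):
    print("-", insight)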

View File

@@ -0,0 +1,282 @@
"""
Nominatim Geocoding Service
Provides address search and geocoding using OpenStreetMap Nominatim API.
For development: uses the public API (rate-limited)
For production: should point to a self-hosted Nominatim instance
"""
import httpx
from typing import List, Dict, Any, Optional
import structlog
from asyncio import sleep
logger = structlog.get_logger()
class NominatimService:
"""
Nominatim geocoding and address search service.
Uses OpenStreetMap Nominatim API for address autocomplete and geocoding.
Respects rate limits and usage policy.
"""
# For development: public API (rate-limited to 1 req/sec)
# For production: should be overridden with a self-hosted instance
DEFAULT_BASE_URL = "https://nominatim.openstreetmap.org"
def __init__(self, base_url: Optional[str] = None, user_agent: str = "BakeryIA-Forecasting/1.0"):
"""
Initialize Nominatim service.
Args:
base_url: Nominatim server URL (defaults to public API)
user_agent: User agent for API requests (required by Nominatim policy)
"""
self.base_url = (base_url or self.DEFAULT_BASE_URL).rstrip("/")
self.user_agent = user_agent
self.headers = {
"User-Agent": self.user_agent
}
# Rate limiting for public API (1 request per second)
self.is_public_api = self.base_url == self.DEFAULT_BASE_URL
self.min_request_interval = 1.0 if self.is_public_api else 0.0
logger.info(
"Nominatim service initialized",
base_url=self.base_url,
is_public_api=self.is_public_api,
rate_limit=f"{self.min_request_interval}s" if self.is_public_api else "none"
)
async def search_address(
self,
query: str,
country_code: str = "es",
limit: int = 10
) -> List[Dict[str, Any]]:
"""
Search for addresses matching query (autocomplete).
Args:
query: Address search query
country_code: ISO country code to restrict search (default: Spain)
limit: Maximum number of results
Returns:
List of address suggestions with display_name, lat, lon, osm_id, etc.
"""
if not query or len(query.strip()) < 3:
logger.warning("Search query too short", query=query)
return []
try:
# Rate limiting for public API
if self.is_public_api:
await sleep(self.min_request_interval)
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(
f"{self.base_url}/search",
params={
"q": query,
"format": "json",
"addressdetails": 1,
"countrycodes": country_code,
"limit": limit,
"accept-language": "es"
},
headers=self.headers
)
response.raise_for_status()
results = response.json()
# Parse and enrich results
addresses = []
for result in results:
addresses.append({
"display_name": result.get("display_name"),
"lat": float(result.get("lat")),
"lon": float(result.get("lon")),
"osm_type": result.get("osm_type"),
"osm_id": result.get("osm_id"),
"place_id": result.get("place_id"),
"type": result.get("type"),
"class": result.get("class"),
"address": result.get("address", {}),
"boundingbox": result.get("boundingbox", [])
})
logger.info(
"Address search completed",
query=query,
result_count=len(addresses)
)
return addresses
except httpx.HTTPError as e:
logger.error(
"Nominatim API request failed",
query=query,
error=str(e)
)
return []
except Exception as e:
logger.error(
"Unexpected error in address search",
query=query,
error=str(e),
exc_info=True
)
return []
async def geocode_address(
self,
address: str,
country_code: str = "es"
) -> Optional[Dict[str, Any]]:
"""
Geocode an address to get coordinates.
Args:
address: Full address string
country_code: ISO country code
Returns:
Dictionary with lat, lon, display_name, address components or None
"""
results = await self.search_address(address, country_code, limit=1)
if not results:
logger.warning("No geocoding results found", address=address)
return None
result = results[0]
logger.info(
"Address geocoded successfully",
address=address,
lat=result["lat"],
lon=result["lon"]
)
return result
async def reverse_geocode(
self,
latitude: float,
longitude: float
) -> Optional[Dict[str, Any]]:
"""
Reverse geocode coordinates to get address.
Args:
latitude: Latitude coordinate
longitude: Longitude coordinate
Returns:
Dictionary with address information or None
"""
try:
# Rate limiting for public API
if self.is_public_api:
await sleep(self.min_request_interval)
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(
f"{self.base_url}/reverse",
params={
"lat": latitude,
"lon": longitude,
"format": "json",
"addressdetails": 1,
"accept-language": "es"
},
headers=self.headers
)
response.raise_for_status()
result = response.json()
address_info = {
"display_name": result.get("display_name"),
"lat": float(result.get("lat")),
"lon": float(result.get("lon")),
"osm_type": result.get("osm_type"),
"osm_id": result.get("osm_id"),
"place_id": result.get("place_id"),
"address": result.get("address", {}),
"boundingbox": result.get("boundingbox", [])
}
logger.info(
"Reverse geocoding completed",
lat=latitude,
lon=longitude,
address=address_info["display_name"]
)
return address_info
except httpx.HTTPError as e:
logger.error(
"Nominatim reverse geocoding failed",
lat=latitude,
lon=longitude,
error=str(e)
)
return None
except Exception as e:
logger.error(
"Unexpected error in reverse geocoding",
lat=latitude,
lon=longitude,
error=str(e),
exc_info=True
)
return None
async def validate_coordinates(
self,
latitude: float,
longitude: float
) -> bool:
"""
Validate that coordinates point to a real location.
Args:
latitude: Latitude to validate
longitude: Longitude to validate
Returns:
True if coordinates are valid, False otherwise
"""
if not (-90 <= latitude <= 90 and -180 <= longitude <= 180):
return False
result = await self.reverse_geocode(latitude, longitude)
return result is not None
async def health_check(self) -> bool:
"""
Check if Nominatim service is accessible.
Returns:
True if service is healthy, False otherwise
"""
try:
async with httpx.AsyncClient(timeout=5.0) as client:
response = await client.get(
f"{self.base_url}/status",
params={"format": "json"},
headers=self.headers
)
return response.status_code == 200
except Exception as e:
logger.error(
"Nominatim health check failed",
error=str(e)
)
return False
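
A short async usage sketch for the service above; the module path and the sample query are illustrative assumptions, and running it hits the public Nominatim API (paced at one request per second).

# Usage sketch (module path and sample query are illustrative assumptions)
import asyncio
from app.services.nominatim_service import NominatimService

async def main() -> None:
    service = NominatimService()  # defaults to the public API with 1 req/s pacing
    matches = await service.search_address("Gran Via 1, Bilbao", country_code="es", limit=3)
    for match in matches:
        print(match["display_name"], match["lat"], match["lon"])
    if matches:
        reverse = await service.reverse_geocode(matches[0]["lat"], matches[0]["lon"])
        print(reverse["display_name"] if reverse else "no reverse result")

asyncio.run(main())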

View File

@@ -0,0 +1,466 @@
"""
POI Detection Service
Automated Point of Interest detection using Overpass API (OpenStreetMap).
Detects nearby POIs around bakery locations and generates ML features
for location-based demand forecasting.
"""
import overpy
from typing import List, Dict, Any, Tuple, Optional
from datetime import datetime, timezone, timedelta
import asyncio
import structlog
import httpx
from math import radians, sin, cos, sqrt, atan2
import random
from app.core.poi_config import (
POI_CATEGORIES,
OVERPASS_API_URL,
OVERPASS_TIMEOUT_SECONDS,
OVERPASS_MAX_RETRIES,
OVERPASS_RETRY_DELAY_SECONDS,
DISTANCE_BANDS
)
logger = structlog.get_logger()
class POIDetectionService:
"""
Automated POI detection using Overpass API (OpenStreetMap).
Detects points of interest near bakery locations and calculates
ML features for demand forecasting with location-specific context.
"""
def __init__(self, overpass_url: str = OVERPASS_API_URL):
self.overpass_url = overpass_url
self.api = overpy.Overpass(url=overpass_url)
self.timeout = OVERPASS_TIMEOUT_SECONDS
async def detect_pois_for_bakery(
self,
latitude: float,
longitude: float,
tenant_id: str
) -> Dict[str, Any]:
"""
Detect all POIs around a bakery location.
Args:
latitude: Bakery latitude
longitude: Bakery longitude
tenant_id: Tenant identifier for logging
Returns:
Complete POI detection results with ML features
"""
logger.info(
"Starting POI detection",
tenant_id=tenant_id,
location=(latitude, longitude)
)
poi_results = {}
detection_errors = []
# Query each POI category with inter-query delays
category_items = list(POI_CATEGORIES.items())
for idx, (category_key, category) in enumerate(category_items):
try:
pois = await self._query_pois_with_retry(
latitude,
longitude,
category.osm_query,
category.search_radius_m,
category_key
)
# Calculate features for this category
features = self._calculate_poi_features(
pois,
(latitude, longitude),
category
)
poi_results[category_key] = {
"pois": pois,
"features": features,
"count": len(pois)
}
logger.info(
f"Detected {category_key}",
count=len(pois),
proximity_score=features["proximity_score"]
)
# Add delay between categories to respect rate limits
# (except after the last category)
if idx < len(category_items) - 1:
inter_query_delay = 2.0 + random.uniform(0.5, 1.5)
await asyncio.sleep(inter_query_delay)
except Exception as e:
logger.error(
f"Failed to detect {category_key}",
error=str(e),
tenant_id=tenant_id
)
detection_errors.append({
"category": category_key,
"error": str(e)
})
poi_results[category_key] = {
"pois": [],
"features": self._get_empty_features(),
"count": 0,
"error": str(e)
}
# Add a longer delay after an error before continuing
if idx < len(category_items) - 1:
error_recovery_delay = 3.0 + random.uniform(1.0, 2.0)
await asyncio.sleep(error_recovery_delay)
# Generate combined ML features
ml_features = self._generate_ml_features(poi_results)
# Generate summary
summary = self._generate_summary(poi_results)
detection_status = "completed" if not detection_errors else (
"partial" if len(detection_errors) < len(POI_CATEGORIES) else "failed"
)
return {
"tenant_id": tenant_id,
"location": {"latitude": latitude, "longitude": longitude},
"detection_timestamp": datetime.now(timezone.utc).isoformat(),
"detection_status": detection_status,
"detection_errors": detection_errors if detection_errors else None,
"poi_categories": poi_results,
"ml_features": ml_features,
"summary": summary
}
async def _query_pois_with_retry(
self,
latitude: float,
longitude: float,
osm_query: str,
radius_m: int,
category_key: str
) -> List[Dict[str, Any]]:
"""
Query Overpass API with exponential backoff retry logic.
Implements:
- Exponential backoff with jitter
- Extended delays for rate limiting errors
- Proper error type detection
"""
last_error = None
base_delay = OVERPASS_RETRY_DELAY_SECONDS
for attempt in range(OVERPASS_MAX_RETRIES):
try:
return await self._query_pois(
latitude, longitude, osm_query, radius_m
)
except Exception as e:
last_error = e
error_message = str(e).lower()
# Determine if this is a rate limiting error
is_rate_limit = any(phrase in error_message for phrase in [
'too many requests',
'rate limit',
'server load too high',
'quota exceeded',
'retry later',
'429',
'503',
'504'
])
if attempt < OVERPASS_MAX_RETRIES - 1:
# Calculate exponential backoff with jitter
# For rate limiting: use longer delays (10-30 seconds)
# For other errors: use standard backoff (2-8 seconds)
if is_rate_limit:
delay = base_delay * (3 ** attempt) + random.uniform(1, 5)
delay = min(delay, 30) # Cap at 30 seconds
else:
delay = base_delay * (2 ** attempt) + random.uniform(0.5, 1.5)
delay = min(delay, 10) # Cap at 10 seconds
logger.warning(
f"POI query retry {attempt + 1}/{OVERPASS_MAX_RETRIES}",
category=category_key,
error=str(e),
is_rate_limit=is_rate_limit,
retry_delay=f"{delay:.1f}s"
)
await asyncio.sleep(delay)
else:
logger.error(
"POI query failed after all retries",
category=category_key,
error=str(e),
is_rate_limit=is_rate_limit
)
raise last_error
async def _query_pois(
self,
latitude: float,
longitude: float,
osm_query: str,
radius_m: int
) -> List[Dict[str, Any]]:
"""
Query Overpass API for POIs in radius.
Raises:
Exception: With descriptive error message from Overpass API
"""
# Build Overpass QL query
query = f"""
[out:json][timeout:{self.timeout}];
(
node{osm_query}(around:{radius_m},{latitude},{longitude});
way{osm_query}(around:{radius_m},{latitude},{longitude});
);
out center;
"""
# Execute query (use asyncio thread pool for blocking overpy)
loop = asyncio.get_running_loop()
try:
result = await loop.run_in_executor(
None,
self.api.query,
query
)
except overpy.exception.OverpassTooManyRequests as e:
# Explicitly handle rate limiting
raise Exception("Too many requests - Overpass API rate limit exceeded") from e
except overpy.exception.OverpassGatewayTimeout as e:
# Query took too long
raise Exception("Gateway timeout - query too complex or server busy") from e
except overpy.exception.OverpassBadRequest as e:
# Query syntax error
raise Exception(f"Bad request - invalid query syntax: {str(e)}") from e
except Exception as e:
# Check if it's an HTTP error with status code
error_msg = str(e).lower()
if '429' in error_msg or 'too many' in error_msg:
raise Exception("Too many requests - rate limit exceeded") from e
elif '503' in error_msg or 'load too high' in error_msg:
raise Exception("Server load too high - Overpass API overloaded") from e
elif '504' in error_msg or 'timeout' in error_msg:
raise Exception("Gateway timeout - server busy") from e
else:
# Re-raise with original message
raise
# Parse results
pois = []
# Process nodes
for node in result.nodes:
pois.append({
"osm_id": str(node.id),
"type": "node",
"lat": float(node.lat),
"lon": float(node.lon),
"tags": dict(node.tags),
"name": node.tags.get("name", "Unnamed")
})
# Process ways (buildings, areas)
for way in result.ways:
# Get center point
if hasattr(way, 'center_lat') and way.center_lat:
lat, lon = float(way.center_lat), float(way.center_lon)
else:
# Fall back to the centroid of the way's member nodes; with "out center"
# the node geometry may be missing from the result, so skip such ways
try:
if not way.nodes:
continue
lats = [float(node.lat) for node in way.nodes]
lons = [float(node.lon) for node in way.nodes]
lat = sum(lats) / len(lats)
lon = sum(lons) / len(lons)
except overpy.exception.DataIncomplete:
continue
pois.append({
"osm_id": str(way.id),
"type": "way",
"lat": lat,
"lon": lon,
"tags": dict(way.tags),
"name": way.tags.get("name", "Unnamed")
})
return pois
def _calculate_poi_features(
self,
pois: List[Dict[str, Any]],
bakery_location: Tuple[float, float],
category
) -> Dict[str, float]:
"""Calculate ML features for POI category"""
if not pois:
return self._get_empty_features()
# Calculate distances
distances = []
for poi in pois:
dist_km = self._haversine_distance(
bakery_location,
(poi["lat"], poi["lon"])
)
distances.append(dist_km * 1000) # Convert to meters
# Feature Tier 1: Proximity Scores (PRIMARY)
proximity_score = sum(1.0 / (1.0 + d/1000) for d in distances)
weighted_proximity_score = proximity_score * category.weight
# Feature Tier 2: Distance Band Counts
count_0_100m = sum(1 for d in distances if d <= 100)
count_100_300m = sum(1 for d in distances if 100 < d <= 300)
count_300_500m = sum(1 for d in distances if 300 < d <= 500)
count_500_1000m = sum(1 for d in distances if 500 < d <= 1000)
# Feature Tier 3: Distance to Nearest
distance_to_nearest_m = min(distances) if distances else 9999.0
# Feature Tier 4: Binary Flags
has_within_100m = any(d <= 100 for d in distances)
has_within_300m = any(d <= 300 for d in distances)
has_within_500m = any(d <= 500 for d in distances)
return {
# Tier 1: Proximity scores (PRIMARY for ML)
"proximity_score": round(proximity_score, 4),
"weighted_proximity_score": round(weighted_proximity_score, 4),
# Tier 2: Distance bands
"count_0_100m": count_0_100m,
"count_100_300m": count_100_300m,
"count_300_500m": count_300_500m,
"count_500_1000m": count_500_1000m,
"total_count": len(pois),
# Tier 3: Distance to nearest
"distance_to_nearest_m": round(distance_to_nearest_m, 1),
# Tier 4: Binary flags
"has_within_100m": has_within_100m,
"has_within_300m": has_within_300m,
"has_within_500m": has_within_500m
}
def _generate_ml_features(self, poi_results: Dict[str, Any]) -> Dict[str, float]:
"""
Generate flat feature dictionary for ML model ingestion.
These features will be added to Prophet/XGBoost as regressors.
"""
ml_features = {}
for category_key, data in poi_results.items():
features = data.get("features", {})
# Flatten with category prefix
for feature_name, value in features.items():
ml_feature_name = f"poi_{category_key}_{feature_name}"
# Convert boolean to int for ML
if isinstance(value, bool):
value = 1 if value else 0
ml_features[ml_feature_name] = value
return ml_features
def _get_empty_features(self) -> Dict[str, float]:
"""Return zero features when no POIs found"""
return {
"proximity_score": 0.0,
"weighted_proximity_score": 0.0,
"count_0_100m": 0,
"count_100_300m": 0,
"count_300_500m": 0,
"count_500_1000m": 0,
"total_count": 0,
"distance_to_nearest_m": 9999.0,
"has_within_100m": False,
"has_within_300m": False,
"has_within_500m": False
}
def _haversine_distance(
self,
coord1: Tuple[float, float],
coord2: Tuple[float, float]
) -> float:
"""
Calculate distance between two coordinates in kilometers.
Uses Haversine formula for great-circle distance.
"""
lat1, lon1 = coord1
lat2, lon2 = coord2
R = 6371 # Earth radius in km
dlat = radians(lat2 - lat1)
dlon = radians(lon2 - lon1)
a = (sin(dlat/2)**2 +
cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2)
c = 2 * atan2(sqrt(a), sqrt(1-a))
return R * c
def _generate_summary(self, poi_results: Dict[str, Any]) -> Dict[str, Any]:
"""Generate human-readable summary"""
total_pois = sum(r["count"] for r in poi_results.values())
categories_with_pois = [
k for k, v in poi_results.items() if v["count"] > 0
]
high_impact_categories = [
k for k, v in poi_results.items()
if v["features"]["proximity_score"] > 2.0
]
return {
"total_pois_detected": total_pois,
"categories_with_pois": categories_with_pois,
"high_impact_categories": high_impact_categories,
"categories_count": len(categories_with_pois)
}
async def health_check(self) -> Dict[str, Any]:
"""Check if Overpass API is accessible"""
try:
async with httpx.AsyncClient(timeout=5) as client:
response = await client.get(f"{self.overpass_url}/status")
is_healthy = response.status_code == 200
return {
"healthy": is_healthy,
"status_code": response.status_code,
"url": self.overpass_url
}
except Exception as e:
return {
"healthy": False,
"error": str(e),
"url": self.overpass_url
}
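
Two small sketches for the service above: a worked example of the Tier 1 proximity score (each POI contributes 1 / (1 + distance_km), so closer POIs dominate), and a call to detect_pois_for_bakery. The module path, coordinates, and distances are illustrative assumptions; running the async part queries the configured Overpass endpoint and is subject to its rate limits.

# Sketches (module path, coordinates, and distances are illustrative assumptions)
import asyncio
from app.services.poi_detection_service import POIDetectionService

# Worked Tier 1 proximity-score example: each POI contributes 1 / (1 + distance_km)
distances_m = [50, 200, 800]
proximity_score = sum(1.0 / (1.0 + d / 1000) for d in distances_m)
print(round(proximity_score, 4))  # 0.9524 + 0.8333 + 0.5556 ~= 2.3413

async def main() -> None:
    # Queries the configured Overpass endpoint; subject to its rate limits
    service = POIDetectionService()
    result = await service.detect_pois_for_bakery(43.2630, -2.9349, tenant_id="tenant-demo")
    print(result["detection_status"], result["summary"]["total_pois_detected"])

asyncio.run(main())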

View File

@@ -0,0 +1,184 @@
"""
POI Feature Selector
Determines which POI features are relevant for ML model inclusion.
Filters out low-signal features to prevent model noise and overfitting.
"""
from typing import Any, Dict, List, Optional
import structlog
from app.core.poi_config import RELEVANCE_THRESHOLDS
logger = structlog.get_logger()
class POIFeatureSelector:
"""
Feature relevance engine for POI-based ML features.
Applies research-based thresholds to filter out irrelevant POI features
that would add noise to bakery-specific demand forecasting models.
"""
def __init__(self, thresholds: Optional[Dict[str, Dict[str, float]]] = None):
"""
Initialize feature selector.
Args:
thresholds: Custom relevance thresholds (defaults to RELEVANCE_THRESHOLDS)
"""
self.thresholds = thresholds or RELEVANCE_THRESHOLDS
def select_relevant_features(
self,
poi_detection_results: Dict[str, Any],
tenant_id: Optional[str] = None
) -> Dict[str, Any]:
"""
Filter POI features based on relevance thresholds.
Only includes features for POI categories that pass relevance tests.
This prevents adding noise to ML models for bakeries where certain
POI categories are not significant.
Args:
poi_detection_results: Full POI detection results
tenant_id: Optional tenant ID for logging
Returns:
Dictionary with relevant features and detailed relevance report
"""
relevant_features = {}
relevance_report = []
relevant_categories = []
for category_key, data in poi_detection_results.items():
features = data.get("features", {})
thresholds = self.thresholds.get(category_key, {})
if not thresholds:
logger.warning(
f"No thresholds defined for category {category_key}",
tenant_id=tenant_id
)
continue
# Check relevance criteria
is_relevant, rejection_reason = self._check_relevance(
features, thresholds, category_key
)
if is_relevant:
# Include features with category prefix
for feature_name, value in features.items():
ml_feature_name = f"poi_{category_key}_{feature_name}"
# Convert boolean to int for ML
if isinstance(value, bool):
value = 1 if value else 0
relevant_features[ml_feature_name] = value
relevant_categories.append(category_key)
relevance_report.append({
"category": category_key,
"relevant": True,
"reason": "Passes all relevance thresholds",
"proximity_score": features.get("proximity_score", 0),
"count": features.get("total_count", 0),
"distance_to_nearest_m": features.get("distance_to_nearest_m", 9999)
})
else:
relevance_report.append({
"category": category_key,
"relevant": False,
"reason": rejection_reason,
"proximity_score": features.get("proximity_score", 0),
"count": features.get("total_count", 0),
"distance_to_nearest_m": features.get("distance_to_nearest_m", 9999)
})
logger.info(
"POI feature selection complete",
tenant_id=tenant_id,
total_categories=len(poi_detection_results),
relevant_categories=len(relevant_categories),
rejected_categories=len(poi_detection_results) - len(relevant_categories)
)
return {
"features": relevant_features,
"relevant_categories": relevant_categories,
"relevance_report": relevance_report,
"total_features": len(relevant_features),
"total_relevant_categories": len(relevant_categories)
}
def _check_relevance(
self,
features: Dict[str, Any],
thresholds: Dict[str, float],
category_key: str
) -> tuple[bool, str]:
"""
Check if POI category passes relevance thresholds.
Returns:
Tuple of (is_relevant, rejection_reason)
"""
# Criterion 1: Proximity score
min_proximity = thresholds.get("min_proximity_score", 0)
actual_proximity = features.get("proximity_score", 0)
if actual_proximity < min_proximity:
return False, f"Proximity score too low ({actual_proximity:.2f} < {min_proximity})"
# Criterion 2: Distance to nearest
max_distance = thresholds.get("max_distance_to_nearest_m", 9999)
actual_distance = features.get("distance_to_nearest_m", 9999)
if actual_distance > max_distance:
return False, f"Nearest POI too far ({actual_distance:.0f}m > {max_distance}m)"
# Criterion 3: Count threshold
min_count = thresholds.get("min_count", 0)
actual_count = features.get("total_count", 0)
if actual_count < min_count:
return False, f"Count too low ({actual_count} < {min_count})"
return True, "Passes all thresholds"
def get_feature_importance_summary(
self,
poi_detection_results: Dict[str, Any]
) -> List[Dict[str, Any]]:
"""
Generate summary of feature importance for all categories.
Useful for understanding POI landscape around a bakery.
"""
summary = []
for category_key, data in poi_detection_results.items():
features = data.get("features", {})
thresholds = self.thresholds.get(category_key, {})
is_relevant, reason = self._check_relevance(
features, thresholds, category_key
) if thresholds else (False, "No thresholds defined")
summary.append({
"category": category_key,
"is_relevant": is_relevant,
"proximity_score": features.get("proximity_score", 0),
"weighted_score": features.get("weighted_proximity_score", 0),
"total_count": features.get("total_count", 0),
"distance_to_nearest_m": features.get("distance_to_nearest_m", 9999),
"has_within_100m": features.get("has_within_100m", False),
"rejection_reason": None if is_relevant else reason
})
# Sort by relevance and proximity score
summary.sort(
key=lambda x: (x["is_relevant"], x["proximity_score"]),
reverse=True
)
return summary
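
A self-contained sketch of the selector above. The "schools" category, its feature values, and the threshold numbers are illustrative assumptions; in the application the defaults come from RELEVANCE_THRESHOLDS in app.core.poi_config, and the input mirrors the per-category structure produced by the detection service.

# Selection sketch (category name, feature values, and thresholds are assumptions)
from app.services.poi_feature_selector import POIFeatureSelector

poi_results = {
    "schools": {
        "features": {
            "proximity_score": 2.1,
            "weighted_proximity_score": 2.7,
            "count_0_100m": 0,
            "count_100_300m": 2,
            "count_300_500m": 1,
            "count_500_1000m": 0,
            "total_count": 3,
            "distance_to_nearest_m": 120.0,
            "has_within_100m": False,
            "has_within_300m": True,
            "has_within_500m": True,
        },
        "count": 3,
    },
}
selector = POIFeatureSelector(thresholds={
    "schools": {"min_proximity_score": 0.5, "max_distance_to_nearest_m": 500, "min_count": 1},
})
selection = selector.select_relevant_features(poi_results, tenant_id="tenant-demo")
print(selection["relevant_categories"])                       # ['schools']
print(selection["features"]["poi_schools_has_within_300m"])   # 1 (booleans become ints)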

View File

@@ -0,0 +1,468 @@
"""
POI Refresh Service
Manages periodic POI context refresh jobs.
Detects changes in POI landscape and updates tenant POI contexts.
"""
import asyncio
from datetime import datetime, timezone, timedelta
from typing import Optional, Dict, Any, List
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_, or_
import structlog
from app.models.poi_refresh_job import POIRefreshJob
from app.models.poi_context import TenantPOIContext
from app.services.poi_detection_service import POIDetectionService
from app.core.database import database_manager
logger = structlog.get_logger()
class POIRefreshService:
"""
POI Refresh Service
Manages background jobs for periodic POI context refresh.
Default refresh cycle: 180 days (6 months).
"""
DEFAULT_REFRESH_INTERVAL_DAYS = 180
DEFAULT_MAX_ATTEMPTS = 3
STALE_THRESHOLD_DAYS = 180
def __init__(
self,
poi_detection_service: Optional[POIDetectionService] = None,
refresh_interval_days: int = DEFAULT_REFRESH_INTERVAL_DAYS
):
"""
Initialize POI refresh service.
Args:
poi_detection_service: POI detection service instance
refresh_interval_days: Days between POI refreshes (default: 180)
"""
self.poi_detection_service = poi_detection_service or POIDetectionService()
self.refresh_interval_days = refresh_interval_days
logger.info(
"POI Refresh Service initialized",
refresh_interval_days=refresh_interval_days
)
async def schedule_refresh_job(
self,
tenant_id: str,
latitude: float,
longitude: float,
scheduled_at: Optional[datetime] = None,
session: Optional[AsyncSession] = None
) -> POIRefreshJob:
"""
Schedule a POI refresh job for a tenant.
Args:
tenant_id: Tenant UUID
latitude: Bakery latitude
longitude: Bakery longitude
scheduled_at: When to run the job (default: now + refresh_interval)
session: Database session
Returns:
Created POIRefreshJob
"""
if scheduled_at is None:
scheduled_at = datetime.now(timezone.utc) + timedelta(
days=self.refresh_interval_days
)
async def _create_job(db_session: AsyncSession):
# Check if pending job already exists
result = await db_session.execute(
select(POIRefreshJob).where(
and_(
POIRefreshJob.tenant_id == tenant_id,
POIRefreshJob.status.in_(["pending", "running"])
)
)
)
existing_job = result.scalar_one_or_none()
if existing_job:
logger.info(
"POI refresh job already scheduled",
tenant_id=tenant_id,
job_id=str(existing_job.id),
scheduled_at=existing_job.scheduled_at
)
return existing_job
# Create new job
job = POIRefreshJob(
tenant_id=tenant_id,
latitude=latitude,
longitude=longitude,
scheduled_at=scheduled_at,
status="pending",
max_attempts=self.DEFAULT_MAX_ATTEMPTS
)
db_session.add(job)
await db_session.commit()
await db_session.refresh(job)
logger.info(
"POI refresh job scheduled",
tenant_id=tenant_id,
job_id=str(job.id),
scheduled_at=scheduled_at
)
return job
if session:
return await _create_job(session)
else:
async with database_manager.get_session() as db_session:
return await _create_job(db_session)
async def execute_refresh_job(
self,
job_id: str,
session: Optional[AsyncSession] = None
) -> Dict[str, Any]:
"""
Execute a POI refresh job.
Args:
job_id: Job UUID
session: Database session
Returns:
Execution result with status and details
"""
async def _execute(db_session: AsyncSession):
# Load job
result = await db_session.execute(
select(POIRefreshJob).where(POIRefreshJob.id == job_id)
)
job = result.scalar_one_or_none()
if not job:
raise ValueError(f"Job not found: {job_id}")
if job.status == "running":
return {
"status": "already_running",
"job_id": str(job.id),
"message": "Job is already running"
}
if job.status == "completed":
return {
"status": "already_completed",
"job_id": str(job.id),
"message": "Job already completed"
}
if not job.can_retry:
return {
"status": "max_attempts_reached",
"job_id": str(job.id),
"message": f"Max attempts ({job.max_attempts}) reached"
}
# Update job status
job.status = "running"
job.started_at = datetime.now(timezone.utc)
job.attempt_count += 1
await db_session.commit()
logger.info(
"Executing POI refresh job",
job_id=str(job.id),
tenant_id=str(job.tenant_id),
attempt=job.attempt_count
)
try:
# Get existing POI context
poi_result = await db_session.execute(
select(TenantPOIContext).where(
TenantPOIContext.tenant_id == job.tenant_id
)
)
existing_context = poi_result.scalar_one_or_none()
# Perform POI detection
detection_result = await self.poi_detection_service.detect_pois_for_bakery(
latitude=job.latitude,
longitude=job.longitude,
tenant_id=str(job.tenant_id)
)
# Analyze changes against the stored per-category results
# (detect_pois_for_bakery nests them under "poi_categories")
old_results = existing_context.poi_detection_results if existing_context else {}
old_results = (old_results or {}).get("poi_categories", old_results or {})
changes = self._analyze_changes(
old_results,
detection_result.get("poi_categories", {})
)
# Update job with results
job.status = "completed"
job.completed_at = datetime.now(timezone.utc)
job.pois_detected = sum(
data.get("count", 0)
for data in detection_result.get("poi_categories", {}).values()
)
job.changes_detected = changes["has_significant_changes"]
job.change_summary = changes
# Schedule next refresh
job.next_scheduled_at = datetime.now(timezone.utc) + timedelta(
days=self.refresh_interval_days
)
await db_session.commit()
logger.info(
"POI refresh job completed",
job_id=str(job.id),
tenant_id=str(job.tenant_id),
pois_detected=job.pois_detected,
changes_detected=job.changes_detected,
duration_seconds=job.duration_seconds
)
# Schedule next job
await self.schedule_refresh_job(
tenant_id=str(job.tenant_id),
latitude=job.latitude,
longitude=job.longitude,
scheduled_at=job.next_scheduled_at,
session=db_session
)
return {
"status": "success",
"job_id": str(job.id),
"pois_detected": job.pois_detected,
"changes_detected": job.changes_detected,
"change_summary": changes,
"duration_seconds": job.duration_seconds,
"next_scheduled_at": job.next_scheduled_at.isoformat()
}
except Exception as e:
# Job failed
job.status = "failed"
job.completed_at = datetime.now(timezone.utc)
job.error_message = str(e)
job.error_details = {
"error_type": type(e).__name__,
"error_message": str(e),
"attempt": job.attempt_count
}
# Schedule retry if attempts remaining
if job.can_retry:
job.next_scheduled_at = datetime.now(timezone.utc) + timedelta(hours=1)
logger.warning(
"POI refresh job failed, will retry",
job_id=str(job.id),
tenant_id=str(job.tenant_id),
attempt=job.attempt_count,
max_attempts=job.max_attempts,
error=str(e)
)
else:
logger.error(
"POI refresh job failed permanently",
job_id=str(job.id),
tenant_id=str(job.tenant_id),
attempt=job.attempt_count,
error=str(e),
exc_info=True
)
await db_session.commit()
return {
"status": "failed",
"job_id": str(job.id),
"error": str(e),
"attempt": job.attempt_count,
"can_retry": job.can_retry
}
if session:
return await _execute(session)
else:
async with database_manager.get_session() as db_session:
return await _execute(db_session)
def _analyze_changes(
self,
old_results: Dict[str, Any],
new_results: Dict[str, Any]
) -> Dict[str, Any]:
"""
Analyze changes between old and new POI detection results.
Args:
old_results: Previous POI detection results
new_results: New POI detection results
Returns:
Change analysis with significance flag
"""
changes = {
"has_significant_changes": False,
"category_changes": {},
"total_poi_change": 0,
"new_categories": [],
"removed_categories": []
}
old_categories = set(old_results.keys())
new_categories = set(new_results.keys())
# New categories
changes["new_categories"] = list(new_categories - old_categories)
# Removed categories
changes["removed_categories"] = list(old_categories - new_categories)
# Analyze changes per category
for category in new_categories:
old_count = old_results.get(category, {}).get("count", 0)
new_count = new_results.get(category, {}).get("count", 0)
change = new_count - old_count
if abs(change) > 0:
changes["category_changes"][category] = {
"old_count": old_count,
"new_count": new_count,
"change": change,
"change_percent": (change / old_count * 100) if old_count > 0 else 100
}
changes["total_poi_change"] += abs(change)
# Determine if changes are significant
# Significant if: 10+ POIs changed OR 20%+ change OR new/removed categories
total_old_pois = sum(data.get("count", 0) for data in old_results.values())
if total_old_pois > 0:
change_percent = (changes["total_poi_change"] / total_old_pois) * 100
changes["total_change_percent"] = change_percent
changes["has_significant_changes"] = (
changes["total_poi_change"] >= 10
or change_percent >= 20
or len(changes["new_categories"]) > 0
or len(changes["removed_categories"]) > 0
)
else:
changes["has_significant_changes"] = changes["total_poi_change"] > 0
return changes
async def get_pending_jobs(
self,
limit: int = 100,
session: Optional[AsyncSession] = None
) -> List[POIRefreshJob]:
"""
Get pending jobs that are due for execution.
Args:
limit: Maximum number of jobs to return
session: Database session
Returns:
List of pending jobs
"""
async def _get_jobs(db_session: AsyncSession):
result = await db_session.execute(
select(POIRefreshJob)
.where(
and_(
POIRefreshJob.status == "pending",
POIRefreshJob.scheduled_at <= datetime.now(timezone.utc)
)
)
.order_by(POIRefreshJob.scheduled_at)
.limit(limit)
)
return result.scalars().all()
if session:
return await _get_jobs(session)
else:
async with database_manager.get_session() as db_session:
return await _get_jobs(db_session)
async def process_pending_jobs(
self,
max_concurrent: int = 5,
session: Optional[AsyncSession] = None
) -> Dict[str, Any]:
"""
Process all pending jobs concurrently.
Args:
max_concurrent: Maximum concurrent job executions
session: Database session
Returns:
Processing summary
"""
pending_jobs = await self.get_pending_jobs(session=session)
if not pending_jobs:
logger.info("No pending POI refresh jobs")
return {
"total_jobs": 0,
"successful": 0,
"failed": 0,
"results": []
}
logger.info(
"Processing pending POI refresh jobs",
count=len(pending_jobs),
max_concurrent=max_concurrent
)
# Process jobs with concurrency limit
semaphore = asyncio.Semaphore(max_concurrent)
async def process_job(job: POIRefreshJob):
async with semaphore:
return await self.execute_refresh_job(str(job.id))
results = await asyncio.gather(
*[process_job(job) for job in pending_jobs],
return_exceptions=True
)
# Summarize results
successful = sum(1 for r in results if isinstance(r, dict) and r.get("status") == "success")
failed = sum(1 for r in results if isinstance(r, dict) and r.get("status") == "failed")
errors = sum(1 for r in results if isinstance(r, Exception))
summary = {
"total_jobs": len(pending_jobs),
"successful": successful,
"failed": failed + errors,
"results": [r if not isinstance(r, Exception) else {"status": "error", "error": str(r)} for r in results]
}
logger.info(
"POI refresh jobs processing completed",
**summary
)
return summary
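
A usage sketch for the refresh service above; the module path, tenant UUID, and coordinates are illustrative assumptions, and it requires the application's database (database_manager) to be configured.

# Refresh-service sketch (identifiers below are assumptions; needs a configured database)
import asyncio
from app.services.poi_refresh_service import POIRefreshService

async def main() -> None:
    service = POIRefreshService(refresh_interval_days=180)
    job = await service.schedule_refresh_job(
        tenant_id="00000000-0000-0000-0000-000000000001",
        latitude=43.2630,
        longitude=-2.9349,
    )
    print(job.status, job.scheduled_at)
    summary = await service.process_pending_jobs(max_concurrent=2)
    print(summary["total_jobs"], summary["successful"], summary["failed"])

asyncio.run(main())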

View File

@@ -0,0 +1,187 @@
"""
POI Refresh Scheduler
Background scheduler for periodic POI context refresh.
Runs every hour to check for and execute pending POI refresh jobs.
"""
import asyncio
from typing import Optional
from datetime import datetime, timezone
import structlog
from app.services.poi_refresh_service import POIRefreshService
logger = structlog.get_logger()
class POIRefreshScheduler:
"""
POI Refresh Scheduler
Background task that periodically checks for and executes
pending POI refresh jobs.
"""
def __init__(
self,
poi_refresh_service: Optional[POIRefreshService] = None,
check_interval_seconds: int = 3600, # 1 hour
max_concurrent_jobs: int = 5
):
"""
Initialize POI refresh scheduler.
Args:
poi_refresh_service: POI refresh service instance
check_interval_seconds: Seconds between checks (default: 3600 = 1 hour)
max_concurrent_jobs: Max concurrent job executions (default: 5)
"""
self.poi_refresh_service = poi_refresh_service or POIRefreshService()
self.check_interval_seconds = check_interval_seconds
self.max_concurrent_jobs = max_concurrent_jobs
self._task: Optional[asyncio.Task] = None
self._running = False
logger.info(
"POI Refresh Scheduler initialized",
check_interval_seconds=check_interval_seconds,
max_concurrent_jobs=max_concurrent_jobs
)
async def start(self):
"""Start the scheduler background task"""
if self._running:
logger.warning("POI Refresh Scheduler already running")
return
self._running = True
self._task = asyncio.create_task(self._run_scheduler())
logger.info("POI Refresh Scheduler started")
async def stop(self):
"""Stop the scheduler background task"""
if not self._running:
return
self._running = False
if self._task:
self._task.cancel()
try:
await self._task
except asyncio.CancelledError:
pass
logger.info("POI Refresh Scheduler stopped")
async def _run_scheduler(self):
"""Main scheduler loop"""
logger.info("POI Refresh Scheduler loop started")
while self._running:
try:
await self._process_cycle()
except Exception as e:
logger.error(
"POI refresh scheduler cycle failed",
error=str(e),
exc_info=True
)
# Wait for next cycle
try:
await asyncio.sleep(self.check_interval_seconds)
except asyncio.CancelledError:
break
logger.info("POI Refresh Scheduler loop ended")
async def _process_cycle(self):
"""Process one scheduler cycle"""
cycle_start = datetime.now(timezone.utc)
logger.debug(
"POI refresh scheduler cycle started",
timestamp=cycle_start.isoformat()
)
# Process pending jobs
result = await self.poi_refresh_service.process_pending_jobs(
max_concurrent=self.max_concurrent_jobs
)
cycle_end = datetime.now(timezone.utc)
cycle_duration = (cycle_end - cycle_start).total_seconds()
if result["total_jobs"] > 0:
logger.info(
"POI refresh scheduler cycle completed",
total_jobs=result["total_jobs"],
successful=result["successful"],
failed=result["failed"],
cycle_duration_seconds=cycle_duration
)
else:
logger.debug(
"POI refresh scheduler cycle completed (no jobs)",
cycle_duration_seconds=cycle_duration
)
async def trigger_immediate_check(self):
"""Trigger an immediate check for pending jobs (bypasses schedule)"""
logger.info("POI refresh scheduler immediate check triggered")
try:
result = await self.poi_refresh_service.process_pending_jobs(
max_concurrent=self.max_concurrent_jobs
)
logger.info(
"POI refresh scheduler immediate check completed",
total_jobs=result["total_jobs"],
successful=result["successful"],
failed=result["failed"]
)
return result
except Exception as e:
logger.error(
"POI refresh scheduler immediate check failed",
error=str(e),
exc_info=True
)
raise
@property
def is_running(self) -> bool:
"""Check if scheduler is running"""
return self._running
# Global scheduler instance
_scheduler_instance: Optional[POIRefreshScheduler] = None
def get_scheduler() -> POIRefreshScheduler:
"""Get global scheduler instance (singleton)"""
global _scheduler_instance
if _scheduler_instance is None:
_scheduler_instance = POIRefreshScheduler()
return _scheduler_instance
async def start_scheduler():
"""Start global POI refresh scheduler"""
scheduler = get_scheduler()
await scheduler.start()
async def stop_scheduler():
"""Stop global POI refresh scheduler"""
scheduler = get_scheduler()
await scheduler.stop()
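
A lifecycle sketch for wiring the scheduler into application startup and shutdown. The FastAPI lifespan hook and the scheduler module path are assumptions; only start_scheduler and stop_scheduler are defined in the file above.

# Startup/shutdown sketch (FastAPI wiring and module path are assumptions)
from contextlib import asynccontextmanager

from fastapi import FastAPI

from app.services.poi_refresh_scheduler import start_scheduler, stop_scheduler

@asynccontextmanager
async def lifespan(app: FastAPI):
    await start_scheduler()   # begins hourly checks for pending refresh jobs
    try:
        yield
    finally:
        await stop_scheduler()

app = FastAPI(lifespan=lifespan)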