Files
bakery-ia/shared/utils/city_normalization.py
2025-11-14 07:23:56 +01:00

128 lines
3.3 KiB
Python

"""
City normalization utilities for converting free-text city names to normalized city IDs.
This module provides functions to normalize city names from tenant registration
(which are free-text strings) to standardized city_id values used by the
school calendar and location context systems.
"""
from typing import Optional
import logging
logger = logging.getLogger(__name__)

# Mapping of common city name variations to normalized city IDs.
# Matching in normalize_city_id() is case-insensitive, so a new city only
# needs one entry per distinct spelling (e.g. "Sevilla" and "Seville"), not
# one per capitalization. The existing capitalization variants stay listed
# because other modules may index this mapping directly.
CITY_NAME_TO_ID_MAP = {
    # Madrid variations
    "Madrid": "madrid",
    "madrid": "madrid",
    "MADRID": "madrid",
    # Barcelona variations
    "Barcelona": "barcelona",
    "barcelona": "barcelona",
    "BARCELONA": "barcelona",
    # Valencia variations
    "Valencia": "valencia",
    "valencia": "valencia",
    "VALENCIA": "valencia",
    # Seville variations
    "Sevilla": "sevilla",
    "sevilla": "sevilla",
    "Seville": "sevilla",
    "seville": "sevilla",
    # Bilbao variations
    "Bilbao": "bilbao",
    "bilbao": "bilbao",
    # Add more cities as needed
}

# Case-insensitive index over CITY_NAME_TO_ID_MAP, built once at import time.
# Without it, any capitalization not listed verbatim (e.g. "SEVILLE") would
# skip the mapping and fall through to the plain-lowercase fallback.
_CASEFOLDED_CITY_NAME_TO_ID = {
    name.casefold(): city_id for name, city_id in CITY_NAME_TO_ID_MAP.items()
}


def normalize_city_id(city_name: Optional[str]) -> Optional[str]:
    """
    Convert a free-text city name to a normalized city_id.

    Surrounding whitespace is ignored and the lookup in CITY_NAME_TO_ID_MAP
    is case-insensitive, so every capitalization of a known spelling maps to
    the same city_id. Names that are not in the mapping are returned
    lowercased.

    Args:
        city_name: Free-text city name from tenant registration
            (e.g., "Madrid", "MADRID").

    Returns:
        Normalized city_id (e.g., "madrid"), or None if city_name is None,
        empty, or only whitespace. Falls back to the lowercased city_name
        if the name is not in the mapping.

    Examples:
        >>> normalize_city_id("Madrid")
        'madrid'
        >>> normalize_city_id("BARCELONA")
        'barcelona'
        >>> normalize_city_id("SEVILLE")
        'sevilla'
        >>> normalize_city_id("Unknown City")
        'unknown city'
        >>> normalize_city_id(None) is None
        True
    """
    if city_name is None:
        return None

    # Strip whitespace
    city_name = city_name.strip()
    if not city_name:
        logger.warning("Empty city name provided to normalize_city_id")
        return None

    # Exact match first, so entries added to the mapping at runtime are
    # honoured even though the case-insensitive index was built at import.
    city_id = CITY_NAME_TO_ID_MAP.get(city_name)
    if city_id is None:
        city_id = _CASEFOLDED_CITY_NAME_TO_ID.get(city_name.casefold())
    if city_id is not None:
        return city_id

    # Fallback: convert to lowercase for consistency
    normalized = city_name.lower()
    logger.info(
        "City name '%s' not in explicit mapping, using lowercase fallback: '%s'",
        city_name,
        normalized,
    )
    return normalized
def is_city_supported(city_id: str) -> bool:
    """
    Tell whether school calendars are available for a city.

    Madrid is the only city with school calendars at the moment; extend the
    set below as calendars for further cities are added.

    Args:
        city_id: Normalized city_id (e.g., "madrid"). The comparison is
            exact, so an un-normalized name such as "Madrid" is not matched.

    Returns:
        True when the city has school calendars configured, False otherwise.

    Examples:
        >>> is_city_supported("madrid")
        True
        >>> is_city_supported("barcelona")
        False
    """
    # Cities that currently have a school calendar configured.
    cities_with_calendars = frozenset(("madrid",))
    if city_id in cities_with_calendars:
        return True
    return False
def get_supported_cities() -> list[str]:
    """
    Return the city IDs for which school calendars are configured.

    Returns:
        A new list of supported city_id values on every call, so callers
        may modify the result freely.

    Examples:
        >>> get_supported_cities()
        ['madrid']
    """
    calendar_city_ids = ("madrid",)
    return list(calendar_city_ids)