improve features

services/external/app/api/calendar_operations.py (123 lines changed)
@@ -213,17 +213,17 @@ async def check_is_school_holiday(
response_model=TenantLocationContextResponse
)
async def get_tenant_location_context(
tenant_id: UUID = Depends(get_current_user_dep),
tenant_id: str = Path(..., description="Tenant ID"),
current_user: dict = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db)
):
"""Get location context for a tenant including school calendar assignment (cached)"""
try:
tenant_id_str = str(tenant_id)

# Check cache first
cached = await cache.get_cached_tenant_context(tenant_id_str)
cached = await cache.get_cached_tenant_context(tenant_id)
if cached:
logger.debug("Returning cached tenant context", tenant_id=tenant_id_str)
logger.debug("Returning cached tenant context", tenant_id=tenant_id)
return TenantLocationContextResponse(**cached)

# Cache miss - fetch from database
@@ -261,11 +261,16 @@ async def get_tenant_location_context(
)
async def create_or_update_tenant_location_context(
request: TenantLocationContextCreateRequest,
tenant_id: UUID = Depends(get_current_user_dep),
tenant_id: str = Path(..., description="Tenant ID"),
current_user: dict = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db)
):
"""Create or update tenant location context"""
try:

# Convert to UUID for use with repository
tenant_uuid = UUID(tenant_id)

repo = CalendarRepository(db)

# Validate calendar_id if provided
@@ -279,7 +284,7 @@ async def create_or_update_tenant_location_context(

# Create or update context
context_obj = await repo.create_or_update_tenant_location_context(
tenant_id=tenant_id,
tenant_id=tenant_uuid,
city_id=request.city_id,
school_calendar_id=request.school_calendar_id,
neighborhood=request.neighborhood,
@@ -288,13 +293,13 @@ async def create_or_update_tenant_location_context(
)

# Invalidate cache since context was updated
await cache.invalidate_tenant_context(str(tenant_id))
await cache.invalidate_tenant_context(tenant_id)

# Get full context with calendar details
context = await repo.get_tenant_with_calendar(tenant_id)
context = await repo.get_tenant_with_calendar(tenant_uuid)

# Cache the new context
await cache.set_cached_tenant_context(str(tenant_id), context)
await cache.set_cached_tenant_context(tenant_id, context)

return TenantLocationContextResponse(**context)
@@ -317,13 +322,18 @@ async def create_or_update_tenant_location_context(
status_code=204
)
async def delete_tenant_location_context(
tenant_id: UUID = Depends(get_current_user_dep),
tenant_id: str = Path(..., description="Tenant ID"),
current_user: dict = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db)
):
"""Delete tenant location context"""
try:

# Convert to UUID for use with repository
tenant_uuid = UUID(tenant_id)

repo = CalendarRepository(db)
deleted = await repo.delete_tenant_location_context(tenant_id)
deleted = await repo.delete_tenant_location_context(tenant_uuid)

if not deleted:
raise HTTPException(
@@ -347,6 +357,97 @@ async def delete_tenant_location_context(
)


# ===== Calendar Suggestion Endpoint =====

@router.post(
route_builder.build_base_route("location-context/suggest-calendar")
)
async def suggest_calendar_for_tenant(
tenant_id: str = Path(..., description="Tenant ID"),
current_user: dict = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db)
):
"""
Suggest an appropriate school calendar for a tenant based on location and POI data.

This endpoint analyzes:
- Tenant's city location
- Detected schools nearby (from POI detection)
- Available calendars for the city
- Bakery-specific heuristics (primary schools = stronger morning rush)

Returns a suggestion with confidence score and reasoning.
Does NOT automatically assign - requires admin approval.
"""
try:
from app.utils.calendar_suggester import CalendarSuggester
from app.repositories.poi_context_repository import POIContextRepository

tenant_uuid = UUID(tenant_id)

# Get tenant's location context
calendar_repo = CalendarRepository(db)
location_context = await calendar_repo.get_tenant_location_context(tenant_uuid)

if not location_context:
raise HTTPException(
status_code=404,
detail="Location context not found. Create location context first."
)

city_id = location_context.city_id

# Get available calendars for city
calendars_result = await calendar_repo.get_calendars_by_city(city_id, enabled_only=True)
calendars = calendars_result.get("calendars", []) if calendars_result else []

# Get POI context if available
poi_repo = POIContextRepository(db)
poi_context = await poi_repo.get_by_tenant_id(tenant_uuid)
poi_data = poi_context.to_dict() if poi_context else None

# Generate suggestion
suggester = CalendarSuggester()
suggestion = suggester.suggest_calendar_for_tenant(
city_id=city_id,
available_calendars=calendars,
poi_context=poi_data,
tenant_data=None # Could include tenant info if needed
)

# Format for admin display
admin_message = suggester.format_suggestion_for_admin(suggestion)

logger.info(
"Calendar suggestion generated",
tenant_id=tenant_id,
city_id=city_id,
suggested_calendar=suggestion.get("suggested_calendar_id"),
confidence=suggestion.get("confidence")
)

return {
**suggestion,
"admin_message": admin_message,
"tenant_id": tenant_id,
"current_calendar_id": str(location_context.school_calendar_id) if location_context.school_calendar_id else None
}

except HTTPException:
raise
except Exception as e:
logger.error(
"Error generating calendar suggestion",
tenant_id=tenant_id,
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Error generating calendar suggestion: {str(e)}"
)
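For orientation, a rough sketch of the merged payload a client of this endpoint could expect; the keys mirror the suggestion dict built by CalendarSuggester further down plus the three fields added here (admin_message, tenant_id, current_calendar_id), while all values are purely illustrative:

# Illustrative response shape for the suggest-calendar endpoint (values made up)
example_response = {
    "suggested_calendar_id": "6f2a1c3e-...",      # or None when no calendar fits
    "calendar_name": "Madrid Primary 2024-2025",
    "school_type": "primary",
    "academic_year": "2024-2025",
    "confidence": 0.77,
    "confidence_percentage": 77.0,
    "reasoning": ["Detected 2 schools nearby (proximity score: 1.20)", "..."],
    "fallback_calendars": [],
    "should_auto_assign": True,
    "school_analysis": {"has_schools_nearby": True, "school_count": 2},
    "city_id": "madrid",
    "admin_message": "✅ **Suggested**: Madrid Primary 2024-2025 ...",
    "tenant_id": "0b6c7e9d-...",
    "current_calendar_id": None,
}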
# ===== Helper Endpoints =====

@router.get(
services/external/app/api/poi_context.py (82 lines changed)
@@ -21,10 +21,10 @@ from app.core.redis_client import get_redis_client
logger = structlog.get_logger()

router = APIRouter(prefix="/poi-context", tags=["POI Context"])
router = APIRouter(prefix="/tenants", tags=["POI Context"])


@router.post("/{tenant_id}/detect")
@router.post("/{tenant_id}/poi-context/detect")
async def detect_pois_for_tenant(
tenant_id: str,
latitude: float = Query(..., description="Bakery latitude"),
@@ -209,13 +209,79 @@ async def detect_pois_for_tenant(
|
||||
relevant_categories=len(feature_selection.get("relevant_categories", []))
|
||||
)
|
||||
|
||||
# Phase 3: Auto-trigger calendar suggestion after POI detection
|
||||
# This helps admins by providing intelligent calendar recommendations
|
||||
calendar_suggestion = None
|
||||
try:
|
||||
from app.utils.calendar_suggester import CalendarSuggester
|
||||
from app.repositories.calendar_repository import CalendarRepository
|
||||
|
||||
# Get tenant's location context
|
||||
calendar_repo = CalendarRepository(db)
|
||||
location_context = await calendar_repo.get_tenant_location_context(tenant_uuid)
|
||||
|
||||
if location_context and location_context.school_calendar_id is None:
|
||||
# Only suggest if no calendar assigned yet
|
||||
city_id = location_context.city_id
|
||||
|
||||
# Get available calendars for city
|
||||
calendars_result = await calendar_repo.get_calendars_by_city(city_id, enabled_only=True)
|
||||
calendars = calendars_result.get("calendars", []) if calendars_result else []
|
||||
|
||||
if calendars:
|
||||
# Generate suggestion using POI data
|
||||
suggester = CalendarSuggester()
|
||||
calendar_suggestion = suggester.suggest_calendar_for_tenant(
|
||||
city_id=city_id,
|
||||
available_calendars=calendars,
|
||||
poi_context=poi_context.to_dict(),
|
||||
tenant_data=None
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Calendar suggestion auto-generated after POI detection",
|
||||
tenant_id=tenant_id,
|
||||
suggested_calendar=calendar_suggestion.get("calendar_name"),
|
||||
confidence=calendar_suggestion.get("confidence_percentage"),
|
||||
should_auto_assign=calendar_suggestion.get("should_auto_assign")
|
||||
)
|
||||
|
||||
# TODO: Send notification to admin about available suggestion
|
||||
# This will be implemented when notification service is integrated
|
||||
else:
|
||||
logger.info(
|
||||
"No calendars available for city, skipping suggestion",
|
||||
tenant_id=tenant_id,
|
||||
city_id=city_id
|
||||
)
|
||||
elif location_context and location_context.school_calendar_id:
|
||||
logger.info(
|
||||
"Calendar already assigned, skipping suggestion",
|
||||
tenant_id=tenant_id,
|
||||
calendar_id=str(location_context.school_calendar_id)
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"No location context found, skipping calendar suggestion",
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
# Non-blocking: POI detection should succeed even if suggestion fails
|
||||
logger.warning(
|
||||
"Failed to auto-generate calendar suggestion (non-blocking)",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"source": "detection",
|
||||
"poi_context": poi_context.to_dict(),
|
||||
"feature_selection": feature_selection,
|
||||
"competitor_analysis": competitor_analysis,
|
||||
"competitive_insights": competitive_insights
|
||||
"competitive_insights": competitive_insights,
|
||||
"calendar_suggestion": calendar_suggestion # Include suggestion in response
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
@@ -231,7 +297,7 @@ async def detect_pois_for_tenant(
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{tenant_id}")
|
||||
@router.get("/{tenant_id}/poi-context")
|
||||
async def get_poi_context(
|
||||
tenant_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
@@ -265,7 +331,7 @@ async def get_poi_context(
|
||||
}
|
||||
|
||||
|
||||
@router.post("/{tenant_id}/refresh")
|
||||
@router.post("/{tenant_id}/poi-context/refresh")
|
||||
async def refresh_poi_context(
|
||||
tenant_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
@@ -299,7 +365,7 @@ async def refresh_poi_context(
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/{tenant_id}")
|
||||
@router.delete("/{tenant_id}/poi-context")
|
||||
async def delete_poi_context(
|
||||
tenant_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
@@ -327,7 +393,7 @@ async def delete_poi_context(
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{tenant_id}/feature-importance")
|
||||
@router.get("/{tenant_id}/poi-context/feature-importance")
|
||||
async def get_feature_importance(
|
||||
tenant_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
@@ -364,7 +430,7 @@ async def get_feature_importance(
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{tenant_id}/competitor-analysis")
|
||||
@router.get("/{tenant_id}/poi-context/competitor-analysis")
|
||||
async def get_competitor_analysis(
|
||||
tenant_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
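A minimal client-side sketch of the renamed routes above; only the paths come from the diff, while the base URL, auth handling, and the longitude query parameter are assumptions:

import httpx

async def detect_pois(base_url: str, tenant_id: str, lat: float, lon: float) -> dict:
    # POST /tenants/{tenant_id}/poi-context/detect (was /poi-context/{tenant_id}/detect)
    async with httpx.AsyncClient(base_url=base_url) as client:
        resp = await client.post(
            f"/tenants/{tenant_id}/poi-context/detect",
            params={"latitude": lat, "longitude": lon},  # longitude assumed
        )
        resp.raise_for_status()
        return resp.json()  # now also carries "calendar_suggestion" when one was generated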
services/external/app/utils/calendar_suggester.py (new file, 342 lines)
@@ -0,0 +1,342 @@
|
||||
"""
|
||||
Calendar Suggester Utility
|
||||
|
||||
Provides intelligent school calendar suggestions based on POI detection data,
|
||||
tenant location, and heuristics optimized for bakery demand forecasting.
|
||||
"""
|
||||
|
||||
from typing import Optional, Dict, List, Any, Tuple
|
||||
from datetime import datetime, date, timezone
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class CalendarSuggester:
|
||||
"""
|
||||
Suggests appropriate school calendars for tenants based on location context.
|
||||
|
||||
Uses POI detection data, proximity analysis, and bakery-specific heuristics
|
||||
to provide intelligent calendar recommendations with confidence scores.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.logger = logger
|
||||
|
||||
def suggest_calendar_for_tenant(
|
||||
self,
|
||||
city_id: str,
|
||||
available_calendars: List[Dict[str, Any]],
|
||||
poi_context: Optional[Dict[str, Any]] = None,
|
||||
tenant_data: Optional[Dict[str, Any]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Suggest the most appropriate calendar for a tenant.
|
||||
|
||||
Args:
|
||||
city_id: Normalized city ID (e.g., "madrid")
|
||||
available_calendars: List of available school calendars for the city
|
||||
poi_context: Optional POI detection results including school data
|
||||
tenant_data: Optional tenant information (location, etc.)
|
||||
|
||||
Returns:
|
||||
Dict with:
|
||||
- suggested_calendar_id: UUID of suggested calendar or None
|
||||
- calendar_name: Name of suggested calendar
|
||||
- confidence: Float 0.0-1.0 confidence score
|
||||
- reasoning: List of reasoning steps
|
||||
- fallback_calendars: Alternative suggestions
|
||||
- should_assign: Boolean recommendation to auto-assign
|
||||
"""
|
||||
if not available_calendars:
|
||||
return self._no_calendars_available(city_id)
|
||||
|
||||
# Get current academic year
|
||||
academic_year = self._get_current_academic_year()
|
||||
|
||||
# Filter calendars for current academic year
|
||||
current_year_calendars = [
|
||||
cal for cal in available_calendars
|
||||
if cal.get("academic_year") == academic_year
|
||||
]
|
||||
|
||||
if not current_year_calendars:
|
||||
# Fallback to any calendar if current year not available
|
||||
current_year_calendars = available_calendars
|
||||
self.logger.warning(
|
||||
"No calendars for current academic year, using all available",
|
||||
city_id=city_id,
|
||||
academic_year=academic_year
|
||||
)
|
||||
|
||||
# Analyze POI context if available
|
||||
school_analysis = self._analyze_schools_from_poi(poi_context) if poi_context else None
|
||||
|
||||
# Apply bakery-specific heuristics
|
||||
suggestion = self._apply_suggestion_heuristics(
|
||||
current_year_calendars,
|
||||
school_analysis,
|
||||
city_id
|
||||
)
|
||||
|
||||
return suggestion
|
||||
|
||||
def _get_current_academic_year(self) -> str:
|
||||
"""
|
||||
Determine current academic year based on date.
|
||||
|
||||
Academic year runs September to June (Spain):
|
||||
- Jan-Aug: Previous year (e.g., 2024-2025)
|
||||
- Sep-Dec: Current year (e.g., 2025-2026)
|
||||
|
||||
Returns:
|
||||
Academic year string (e.g., "2024-2025")
|
||||
"""
|
||||
today = date.today()
|
||||
year = today.year
|
||||
|
||||
# Academic year starts in September
|
||||
if today.month >= 9: # September onwards
|
||||
return f"{year}-{year + 1}"
|
||||
else: # January-August
|
||||
return f"{year - 1}-{year}"
|
||||
|
||||
def _analyze_schools_from_poi(
|
||||
self,
|
||||
poi_context: Dict[str, Any]
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Analyze school POIs to infer school type preferences.
|
||||
|
||||
Args:
|
||||
poi_context: POI detection results
|
||||
|
||||
Returns:
|
||||
Dict with:
|
||||
- has_schools_nearby: Boolean
|
||||
- school_count: Int count of schools
|
||||
- nearest_distance: Float distance to nearest school (meters)
|
||||
- proximity_score: Float proximity score
|
||||
- school_names: List of detected school names
|
||||
"""
|
||||
try:
|
||||
poi_results = poi_context.get("poi_detection_results", {})
|
||||
schools_data = poi_results.get("schools", {})
|
||||
|
||||
if not schools_data:
|
||||
return None
|
||||
|
||||
school_pois = schools_data.get("pois", [])
|
||||
school_count = len(school_pois)
|
||||
|
||||
if school_count == 0:
|
||||
return None
|
||||
|
||||
# Extract school details
|
||||
school_names = [
|
||||
poi.get("name", "Unknown School")
|
||||
for poi in school_pois
|
||||
if poi.get("name")
|
||||
]
|
||||
|
||||
# Get proximity metrics
|
||||
features = schools_data.get("features", {})
|
||||
proximity_score = features.get("proximity_score", 0.0)
|
||||
|
||||
# Calculate nearest distance (approximate from POI data)
|
||||
nearest_distance = None
|
||||
if school_pois:
|
||||
# If we have POIs, estimate nearest distance
|
||||
# This is approximate - exact calculation would require tenant coords
|
||||
nearest_distance = 100.0 # Default assumption if schools detected
|
||||
|
||||
return {
|
||||
"has_schools_nearby": True,
|
||||
"school_count": school_count,
|
||||
"nearest_distance": nearest_distance,
|
||||
"proximity_score": proximity_score,
|
||||
"school_names": school_names
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(
|
||||
"Failed to analyze schools from POI",
|
||||
error=str(e)
|
||||
)
|
||||
return None
|
||||
|
||||
def _apply_suggestion_heuristics(
|
||||
self,
|
||||
calendars: List[Dict[str, Any]],
|
||||
school_analysis: Optional[Dict[str, Any]],
|
||||
city_id: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Apply heuristics to suggest best calendar.
|
||||
|
||||
Bakery-specific heuristics:
|
||||
1. If schools detected nearby -> Prefer primary (stronger morning rush)
|
||||
2. If no schools detected -> Still suggest primary (more common, safer default)
|
||||
3. Primary schools have stronger impact on bakery traffic
|
||||
|
||||
Args:
|
||||
calendars: List of available calendars
|
||||
school_analysis: Analysis of nearby schools
|
||||
city_id: City identifier
|
||||
|
||||
Returns:
|
||||
Suggestion dict with confidence and reasoning
|
||||
"""
|
||||
reasoning = []
|
||||
confidence = 0.0
|
||||
|
||||
# Separate calendars by type
|
||||
primary_calendars = [c for c in calendars if c.get("school_type") == "primary"]
|
||||
secondary_calendars = [c for c in calendars if c.get("school_type") == "secondary"]
|
||||
other_calendars = [c for c in calendars if c.get("school_type") not in ["primary", "secondary"]]
|
||||
|
||||
# Heuristic 1: Schools detected nearby
|
||||
if school_analysis and school_analysis.get("has_schools_nearby"):
|
||||
school_count = school_analysis.get("school_count", 0)
|
||||
proximity_score = school_analysis.get("proximity_score", 0.0)
|
||||
|
||||
reasoning.append(f"Detected {school_count} schools nearby (proximity score: {proximity_score:.2f})")
|
||||
|
||||
if primary_calendars:
|
||||
suggested = primary_calendars[0]
|
||||
confidence = min(0.85, 0.65 + (proximity_score * 0.1)) # 65-85% confidence
|
||||
reasoning.append("Primary schools create strong morning rush (7:30-9am drop-off)")
|
||||
reasoning.append("Primary calendars recommended for bakeries near schools")
|
||||
elif secondary_calendars:
|
||||
suggested = secondary_calendars[0]
|
||||
confidence = 0.70
|
||||
reasoning.append("Secondary school calendars available (later morning start)")
|
||||
else:
|
||||
suggested = calendars[0]
|
||||
confidence = 0.50
|
||||
reasoning.append("Using available calendar (school type not specified)")
|
||||
|
||||
# Heuristic 2: No schools detected
|
||||
else:
|
||||
reasoning.append("No schools detected within 500m radius")
|
||||
|
||||
if primary_calendars:
|
||||
suggested = primary_calendars[0]
|
||||
confidence = 0.60 # Lower confidence without detected schools
|
||||
reasoning.append("Defaulting to primary calendar (more common, safer choice)")
|
||||
reasoning.append("Primary school holidays still affect general foot traffic")
|
||||
elif secondary_calendars:
|
||||
suggested = secondary_calendars[0]
|
||||
confidence = 0.55
|
||||
reasoning.append("Secondary calendar available as default")
|
||||
elif other_calendars:
|
||||
suggested = other_calendars[0]
|
||||
confidence = 0.50
|
||||
reasoning.append("Using available calendar")
|
||||
else:
|
||||
suggested = calendars[0]
|
||||
confidence = 0.45
|
||||
reasoning.append("No preferred calendar type available")
|
||||
|
||||
# Confidence adjustment based on school analysis quality
|
||||
if school_analysis:
|
||||
if school_analysis.get("school_count", 0) >= 3:
|
||||
confidence = min(1.0, confidence + 0.05) # Boost for multiple schools
|
||||
reasoning.append("High confidence: Multiple schools detected")
|
||||
|
||||
proximity = school_analysis.get("proximity_score", 0.0)
|
||||
if proximity > 2.0:
|
||||
confidence = min(1.0, confidence + 0.05) # Boost for close proximity
|
||||
reasoning.append("High confidence: Schools very close to bakery")
|
||||
|
||||
# Determine if we should auto-assign
|
||||
# Only auto-assign if confidence >= 75% AND schools detected
|
||||
should_auto_assign = (
|
||||
confidence >= 0.75 and
|
||||
school_analysis is not None and
|
||||
school_analysis.get("has_schools_nearby", False)
|
||||
)
|
||||
|
||||
# Build fallback suggestions
|
||||
fallback_calendars = []
|
||||
for cal in calendars:
|
||||
if cal.get("id") != suggested.get("id"):
|
||||
fallback_calendars.append({
|
||||
"calendar_id": str(cal.get("id")),
|
||||
"calendar_name": cal.get("name"),
|
||||
"school_type": cal.get("school_type"),
|
||||
"academic_year": cal.get("academic_year")
|
||||
})
|
||||
|
||||
return {
|
||||
"suggested_calendar_id": str(suggested.get("id")),
|
||||
"calendar_name": suggested.get("name"),
|
||||
"school_type": suggested.get("school_type"),
|
||||
"academic_year": suggested.get("academic_year"),
|
||||
"confidence": round(confidence, 2),
|
||||
"confidence_percentage": round(confidence * 100, 1),
|
||||
"reasoning": reasoning,
|
||||
"fallback_calendars": fallback_calendars[:2], # Top 2 alternatives
|
||||
"should_auto_assign": should_auto_assign,
|
||||
"school_analysis": school_analysis,
|
||||
"city_id": city_id
|
||||
}
|
||||
|
||||
def _no_calendars_available(self, city_id: str) -> Dict[str, Any]:
|
||||
"""Return response when no calendars available for city."""
|
||||
return {
|
||||
"suggested_calendar_id": None,
|
||||
"calendar_name": None,
|
||||
"school_type": None,
|
||||
"academic_year": None,
|
||||
"confidence": 0.0,
|
||||
"confidence_percentage": 0.0,
|
||||
"reasoning": [
|
||||
f"No school calendars configured for city: {city_id}",
|
||||
"Calendar assignment not possible at this time",
|
||||
"Location context created without calendar (can be added later)"
|
||||
],
|
||||
"fallback_calendars": [],
|
||||
"should_auto_assign": False,
|
||||
"school_analysis": None,
|
||||
"city_id": city_id
|
||||
}
|
||||
|
||||
def format_suggestion_for_admin(self, suggestion: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Format suggestion as human-readable text for admin UI.
|
||||
|
||||
Args:
|
||||
suggestion: Suggestion dict from suggest_calendar_for_tenant
|
||||
|
||||
Returns:
|
||||
Formatted string for display
|
||||
"""
|
||||
if not suggestion.get("suggested_calendar_id"):
|
||||
return f"⚠️ No calendars available for {suggestion.get('city_id', 'this city')}"
|
||||
|
||||
confidence_pct = suggestion.get("confidence_percentage", 0)
|
||||
calendar_name = suggestion.get("calendar_name", "Unknown")
|
||||
school_type = suggestion.get("school_type", "").capitalize()
|
||||
|
||||
# Confidence emoji
|
||||
if confidence_pct >= 80:
|
||||
emoji = "✅"
|
||||
elif confidence_pct >= 60:
|
||||
emoji = "📊"
|
||||
else:
|
||||
emoji = "💡"
|
||||
|
||||
text = f"{emoji} **Suggested**: {calendar_name}\n"
|
||||
text += f"**Type**: {school_type} | **Confidence**: {confidence_pct}%\n\n"
|
||||
text += "**Reasoning**:\n"
|
||||
|
||||
for reason in suggestion.get("reasoning", []):
|
||||
text += f"• {reason}\n"
|
||||
|
||||
if suggestion.get("fallback_calendars"):
|
||||
text += "\n**Alternatives**:\n"
|
||||
for alt in suggestion.get("fallback_calendars", [])[:2]:
|
||||
text += f"• {alt.get('calendar_name')} ({alt.get('school_type')})\n"
|
||||
|
||||
return text
|
||||
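A minimal sketch of exercising the new suggester in isolation; the calendar and POI dicts below are hand-built illustrative data, not fixtures from the repository:

from app.utils.calendar_suggester import CalendarSuggester

suggester = CalendarSuggester()
suggestion = suggester.suggest_calendar_for_tenant(
    city_id="madrid",
    available_calendars=[
        {"id": "a1", "name": "Madrid Primary 2024-2025",
         "school_type": "primary", "academic_year": "2024-2025"},
        {"id": "b2", "name": "Madrid Secondary 2024-2025",
         "school_type": "secondary", "academic_year": "2024-2025"},
    ],
    poi_context={"poi_detection_results": {"schools": {
        "pois": [{"name": "CEIP Ejemplo"}, {"name": "Colegio Demo"}],
        "features": {"proximity_score": 1.2},
    }}},
)
# With schools nearby and a primary calendar available, confidence lands around
# min(0.85, 0.65 + 1.2 * 0.1) = 0.77 and should_auto_assign is True.
print(suggestion["calendar_name"], suggestion["confidence_percentage"])
print(suggester.format_suggestion_for_admin(suggestion))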
@@ -56,21 +56,17 @@ class BakeryForecaster:
|
||||
from app.services.poi_feature_service import POIFeatureService
|
||||
self.poi_feature_service = POIFeatureService()
|
||||
|
||||
# Initialize enhanced data processor from shared module
|
||||
if use_enhanced_features:
|
||||
# Import enhanced data processor from training service
|
||||
import sys
|
||||
import os
|
||||
# Add training service to path
|
||||
training_path = os.path.join(os.path.dirname(__file__), '../../../training')
|
||||
if training_path not in sys.path:
|
||||
sys.path.insert(0, training_path)
|
||||
|
||||
try:
|
||||
from app.ml.data_processor import EnhancedBakeryDataProcessor
|
||||
self.data_processor = EnhancedBakeryDataProcessor(database_manager)
|
||||
logger.info("Enhanced features enabled for forecasting")
|
||||
from shared.ml.data_processor import EnhancedBakeryDataProcessor
|
||||
self.data_processor = EnhancedBakeryDataProcessor(region='MD')
|
||||
logger.info("Enhanced features enabled using shared data processor")
|
||||
except ImportError as e:
|
||||
logger.warning(f"Could not import EnhancedBakeryDataProcessor: {e}, falling back to basic features")
|
||||
logger.warning(
|
||||
f"Could not import EnhancedBakeryDataProcessor from shared module: {e}. "
|
||||
"Falling back to basic features."
|
||||
)
|
||||
self.use_enhanced_features = False
|
||||
self.data_processor = None
|
||||
else:
|
||||
|
||||
@@ -1056,13 +1056,13 @@ class EnhancedForecastingService:
|
||||
- External service is unavailable
|
||||
"""
|
||||
try:
|
||||
# Get tenant's calendar ID
|
||||
calendar_id = await self.data_client.get_tenant_calendar(tenant_id)
|
||||
# Get tenant's calendar information
|
||||
calendar_info = await self.data_client.fetch_tenant_calendar(tenant_id)
|
||||
|
||||
if calendar_id:
|
||||
if calendar_info:
|
||||
# Check school holiday via external service
|
||||
is_school_holiday = await self.data_client.check_school_holiday(
|
||||
calendar_id=calendar_id,
|
||||
calendar_id=calendar_info["calendar_id"],
|
||||
check_date=date_obj.isoformat(),
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
|
||||
@@ -206,13 +206,39 @@ class PredictionService:
|
||||
|
||||
# Calculate confidence interval
|
||||
confidence_interval = upper_bound - lower_bound
|
||||
|
||||
|
||||
# Adjust confidence based on data freshness if historical features were calculated
|
||||
adjusted_confidence_level = confidence_level
|
||||
data_availability_score = features.get('historical_data_availability_score', 1.0) # Default to 1.0 if not available
|
||||
|
||||
# Reduce confidence if historical data is significantly old
|
||||
if data_availability_score < 0.5:
|
||||
# For data availability score < 0.5 (more than 90 days old), reduce confidence
|
||||
adjusted_confidence_level = max(0.6, confidence_level * data_availability_score)
|
||||
|
||||
# Increase confidence interval to reflect uncertainty
|
||||
adjustment_factor = 1.0 + (0.5 * (1.0 - data_availability_score)) # Up to 50% wider interval
|
||||
adjusted_lower_bound = prediction_value - (prediction_value - lower_bound) * adjustment_factor
|
||||
adjusted_upper_bound = prediction_value + (upper_bound - prediction_value) * adjustment_factor
|
||||
|
||||
logger.info("Adjusted prediction confidence due to stale historical data",
|
||||
original_confidence=confidence_level,
|
||||
adjusted_confidence=adjusted_confidence_level,
|
||||
data_availability_score=data_availability_score,
|
||||
original_interval=confidence_interval,
|
||||
adjusted_interval=adjusted_upper_bound - adjusted_lower_bound)
|
||||
|
||||
lower_bound = max(0, adjusted_lower_bound)
|
||||
upper_bound = adjusted_upper_bound
|
||||
confidence_interval = upper_bound - lower_bound
|
||||
|
||||
result = {
|
||||
"prediction": max(0, prediction_value), # Ensure non-negative
|
||||
"lower_bound": max(0, lower_bound),
|
||||
"upper_bound": max(0, upper_bound),
|
||||
"confidence_interval": confidence_interval,
|
||||
"confidence_level": confidence_level
|
||||
"confidence_level": adjusted_confidence_level,
|
||||
"data_freshness_score": data_availability_score # Include data freshness in result
|
||||
}
|
||||
|
||||
# Record metrics
|
||||
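To make the freshness adjustment above concrete, a small worked example with illustrative numbers (prediction 100, bounds 80/120, confidence 0.95, availability score 0.4):

# data_availability_score = 0.4 (< 0.5, so the adjustment kicks in)
adjusted_confidence_level = max(0.6, 0.95 * 0.4)    # -> 0.6
adjustment_factor = 1.0 + 0.5 * (1.0 - 0.4)         # -> 1.3, i.e. 30% wider interval
adjusted_lower_bound = 100 - (100 - 80) * 1.3       # -> 74.0
adjusted_upper_bound = 100 + (120 - 100) * 1.3      # -> 126.0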
@@ -222,35 +248,45 @@ class PredictionService:
|
||||
# Register metrics if not already registered
|
||||
if "prediction_processing_time" not in metrics._histograms:
|
||||
metrics.register_histogram(
|
||||
"prediction_processing_time",
|
||||
"Time taken to process predictions",
|
||||
"prediction_processing_time",
|
||||
"Time taken to process predictions",
|
||||
labels=['service', 'model_type']
|
||||
)
|
||||
|
||||
|
||||
if "predictions_served_total" not in metrics._counters:
|
||||
try:
|
||||
metrics.register_counter(
|
||||
"predictions_served_total",
|
||||
"Total number of predictions served",
|
||||
"predictions_served_total",
|
||||
"Total number of predictions served",
|
||||
labels=['service', 'status']
|
||||
)
|
||||
except Exception as reg_error:
|
||||
# Metric might already exist in global registry
|
||||
logger.debug("Counter already exists in registry", error=str(reg_error))
|
||||
|
||||
# Now record the metrics
|
||||
metrics.observe_histogram(
|
||||
"prediction_processing_time",
|
||||
processing_time,
|
||||
labels={'service': 'forecasting-service', 'model_type': 'prophet'}
|
||||
)
|
||||
metrics.increment_counter(
|
||||
"predictions_served_total",
|
||||
labels={'service': 'forecasting-service', 'status': 'success'}
|
||||
)
|
||||
|
||||
# Now record the metrics - try with expected labels, fallback if needed
|
||||
try:
|
||||
metrics.observe_histogram(
|
||||
"prediction_processing_time",
|
||||
processing_time,
|
||||
labels={'service': 'forecasting-service', 'model_type': 'prophet'}
|
||||
)
|
||||
metrics.increment_counter(
|
||||
"predictions_served_total",
|
||||
labels={'service': 'forecasting-service', 'status': 'success'}
|
||||
)
|
||||
except Exception as label_error:
|
||||
# If specific labels fail, try without labels to avoid breaking predictions
|
||||
logger.warning("Failed to record metrics with labels, trying without", error=str(label_error))
|
||||
try:
|
||||
metrics.observe_histogram("prediction_processing_time", processing_time)
|
||||
metrics.increment_counter("predictions_served_total")
|
||||
except Exception as no_label_error:
|
||||
logger.warning("Failed to record metrics even without labels", error=str(no_label_error))
|
||||
|
||||
except Exception as metrics_error:
|
||||
# Log metrics error but don't fail the prediction
|
||||
logger.warning("Failed to record metrics", error=str(metrics_error))
|
||||
logger.warning("Failed to register or record metrics", error=str(metrics_error))
|
||||
|
||||
logger.info("Prediction generated successfully",
|
||||
model_id=model_id,
|
||||
@@ -260,22 +296,32 @@ class PredictionService:
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error generating prediction",
|
||||
error=str(e),
|
||||
logger.error("Error generating prediction",
|
||||
error=str(e),
|
||||
model_id=model_id)
|
||||
# Record error metrics with robust error handling
|
||||
try:
|
||||
if "prediction_errors_total" not in metrics._counters:
|
||||
metrics.register_counter(
|
||||
"prediction_errors_total",
|
||||
"Total number of prediction errors",
|
||||
"prediction_errors_total",
|
||||
"Total number of prediction errors",
|
||||
labels=['service', 'error_type']
|
||||
)
|
||||
metrics.increment_counter(
|
||||
"prediction_errors_total",
|
||||
labels={'service': 'forecasting-service', 'error_type': 'prediction_failed'}
|
||||
)
|
||||
except Exception:
|
||||
pass # Don't fail on metrics errors
|
||||
|
||||
# Try with labels first, then without if that fails
|
||||
try:
|
||||
metrics.increment_counter(
|
||||
"prediction_errors_total",
|
||||
labels={'service': 'forecasting-service', 'error_type': 'prediction_failed'}
|
||||
)
|
||||
except Exception as label_error:
|
||||
logger.debug("Failed to record error metrics with labels", error=str(label_error))
|
||||
try:
|
||||
metrics.increment_counter("prediction_errors_total")
|
||||
except Exception as no_label_error:
|
||||
logger.warning("Failed to record error metrics even without labels", error=str(no_label_error))
|
||||
except Exception as registration_error:
|
||||
logger.warning("Failed to register error metrics", error=str(registration_error))
|
||||
raise
|
||||
|
||||
async def predict_with_weather_forecast(
|
||||
@@ -353,6 +399,33 @@ class PredictionService:
|
||||
'weather_description': day_weather.get('description', 'Clear')
|
||||
})
|
||||
|
||||
# CRITICAL FIX: Fetch historical sales data and calculate historical features
|
||||
# This populates lag, rolling, and trend features for better predictions
|
||||
# Using 90 days for better trend analysis and more robust rolling statistics
|
||||
if 'tenant_id' in enriched_features and 'inventory_product_id' in enriched_features and 'date' in enriched_features:
|
||||
try:
|
||||
forecast_date = pd.to_datetime(enriched_features['date'])
|
||||
historical_sales = await self._fetch_historical_sales(
|
||||
tenant_id=enriched_features['tenant_id'],
|
||||
inventory_product_id=enriched_features['inventory_product_id'],
|
||||
forecast_date=forecast_date,
|
||||
days_back=90 # Changed from 30 to 90 for better historical context
|
||||
)
|
||||
|
||||
# Calculate historical features and merge into features dict
|
||||
historical_features = self._calculate_historical_features(
|
||||
historical_sales, forecast_date
|
||||
)
|
||||
enriched_features.update(historical_features)
|
||||
|
||||
logger.info("Historical features enriched",
|
||||
lag_1_day=historical_features.get('lag_1_day'),
|
||||
rolling_mean_7d=historical_features.get('rolling_mean_7d'))
|
||||
except Exception as e:
|
||||
logger.warning("Failed to enrich with historical features, using defaults",
|
||||
error=str(e))
|
||||
# Features dict will use defaults (0.0) from _prepare_prophet_features
|
||||
|
||||
# Prepare Prophet dataframe with weather features
|
||||
prophet_df = self._prepare_prophet_features(enriched_features)
|
||||
|
||||
@@ -363,6 +436,29 @@ class PredictionService:
|
||||
lower_bound = float(forecast['yhat_lower'].iloc[0])
|
||||
upper_bound = float(forecast['yhat_upper'].iloc[0])
|
||||
|
||||
# Calculate confidence adjustment based on data freshness
|
||||
current_confidence_level = confidence_level
|
||||
data_availability_score = enriched_features.get('historical_data_availability_score', 1.0) # Default to 1.0 if not available
|
||||
|
||||
# Adjust confidence based on data freshness if historical features were calculated
|
||||
# Reduce confidence if historical data is significantly old
|
||||
if data_availability_score < 0.5:
|
||||
# For data availability score < 0.5 (more than 90 days old), reduce confidence
|
||||
current_confidence_level = max(0.6, confidence_level * data_availability_score)
|
||||
|
||||
# Increase confidence interval to reflect uncertainty
|
||||
adjustment_factor = 1.0 + (0.5 * (1.0 - data_availability_score)) # Up to 50% wider interval
|
||||
adjusted_lower_bound = prediction_value - (prediction_value - lower_bound) * adjustment_factor
|
||||
adjusted_upper_bound = prediction_value + (upper_bound - prediction_value) * adjustment_factor
|
||||
|
||||
logger.info("Adjusted weather prediction confidence due to stale historical data",
|
||||
original_confidence=confidence_level,
|
||||
adjusted_confidence=current_confidence_level,
|
||||
data_availability_score=data_availability_score)
|
||||
|
||||
lower_bound = max(0, adjusted_lower_bound)
|
||||
upper_bound = adjusted_upper_bound
|
||||
|
||||
# Apply weather-based adjustments (business rules)
|
||||
adjusted_prediction = self._apply_weather_adjustments(
|
||||
prediction_value,
|
||||
@@ -375,7 +471,8 @@ class PredictionService:
|
||||
"prediction": max(0, adjusted_prediction),
|
||||
"lower_bound": max(0, lower_bound),
|
||||
"upper_bound": max(0, upper_bound),
|
||||
"confidence_level": confidence_level,
|
||||
"confidence_level": current_confidence_level,
|
||||
"data_freshness_score": data_availability_score, # Include data freshness in result
|
||||
"weather": {
|
||||
"temperature": enriched_features['temperature'],
|
||||
"precipitation": enriched_features['precipitation'],
|
||||
@@ -567,6 +664,8 @@ class PredictionService:
|
||||
) -> pd.Series:
|
||||
"""
|
||||
Fetch historical sales data for calculating lagged and rolling features.
|
||||
Enhanced to handle cases where recent data is not available by extending
|
||||
the search for the most recent data if needed.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant UUID
|
||||
@@ -578,7 +677,7 @@ class PredictionService:
|
||||
pandas Series with sales quantities indexed by date
|
||||
"""
|
||||
try:
|
||||
# Calculate date range
|
||||
# Calculate initial date range for recent data
|
||||
end_date = forecast_date - pd.Timedelta(days=1) # Day before forecast
|
||||
start_date = end_date - pd.Timedelta(days=days_back)
|
||||
|
||||
@@ -589,7 +688,7 @@ class PredictionService:
|
||||
end_date=end_date.date(),
|
||||
days_back=days_back)
|
||||
|
||||
# Fetch sales data from sales service
|
||||
# First, try to fetch sales data from the recent period
|
||||
sales_data = await self.sales_client.get_sales_data(
|
||||
tenant_id=tenant_id,
|
||||
start_date=start_date.strftime("%Y-%m-%d"),
|
||||
@@ -598,15 +697,72 @@ class PredictionService:
|
||||
aggregation="daily"
|
||||
)
|
||||
|
||||
# If no recent data found, search for the most recent available data
|
||||
if not sales_data:
|
||||
logger.warning("No historical sales data found",
|
||||
logger.info("No recent sales data found, expanding search to find most recent data",
|
||||
tenant_id=tenant_id,
|
||||
product_id=inventory_product_id)
|
||||
|
||||
# Search for available data in larger time windows (up to 2 years back)
|
||||
search_windows = [365, 730] # 1 year, 2 years
|
||||
|
||||
for window_days in search_windows:
|
||||
extended_start_date = forecast_date - pd.Timedelta(days=window_days)
|
||||
|
||||
logger.debug("Expanding search window for historical data",
|
||||
start_date=extended_start_date.date(),
|
||||
end_date=end_date.date(),
|
||||
window_days=window_days)
|
||||
|
||||
sales_data = await self.sales_client.get_sales_data(
|
||||
tenant_id=tenant_id,
|
||||
start_date=extended_start_date.strftime("%Y-%m-%d"),
|
||||
end_date=end_date.strftime("%Y-%m-%d"),
|
||||
product_id=inventory_product_id,
|
||||
aggregation="daily"
|
||||
)
|
||||
|
||||
if sales_data:
|
||||
logger.info("Found historical data in expanded search window",
|
||||
tenant_id=tenant_id,
|
||||
product_id=inventory_product_id,
|
||||
data_start=sales_data[0]['sale_date'] if sales_data else "None",
|
||||
data_end=sales_data[-1]['sale_date'] if sales_data else "None",
|
||||
window_days=window_days)
|
||||
break
|
||||
|
||||
if not sales_data:
|
||||
logger.warning("No historical sales data found in any search window",
|
||||
tenant_id=tenant_id,
|
||||
product_id=inventory_product_id)
|
||||
return pd.Series(dtype=float)
|
||||
|
||||
# Convert to pandas Series indexed by date
|
||||
# Convert to pandas DataFrame and check if it has the expected structure
|
||||
df = pd.DataFrame(sales_data)
|
||||
df['sale_date'] = pd.to_datetime(df['sale_date'])
|
||||
|
||||
# Check if the expected 'sale_date' column exists
|
||||
if df.empty:
|
||||
logger.warning("No historical sales data returned from API")
|
||||
return pd.Series(dtype=float)
|
||||
|
||||
# Check for available columns and find date column
|
||||
available_columns = list(df.columns)
|
||||
logger.debug(f"Available sales data columns: {available_columns}")
|
||||
|
||||
# Check for alternative date column names
|
||||
date_columns = ['sale_date', 'date', 'forecast_date', 'datetime', 'timestamp']
|
||||
date_column = None
|
||||
for col in date_columns:
|
||||
if col in df.columns:
|
||||
date_column = col
|
||||
break
|
||||
|
||||
if date_column is None:
|
||||
logger.error(f"Sales data missing expected date column. Available columns: {available_columns}")
|
||||
logger.debug(f"Sample of sales data: {df.head()}")
|
||||
return pd.Series(dtype=float)
|
||||
|
||||
df['sale_date'] = pd.to_datetime(df[date_column])
|
||||
df = df.set_index('sale_date')
|
||||
|
||||
# Extract quantity column (could be 'quantity' or 'total_quantity')
|
||||
@@ -639,6 +795,10 @@ class PredictionService:
|
||||
) -> Dict[str, float]:
|
||||
"""
|
||||
Calculate lagged, rolling, and trend features from historical sales data.
|
||||
Enhanced to handle cases where recent data is not available by using
|
||||
available historical data with appropriate temporal adjustments.
|
||||
|
||||
Now uses shared feature calculator for consistency with training service.
|
||||
|
||||
Args:
|
||||
historical_sales: Series of sales quantities indexed by date
|
||||
@@ -647,117 +807,26 @@ class PredictionService:
|
||||
Returns:
|
||||
Dictionary of calculated features
|
||||
"""
|
||||
features = {}
|
||||
|
||||
try:
|
||||
if len(historical_sales) == 0:
|
||||
logger.warning("No historical data available, using default values")
|
||||
# Return all features with default values (0.0)
|
||||
return {
|
||||
# Lagged features
|
||||
'lag_1_day': 0.0,
|
||||
'lag_7_day': 0.0,
|
||||
'lag_14_day': 0.0,
|
||||
# Rolling statistics (7-day window)
|
||||
'rolling_mean_7d': 0.0,
|
||||
'rolling_std_7d': 0.0,
|
||||
'rolling_max_7d': 0.0,
|
||||
'rolling_min_7d': 0.0,
|
||||
# Rolling statistics (14-day window)
|
||||
'rolling_mean_14d': 0.0,
|
||||
'rolling_std_14d': 0.0,
|
||||
'rolling_max_14d': 0.0,
|
||||
'rolling_min_14d': 0.0,
|
||||
# Rolling statistics (30-day window)
|
||||
'rolling_mean_30d': 0.0,
|
||||
'rolling_std_30d': 0.0,
|
||||
'rolling_max_30d': 0.0,
|
||||
'rolling_min_30d': 0.0,
|
||||
# Trend features
|
||||
'days_since_start': 0,
|
||||
'momentum_1_7': 0.0,
|
||||
'trend_7_30': 0.0,
|
||||
'velocity_week': 0.0,
|
||||
}
|
||||
# Use shared feature calculator for consistency
|
||||
from shared.ml.feature_calculator import HistoricalFeatureCalculator
|
||||
|
||||
# Calculate lagged features
|
||||
features['lag_1_day'] = float(historical_sales.iloc[-1]) if len(historical_sales) >= 1 else 0.0
|
||||
features['lag_7_day'] = float(historical_sales.iloc[-7]) if len(historical_sales) >= 7 else features['lag_1_day']
|
||||
features['lag_14_day'] = float(historical_sales.iloc[-14]) if len(historical_sales) >= 14 else features['lag_7_day']
|
||||
calculator = HistoricalFeatureCalculator()
|
||||
|
||||
# Calculate rolling statistics (7-day window)
|
||||
if len(historical_sales) >= 7:
|
||||
window_7d = historical_sales.iloc[-7:]
|
||||
features['rolling_mean_7d'] = float(window_7d.mean())
|
||||
features['rolling_std_7d'] = float(window_7d.std())
|
||||
features['rolling_max_7d'] = float(window_7d.max())
|
||||
features['rolling_min_7d'] = float(window_7d.min())
|
||||
else:
|
||||
features['rolling_mean_7d'] = features['lag_1_day']
|
||||
features['rolling_std_7d'] = 0.0
|
||||
features['rolling_max_7d'] = features['lag_1_day']
|
||||
features['rolling_min_7d'] = features['lag_1_day']
|
||||
# Calculate all features using shared calculator
|
||||
features = calculator.calculate_all_features(
|
||||
sales_data=historical_sales,
|
||||
reference_date=forecast_date,
|
||||
mode='prediction'
|
||||
)
|
||||
|
||||
# Calculate rolling statistics (14-day window)
|
||||
if len(historical_sales) >= 14:
|
||||
window_14d = historical_sales.iloc[-14:]
|
||||
features['rolling_mean_14d'] = float(window_14d.mean())
|
||||
features['rolling_std_14d'] = float(window_14d.std())
|
||||
features['rolling_max_14d'] = float(window_14d.max())
|
||||
features['rolling_min_14d'] = float(window_14d.min())
|
||||
else:
|
||||
features['rolling_mean_14d'] = features['rolling_mean_7d']
|
||||
features['rolling_std_14d'] = features['rolling_std_7d']
|
||||
features['rolling_max_14d'] = features['rolling_max_7d']
|
||||
features['rolling_min_14d'] = features['rolling_min_7d']
|
||||
|
||||
# Calculate rolling statistics (30-day window)
|
||||
if len(historical_sales) >= 30:
|
||||
window_30d = historical_sales.iloc[-30:]
|
||||
features['rolling_mean_30d'] = float(window_30d.mean())
|
||||
features['rolling_std_30d'] = float(window_30d.std())
|
||||
features['rolling_max_30d'] = float(window_30d.max())
|
||||
features['rolling_min_30d'] = float(window_30d.min())
|
||||
else:
|
||||
features['rolling_mean_30d'] = features['rolling_mean_14d']
|
||||
features['rolling_std_30d'] = features['rolling_std_14d']
|
||||
features['rolling_max_30d'] = features['rolling_max_14d']
|
||||
features['rolling_min_30d'] = features['rolling_min_14d']
|
||||
|
||||
# Calculate trend features
|
||||
if len(historical_sales) > 0:
|
||||
# Days since first sale
|
||||
features['days_since_start'] = (forecast_date - historical_sales.index[0]).days
|
||||
|
||||
# Momentum (difference between recent lag_1_day and lag_7_day)
|
||||
if len(historical_sales) >= 7:
|
||||
features['momentum_1_7'] = features['lag_1_day'] - features['lag_7_day']
|
||||
else:
|
||||
features['momentum_1_7'] = 0.0
|
||||
|
||||
# Trend (difference between recent 7-day and 30-day averages)
|
||||
if len(historical_sales) >= 30:
|
||||
features['trend_7_30'] = features['rolling_mean_7d'] - features['rolling_mean_30d']
|
||||
else:
|
||||
features['trend_7_30'] = 0.0
|
||||
|
||||
# Velocity (rate of change over the last week)
|
||||
if len(historical_sales) >= 7:
|
||||
week_change = historical_sales.iloc[-1] - historical_sales.iloc[-7]
|
||||
features['velocity_week'] = float(week_change / 7.0)
|
||||
else:
|
||||
features['velocity_week'] = 0.0
|
||||
else:
|
||||
features['days_since_start'] = 0
|
||||
features['momentum_1_7'] = 0.0
|
||||
features['trend_7_30'] = 0.0
|
||||
features['velocity_week'] = 0.0
|
||||
|
||||
logger.debug("Historical features calculated",
|
||||
lag_1_day=features['lag_1_day'],
|
||||
rolling_mean_7d=features['rolling_mean_7d'],
|
||||
rolling_mean_30d=features['rolling_mean_30d'],
|
||||
momentum=features['momentum_1_7'])
|
||||
logger.debug("Historical features calculated (using shared calculator)",
|
||||
lag_1_day=features.get('lag_1_day', 0.0),
|
||||
rolling_mean_7d=features.get('rolling_mean_7d', 0.0),
|
||||
rolling_mean_30d=features.get('rolling_mean_30d', 0.0),
|
||||
momentum=features.get('momentum_1_7', 0.0),
|
||||
days_since_last_sale=features.get('days_since_last_sale', 0),
|
||||
data_availability_score=features.get('historical_data_availability_score', 0.0))
|
||||
|
||||
return features
|
||||
|
||||
@@ -770,8 +839,9 @@ class PredictionService:
|
||||
'rolling_mean_7d', 'rolling_std_7d', 'rolling_max_7d', 'rolling_min_7d',
|
||||
'rolling_mean_14d', 'rolling_std_14d', 'rolling_max_14d', 'rolling_min_14d',
|
||||
'rolling_mean_30d', 'rolling_std_30d', 'rolling_max_30d', 'rolling_min_30d',
|
||||
'momentum_1_7', 'trend_7_30', 'velocity_week'
|
||||
]} | {'days_since_start': 0}
|
||||
'momentum_1_7', 'trend_7_30', 'velocity_week',
|
||||
'days_since_last_sale', 'historical_data_availability_score'
|
||||
]}
|
||||
|
||||
def _prepare_prophet_features(self, features: Dict[str, Any]) -> pd.DataFrame:
|
||||
"""Convert features to Prophet-compatible DataFrame - COMPLETE FEATURE MATCHING"""
|
||||
@@ -962,6 +1032,9 @@ class PredictionService:
|
||||
'momentum_1_7': float(features.get('momentum_1_7', 0.0)),
|
||||
'trend_7_30': float(features.get('trend_7_30', 0.0)),
|
||||
'velocity_week': float(features.get('velocity_week', 0.0)),
|
||||
# Data freshness metrics to help model understand data recency
|
||||
'days_since_last_sale': int(features.get('days_since_last_sale', 0)),
|
||||
'historical_data_availability_score': float(features.get('historical_data_availability_score', 0.0)),
|
||||
}
|
||||
|
||||
# Calculate interaction features
|
||||
|
||||
@@ -92,7 +92,7 @@ class InventoryAlertRepository:
JOIN ingredients i ON s.ingredient_id = i.id
WHERE i.tenant_id = :tenant_id
AND s.is_available = true
AND s.expiration_date <= CURRENT_DATE + INTERVAL ':days_threshold days'
AND s.expiration_date <= CURRENT_DATE + (INTERVAL '1 day' * :days_threshold)
ORDER BY s.expiration_date ASC, total_value DESC
""")

@@ -134,7 +134,7 @@ class InventoryAlertRepository:
FROM temperature_logs tl
WHERE tl.tenant_id = :tenant_id
AND tl.is_within_range = false
AND tl.recorded_at > NOW() - INTERVAL ':hours_back hours'
AND tl.recorded_at > NOW() - (INTERVAL '1 hour' * :hours_back)
AND tl.alert_triggered = false
ORDER BY deviation DESC, tl.recorded_at DESC
""")
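Both INTERVAL rewrites above follow the same rule: a bind parameter placed inside a quoted interval literal (INTERVAL ':days_threshold days') is never substituted because it is just text, so the value is bound separately and multiplied by a fixed unit interval instead. A hedged sketch of the pattern with SQLAlchemy (table and column names are illustrative):

from sqlalchemy import text

expiring_query = text("""
    SELECT s.id
    FROM stock s
    WHERE s.expiration_date <= CURRENT_DATE + (INTERVAL '1 day' * :days_threshold)
""")
# rows = await session.execute(expiring_query, {"days_threshold": 7})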
@@ -227,9 +227,9 @@ class InventoryAlertService(BaseAlertService, AlertServiceMixin):
|
||||
"""Process expiring items for a tenant"""
|
||||
try:
|
||||
# Group by urgency
|
||||
expired = [i for i in items if i['days_to_expiry'] <= 0]
|
||||
urgent = [i for i in items if 0 < i['days_to_expiry'] <= 2]
|
||||
warning = [i for i in items if 2 < i['days_to_expiry'] <= 7]
|
||||
expired = [i for i in items if i['days_until_expiry'] <= 0]
|
||||
urgent = [i for i in items if 0 < i['days_until_expiry'] <= 2]
|
||||
warning = [i for i in items if 2 < i['days_until_expiry'] <= 7]
|
||||
|
||||
# Process expired products (urgent alerts)
|
||||
if expired:
|
||||
@@ -257,7 +257,7 @@ class InventoryAlertService(BaseAlertService, AlertServiceMixin):
|
||||
'name': item['name'],
|
||||
'stock_id': str(item['stock_id']),
|
||||
'quantity': float(item['current_quantity']),
|
||||
'days_expired': abs(item['days_to_expiry'])
|
||||
'days_expired': abs(item['days_until_expiry'])
|
||||
} for item in expired
|
||||
]
|
||||
}
|
||||
@@ -270,12 +270,12 @@ class InventoryAlertService(BaseAlertService, AlertServiceMixin):
|
||||
'type': 'urgent_expiry',
|
||||
'severity': 'high',
|
||||
'title': f'⏰ Caducidad Urgente: {item["name"]}',
|
||||
'message': f'{item["name"]} caduca en {item["days_to_expiry"]} día(s). Usar prioritariamente.',
|
||||
'message': f'{item["name"]} caduca en {item["days_until_expiry"]} día(s). Usar prioritariamente.',
|
||||
'actions': ['Usar inmediatamente', 'Promoción especial', 'Revisar recetas', 'Documentar'],
|
||||
'metadata': {
|
||||
'ingredient_id': str(item['id']),
|
||||
'stock_id': str(item['stock_id']),
|
||||
'days_to_expiry': item['days_to_expiry'],
|
||||
'days_to_expiry': item['days_until_expiry'],
|
||||
'quantity': float(item['current_quantity'])
|
||||
}
|
||||
}, item_type='alert')
|
||||
|
||||
@@ -18,18 +18,44 @@ depends_on = None
|
||||
def upgrade():
|
||||
"""Rename metadata columns to additional_data to avoid SQLAlchemy reserved attribute conflict"""
|
||||
|
||||
# Rename metadata column in equipment_connection_logs
|
||||
op.execute('ALTER TABLE equipment_connection_logs RENAME COLUMN metadata TO additional_data')
|
||||
# Check if columns need to be renamed (they may already be named additional_data in migration 002)
|
||||
from sqlalchemy import inspect
|
||||
from alembic import op
|
||||
|
||||
# Rename metadata column in equipment_iot_alerts
|
||||
op.execute('ALTER TABLE equipment_iot_alerts RENAME COLUMN metadata TO additional_data')
|
||||
connection = op.get_bind()
|
||||
inspector = inspect(connection)
|
||||
|
||||
# Check equipment_connection_logs table
|
||||
if 'equipment_connection_logs' in inspector.get_table_names():
|
||||
columns = [col['name'] for col in inspector.get_columns('equipment_connection_logs')]
|
||||
if 'metadata' in columns and 'additional_data' not in columns:
|
||||
op.execute('ALTER TABLE equipment_connection_logs RENAME COLUMN metadata TO additional_data')
|
||||
|
||||
# Check equipment_iot_alerts table
|
||||
if 'equipment_iot_alerts' in inspector.get_table_names():
|
||||
columns = [col['name'] for col in inspector.get_columns('equipment_iot_alerts')]
|
||||
if 'metadata' in columns and 'additional_data' not in columns:
|
||||
op.execute('ALTER TABLE equipment_iot_alerts RENAME COLUMN metadata TO additional_data')
|
||||
|
||||
|
||||
def downgrade():
|
||||
"""Revert column names back to metadata"""
|
||||
|
||||
# Revert metadata column in equipment_iot_alerts
|
||||
op.execute('ALTER TABLE equipment_iot_alerts RENAME COLUMN additional_data TO metadata')
|
||||
# Check if columns need to be renamed back
|
||||
from sqlalchemy import inspect
|
||||
from alembic import op
|
||||
|
||||
# Revert metadata column in equipment_connection_logs
|
||||
op.execute('ALTER TABLE equipment_connection_logs RENAME COLUMN additional_data TO metadata')
|
||||
connection = op.get_bind()
|
||||
inspector = inspect(connection)
|
||||
|
||||
# Check equipment_iot_alerts table
|
||||
if 'equipment_iot_alerts' in inspector.get_table_names():
|
||||
columns = [col['name'] for col in inspector.get_columns('equipment_iot_alerts')]
|
||||
if 'additional_data' in columns and 'metadata' not in columns:
|
||||
op.execute('ALTER TABLE equipment_iot_alerts RENAME COLUMN additional_data TO metadata')
|
||||
|
||||
# Check equipment_connection_logs table
|
||||
if 'equipment_connection_logs' in inspector.get_table_names():
|
||||
columns = [col['name'] for col in inspector.get_columns('equipment_connection_logs')]
|
||||
if 'additional_data' in columns and 'metadata' not in columns:
|
||||
op.execute('ALTER TABLE equipment_connection_logs RENAME COLUMN additional_data TO metadata')
|
||||
|
||||
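The upgrade and downgrade above repeat the same inspect-then-rename check; if it grows further, it could be factored into a small helper. A hypothetical sketch (the helper name is mine, not part of the migration):

from alembic import op
from sqlalchemy import inspect

def _rename_column_if_present(table: str, old: str, new: str) -> None:
    """Rename a column only when `old` exists and `new` does not, so reruns are no-ops."""
    inspector = inspect(op.get_bind())
    if table in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns(table)]
        if old in columns and new not in columns:
            op.execute(f'ALTER TABLE {table} RENAME COLUMN {old} TO {new}')

# upgrade():   _rename_column_if_present('equipment_iot_alerts', 'metadata', 'additional_data')
# downgrade(): _rename_column_if_present('equipment_iot_alerts', 'additional_data', 'metadata')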
@@ -170,13 +170,49 @@ class EnhancedTenantService:
|
||||
await publish_tenant_created(str(tenant.id), owner_id, bakery_data.name)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to publish tenant created event", error=str(e))
|
||||
|
||||
|
||||
# Automatically create location-context with city information
|
||||
# This is non-blocking - failure won't prevent tenant creation
|
||||
try:
|
||||
from shared.clients.external_client import ExternalServiceClient
|
||||
from shared.utils.city_normalization import normalize_city_id
|
||||
from app.core.config import settings
|
||||
|
||||
external_client = ExternalServiceClient(settings, "tenant-service")
|
||||
city_id = normalize_city_id(bakery_data.city)
|
||||
|
||||
if city_id:
|
||||
await external_client.create_tenant_location_context(
|
||||
tenant_id=str(tenant.id),
|
||||
city_id=city_id,
|
||||
notes="Auto-created during tenant registration"
|
||||
)
|
||||
logger.info(
|
||||
"Automatically created location-context",
|
||||
tenant_id=str(tenant.id),
|
||||
city_id=city_id
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Could not normalize city for location-context",
|
||||
tenant_id=str(tenant.id),
|
||||
city=bakery_data.city
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to auto-create location-context (non-blocking)",
|
||||
tenant_id=str(tenant.id),
|
||||
city=bakery_data.city,
|
||||
error=str(e)
|
||||
)
|
||||
# Don't fail tenant creation if location-context creation fails
|
||||
|
||||
logger.info("Bakery created successfully",
|
||||
tenant_id=tenant.id,
|
||||
name=bakery_data.name,
|
||||
owner_id=owner_id,
|
||||
subdomain=tenant.subdomain)
|
||||
|
||||
|
||||
return TenantResponse.from_orm(tenant)
|
||||
|
||||
except (ValidationError, DuplicateRecordError) as e:
|
||||
|
||||
@@ -11,7 +11,7 @@ from sqlalchemy import text
|
||||
from app.core.database import get_db
|
||||
from app.schemas.training import TrainedModelResponse, ModelMetricsResponse
|
||||
from app.services.training_service import EnhancedTrainingService
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import select, delete, func
|
||||
import uuid
|
||||
import shutil
|
||||
@@ -79,13 +79,13 @@ async def get_active_model(

        # ✅ FIX: Wrap update query with text() too
        update_query = text("""
            UPDATE trained_models
            SET last_used_at = :now
            UPDATE trained_models
            SET last_used_at = :now
            WHERE id = :model_id
        """)

        await db.execute(update_query, {
            "now": datetime.utcnow(),
            "now": datetime.now(timezone.utc),
            "model_id": model_record.id
        })
        await db.commit()
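The text() wrapper matters because SQLAlchemy 2.x no longer accepts bare SQL strings in Session.execute()/AsyncSession.execute(). A minimal sketch of the same pattern in isolation (the function name is illustrative; the session is assumed to be the AsyncSession provided by the app's get_db dependency):

from datetime import datetime, timezone
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession

async def touch_model(session: AsyncSession, model_id: str) -> None:
    # Raw SQL must be wrapped in text(); bound parameters use the :name style.
    await session.execute(
        text("UPDATE trained_models SET last_used_at = :now WHERE id = :model_id"),
        {"now": datetime.now(timezone.utc), "model_id": model_id},
    )
    await session.commit()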
@@ -300,7 +300,7 @@ async def delete_tenant_models_complete(

    deletion_stats = {
        "tenant_id": tenant_id,
        "deleted_at": datetime.utcnow().isoformat(),
        "deleted_at": datetime.now(timezone.utc).isoformat(),
        "jobs_cancelled": 0,
        "models_deleted": 0,
        "artifacts_deleted": 0,

@@ -322,7 +322,7 @@ async def delete_tenant_models_complete(

    for job in active_jobs:
        job.status = "cancelled"
        job.updated_at = datetime.utcnow()
        job.updated_at = datetime.now(timezone.utc)
        deletion_stats["jobs_cancelled"] += 1

    if active_jobs:
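The recurring utcnow() → now(timezone.utc) change throughout this commit swaps naive UTC timestamps for timezone-aware ones (datetime.utcnow() is deprecated since Python 3.12). A quick comparison of the two:

from datetime import datetime, timezone

naive = datetime.utcnow()           # no tzinfo attached; ambiguous once serialized
aware = datetime.now(timezone.utc)  # carries tzinfo=timezone.utc

print(naive.tzinfo)                 # None
print(aware.isoformat())            # e.g. 2025-01-01T12:00:00+00:00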
@@ -17,7 +17,7 @@ from shared.database.base import create_database_manager
from shared.database.transactions import transactional
from shared.database.exceptions import DatabaseError
from app.core.config import settings
from app.ml.enhanced_features import AdvancedFeatureEngineer
from shared.ml.enhanced_features import AdvancedFeatureEngineer
import holidays

logger = structlog.get_logger()
@@ -7,6 +7,7 @@ import pandas as pd
import numpy as np
from typing import Dict, List, Optional
import structlog
from shared.ml.feature_calculator import HistoricalFeatureCalculator

logger = structlog.get_logger()
@@ -19,10 +20,12 @@ class AdvancedFeatureEngineer:

    def __init__(self):
        self.feature_columns = []
        self.feature_calculator = HistoricalFeatureCalculator()

    def add_lagged_features(self, df: pd.DataFrame, lag_days: List[int] = None) -> pd.DataFrame:
        """
        Add lagged demand features for capturing recent trends.
        Uses shared feature calculator for consistency with prediction service.

        Args:
            df: DataFrame with 'quantity' column

@@ -34,14 +37,20 @@ class AdvancedFeatureEngineer:
        if lag_days is None:
            lag_days = [1, 7, 14]

        df = df.copy()
        # Use shared calculator for consistent lag calculation
        df = self.feature_calculator.calculate_lag_features(
            df,
            lag_days=lag_days,
            mode='training'
        )

        # Update feature columns list
        for lag in lag_days:
            col_name = f'lag_{lag}_day'
            df[col_name] = df['quantity'].shift(lag)
            self.feature_columns.append(col_name)
            if col_name not in self.feature_columns:
                self.feature_columns.append(col_name)

        logger.info(f"Added {len(lag_days)} lagged features", lags=lag_days)
        logger.info(f"Added {len(lag_days)} lagged features (using shared calculator)", lags=lag_days)
        return df
    def add_rolling_features(
@@ -52,6 +61,7 @@ class AdvancedFeatureEngineer:
    ) -> pd.DataFrame:
        """
        Add rolling statistics (mean, std, max, min).
        Uses shared feature calculator for consistency with prediction service.

        Args:
            df: DataFrame with 'quantity' column

@@ -67,24 +77,22 @@ class AdvancedFeatureEngineer:
        if features is None:
            features = ['mean', 'std', 'max', 'min']

        df = df.copy()
        # Use shared calculator for consistent rolling calculation
        df = self.feature_calculator.calculate_rolling_features(
            df,
            windows=windows,
            statistics=features,
            mode='training'
        )

        # Update feature columns list
        for window in windows:
            for feature in features:
                col_name = f'rolling_{feature}_{window}d'
                if col_name not in self.feature_columns:
                    self.feature_columns.append(col_name)

                if feature == 'mean':
                    df[col_name] = df['quantity'].rolling(window=window, min_periods=max(1, window // 2)).mean()
                elif feature == 'std':
                    df[col_name] = df['quantity'].rolling(window=window, min_periods=max(1, window // 2)).std()
                elif feature == 'max':
                    df[col_name] = df['quantity'].rolling(window=window, min_periods=max(1, window // 2)).max()
                elif feature == 'min':
                    df[col_name] = df['quantity'].rolling(window=window, min_periods=max(1, window // 2)).min()

                self.feature_columns.append(col_name)

        logger.info(f"Added rolling features", windows=windows, features=features)
        logger.info(f"Added rolling features (using shared calculator)", windows=windows, features=features)
        return df

    def add_day_of_week_features(self, df: pd.DataFrame, date_column: str = 'date') -> pd.DataFrame:
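The shared HistoricalFeatureCalculator itself is not shown in this diff; the point of the refactor is that training and prediction now use one implementation. Judging from the inline pandas code these hunks remove, its lag and rolling helpers are presumably equivalent to something like this sketch (method names come from the calls above, the bodies are assumptions):

import pandas as pd
from typing import List

class HistoricalFeatureCalculator:
    # Assumed behaviour, mirroring the inline pandas code this commit removes.
    def calculate_lag_features(self, df: pd.DataFrame, lag_days: List[int], mode: str = 'training') -> pd.DataFrame:
        df = df.copy()
        for lag in lag_days:
            df[f'lag_{lag}_day'] = df['quantity'].shift(lag)
        return df

    def calculate_rolling_features(self, df: pd.DataFrame, windows: List[int],
                                   statistics: List[str], mode: str = 'training') -> pd.DataFrame:
        df = df.copy()
        for window in windows:
            roll = df['quantity'].rolling(window=window, min_periods=max(1, window // 2))
            for stat in statistics:
                df[f'rolling_{stat}_{window}d'] = getattr(roll, stat)()
        return df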
@@ -203,6 +211,7 @@ class AdvancedFeatureEngineer:
    def add_trend_features(self, df: pd.DataFrame, date_column: str = 'date') -> pd.DataFrame:
        """
        Add trend-based features.
        Uses shared feature calculator for consistency with prediction service.

        Args:
            df: DataFrame with date and quantity

@@ -211,27 +220,18 @@ class AdvancedFeatureEngineer:
        Returns:
            DataFrame with trend features
        """
        df = df.copy()
        # Use shared calculator for consistent trend calculation
        df = self.feature_calculator.calculate_trend_features(
            df,
            mode='training'
        )

        # Days since start (linear trend proxy)
        df['days_since_start'] = (df[date_column] - df[date_column].min()).dt.days

        # Momentum indicators (recent change vs. older change)
        if 'lag_1_day' in df.columns and 'lag_7_day' in df.columns:
            df['momentum_1_7'] = df['lag_1_day'] - df['lag_7_day']
            self.feature_columns.append('momentum_1_7')

        if 'rolling_mean_7d' in df.columns and 'rolling_mean_30d' in df.columns:
            df['trend_7_30'] = df['rolling_mean_7d'] - df['rolling_mean_30d']
            self.feature_columns.append('trend_7_30')

        # Velocity (rate of change)
        if 'lag_1_day' in df.columns and 'lag_7_day' in df.columns:
            df['velocity_week'] = (df['lag_1_day'] - df['lag_7_day']) / 7
            self.feature_columns.append('velocity_week')

        self.feature_columns.append('days_since_start')
        # Update feature columns list
        for feature_name in ['days_since_start', 'momentum_1_7', 'trend_7_30', 'velocity_week']:
            if feature_name in df.columns and feature_name not in self.feature_columns:
                self.feature_columns.append(feature_name)

        logger.debug("Added trend features (using shared calculator)")
        return df

    def add_cyclical_encoding(self, df: pd.DataFrame) -> pd.DataFrame:
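A small end-to-end usage sketch of the refactored engineer (illustrative data; the import path follows the new shared.ml location above, and the add_rolling_features parameter names are inferred from the hunks, so treat them as assumptions):

import pandas as pd
from shared.ml.enhanced_features import AdvancedFeatureEngineer

df = pd.DataFrame({
    "date": pd.date_range("2024-01-01", periods=60, freq="D"),
    "quantity": range(60),
})

engineer = AdvancedFeatureEngineer()
df = engineer.add_lagged_features(df, lag_days=[1, 7, 14])
df = engineer.add_rolling_features(df, windows=[7, 30])
df = engineer.add_trend_features(df, date_column="date")

print(engineer.feature_columns)  # lag_*, rolling_*, days_since_start, momentum_1_7, ...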
@@ -7,7 +7,7 @@ import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional, Tuple
import structlog
from datetime import datetime
from datetime import datetime, timezone
import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import TimeSeriesSplit

@@ -408,7 +408,7 @@ class HybridProphetXGBoost:
            },
            'tenant_id': tenant_id,
            'inventory_product_id': inventory_product_id,
            'trained_at': datetime.utcnow().isoformat()
            'trained_at': datetime.now(timezone.utc).isoformat()
        }

    async def predict(
@@ -844,6 +844,9 @@ class EnhancedBakeryMLTrainer:
        # Extract training period from the processed data
        training_start_date = None
        training_end_date = None
        data_freshness_days = None
        data_coverage_days = None

        if 'ds' in processed_data.columns and not processed_data.empty:
            # Ensure ds column is datetime64 before extracting dates (prevents object dtype issues)
            ds_datetime = pd.to_datetime(processed_data['ds'])

@@ -857,6 +860,15 @@ class EnhancedBakeryMLTrainer:
                training_start_date = pd.Timestamp(min_ts).to_pydatetime().replace(tzinfo=None)
            if pd.notna(max_ts):
                training_end_date = pd.Timestamp(max_ts).to_pydatetime().replace(tzinfo=None)

        # Calculate data freshness metrics
        if training_end_date:
            from datetime import datetime
            data_freshness_days = (datetime.now() - training_end_date).days

        # Calculate data coverage period
        if training_start_date and training_end_date:
            data_coverage_days = (training_end_date - training_start_date).days

        # Ensure features are clean string list
        try:
@@ -864,6 +876,13 @@ class EnhancedBakeryMLTrainer:
        except Exception:
            features_used = []

        # Prepare hyperparameters with data freshness metrics
        hyperparameters = model_info.get("hyperparameters", {})
        if data_freshness_days is not None:
            hyperparameters["data_freshness_days"] = data_freshness_days
        if data_coverage_days is not None:
            hyperparameters["data_coverage_days"] = data_coverage_days

        model_data = {
            "tenant_id": tenant_id,
            "inventory_product_id": inventory_product_id,
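A worked example of the two new metrics with illustrative dates: data covering 2024-01-01 through 2024-03-31, trained on 2024-04-10.

from datetime import datetime

training_start_date = datetime(2024, 1, 1)
training_end_date = datetime(2024, 3, 31)
trained_on = datetime(2024, 4, 10)

data_freshness_days = (trained_on - training_end_date).days          # 10 -> newest sample is 10 days old
data_coverage_days = (training_end_date - training_start_date).days  # 90 -> span of history used

Note that the runtime code compares a naive datetime.now() against the tz-stripped training_end_date, so both operands stay naive.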
@@ -876,7 +895,7 @@ class EnhancedBakeryMLTrainer:
            "rmse": float(model_info.get("training_metrics", {}).get("rmse", 0)) if model_info.get("training_metrics", {}).get("rmse") is not None else 0,
            "r2_score": float(model_info.get("training_metrics", {}).get("r2", 0)) if model_info.get("training_metrics", {}).get("r2") is not None else 0,
            "training_samples": int(len(processed_data)),
            "hyperparameters": self._serialize_scalers(model_info.get("hyperparameters", {})),
            "hyperparameters": self._serialize_scalers(hyperparameters),
            "features_used": [str(f) for f in features_used] if features_used else [],
            "normalization_params": self._serialize_scalers(self.enhanced_data_processor.get_scalers()) or {},  # Include scalers for prediction consistency
            "product_category": model_info.get("product_category", "unknown"),  # Store product category

@@ -890,7 +909,9 @@ class EnhancedBakeryMLTrainer:
        model_record = await repos['model'].create_model(model_data)
        logger.info("Created enhanced model record",
                    inventory_product_id=inventory_product_id,
                    model_id=model_record.id)
                    model_id=model_record.id,
                    data_freshness_days=data_freshness_days,
                    data_coverage_days=data_coverage_days)

        # Create artifacts for model files
        if model_info.get("model_path"):
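Because the freshness metrics ride along inside the stored hyperparameters, a downstream consumer (for example the prediction service) could flag stale models without touching the training data. A hedged sketch; the threshold and the hyperparameters shape are assumptions:

STALE_AFTER_DAYS = 30  # hypothetical threshold

def is_model_stale(hyperparameters: dict) -> bool:
    """True when the newest training sample is older than the threshold."""
    freshness = hyperparameters.get("data_freshness_days")
    return freshness is not None and freshness > STALE_AFTER_DAYS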
@@ -6,7 +6,7 @@ Service-specific repository base class with training service utilities
from typing import Optional, List, Dict, Any, Type
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import text
from datetime import datetime, timedelta
from datetime import datetime, timezone, timedelta
import structlog

from shared.database.repository import BaseRepository

@@ -73,7 +73,7 @@ class TrainingBaseRepository(BaseRepository):
    async def cleanup_old_records(self, days_old: int = 90, status_filter: str = None) -> int:
        """Clean up old training records"""
        try:
            cutoff_date = datetime.utcnow() - timedelta(days=days_old)
            cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_old)
            table_name = self.model.__tablename__

            # Build query based on available fields
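The hunk ends just before the query is built. A minimal sketch of what the cutoff-based cleanup could look like, written as a standalone helper; the created_at column, the status filter, and the rowcount return are assumptions, and the repository may build this differently:

from datetime import datetime, timezone, timedelta
from typing import Optional
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession

async def cleanup_old_records(session: AsyncSession, table_name: str,
                              days_old: int = 90, status_filter: Optional[str] = None) -> int:
    # Hypothetical completion: delete rows older than the cutoff, optionally filtered by status.
    cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_old)
    sql = f"DELETE FROM {table_name} WHERE created_at < :cutoff"
    params = {"cutoff": cutoff_date}
    if status_filter:
        sql += " AND status = :status"
        params["status"] = status_filter
    result = await session.execute(text(sql), params)
    await session.commit()
    return result.rowcount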
@@ -6,7 +6,7 @@ Repository for trained model operations
from typing import Optional, List, Dict, Any
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_, text, desc
from datetime import datetime, timedelta
from datetime import datetime, timezone, timedelta
import structlog

from .base import TrainingBaseRepository

@@ -144,7 +144,7 @@ class ModelRepository(TrainingBaseRepository):
        # Promote this model
        updated_model = await self.update(model_id, {
            "is_production": True,
            "last_used_at": datetime.utcnow()
            "last_used_at": datetime.now(timezone.utc)
        })

        logger.info("Model promoted to production",

@@ -164,7 +164,7 @@ class ModelRepository(TrainingBaseRepository):
        """Update model last used timestamp"""
        try:
            return await self.update(model_id, {
                "last_used_at": datetime.utcnow()
                "last_used_at": datetime.now(timezone.utc)
            })
        except Exception as e:
            logger.error("Failed to update model usage",

@@ -176,7 +176,7 @@ class ModelRepository(TrainingBaseRepository):
    async def archive_old_models(self, tenant_id: str, days_old: int = 90) -> int:
        """Archive old non-production models"""
        try:
            cutoff_date = datetime.utcnow() - timedelta(days=days_old)
            cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_old)

            query = text("""
                UPDATE trained_models

@@ -235,7 +235,7 @@ class ModelRepository(TrainingBaseRepository):
        product_stats = {row.inventory_product_id: row.count for row in result.fetchall()}

        # Recent activity (models created in last 30 days)
        thirty_days_ago = datetime.utcnow() - timedelta(days=30)
        thirty_days_ago = datetime.now(timezone.utc) - timedelta(days=30)
        recent_models_query = text("""
            SELECT COUNT(*) as count
            FROM trained_models
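The statistics hunk is truncated mid-query. A plausible completion of the recent-models count, assuming tenant_id and created_at columns and that the repository exposes its session as self.session (the real filter and attribute name may differ):

recent_models_query = text("""
    SELECT COUNT(*) as count
    FROM trained_models
    WHERE tenant_id = :tenant_id
      AND created_at >= :thirty_days_ago
""")
result = await self.session.execute(recent_models_query, {
    "tenant_id": tenant_id,
    "thirty_days_ago": thirty_days_ago,
})
recent_models = result.scalar() or 0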