Add POI feature and improve the overall backend implementation

This commit is contained in:
Urtzi Alfaro
2025-11-12 15:34:10 +01:00
parent e8096cd979
commit 5783c7ed05
173 changed files with 16862 additions and 9078 deletions

View File

@@ -116,20 +116,22 @@ class EnhancedBakeryDataProcessor:
weather_data: pd.DataFrame,
traffic_data: pd.DataFrame,
inventory_product_id: str,
poi_features: Dict[str, Any] = None,
tenant_id: str = None,
job_id: str = None,
session=None) -> pd.DataFrame:
"""
Prepare comprehensive training data for a specific product with repository logging.
Args:
sales_data: Historical sales data for the product
weather_data: Weather data
traffic_data: Traffic data
inventory_product_id: Inventory product UUID for logging
poi_features: POI features (location-based, static)
tenant_id: Optional tenant ID for tracking
job_id: Optional job ID for tracking
Returns:
DataFrame ready for Prophet training with 'ds' and 'y' columns plus features
"""
@@ -250,6 +252,18 @@ class EnhancedBakeryDataProcessor:
inventory_product_id=inventory_product_id,
total_features=len(daily_sales.columns))
logger.debug("Starting Step 8b: Add POI features",
inventory_product_id=inventory_product_id)
# Step 8b: Add POI features (static, location-based)
if poi_features:
daily_sales = self._add_poi_features(daily_sales, poi_features)
logger.debug("Step 8b completed: Add POI features",
inventory_product_id=inventory_product_id,
poi_feature_count=len(poi_features))
else:
logger.debug("Step 8b skipped: No POI features available",
inventory_product_id=inventory_product_id)
logger.debug("Starting Step 9: Handle missing values",
inventory_product_id=inventory_product_id)
# Step 9: Handle missing values
@@ -331,6 +345,7 @@ class EnhancedBakeryDataProcessor:
future_dates: pd.DatetimeIndex,
weather_forecast: pd.DataFrame = None,
traffic_forecast: pd.DataFrame = None,
poi_features: Dict[str, Any] = None,
historical_data: pd.DataFrame = None) -> pd.DataFrame:
"""
Create features for future predictions with proper date handling.
@@ -339,6 +354,7 @@ class EnhancedBakeryDataProcessor:
future_dates: Future dates to predict
weather_forecast: Weather forecast data
traffic_forecast: Traffic forecast data
poi_features: POI features (location-based, static)
historical_data: Historical data for creating lagged and rolling features
Returns:
@@ -390,6 +406,10 @@ class EnhancedBakeryDataProcessor:
logger.warning("No historical data provided, lagged features will be NaN")
future_df = self._add_advanced_features(future_df)
# Add POI features (static, location-based)
if poi_features:
future_df = self._add_poi_features(future_df, poi_features)
future_df = future_df.rename(columns={'date': 'ds'})
# Handle missing values in future data
@@ -1171,7 +1191,42 @@ class EnhancedBakeryDataProcessor:
df[col] = df[col].fillna(default_value)
return df
def _add_poi_features(self, df: pd.DataFrame, poi_features: Dict[str, Any]) -> pd.DataFrame:
    """
    Broadcast static POI features onto every row of a dataframe.

    Each entry of ``poi_features`` becomes one column named after its key,
    holding the same value in every row. Python ``bool`` values are stored
    as ``1``/``0``; every other value is assigned unchanged.

    The dataframe is modified in place and the same object is returned.

    Args:
        df: Dataframe to receive the POI columns
        poi_features: Mapping of POI feature name to feature value

    Returns:
        The input dataframe with one added (or overwritten) column per
        POI feature; returned untouched when ``poi_features`` is empty
        or None.
    """
    # Nothing to broadcast: leave the frame exactly as it was handed in.
    if not poi_features:
        logger.warning("No POI features to add")
        return df

    logger.info(f"Adding {len(poi_features)} POI features to dataframe")

    for column_name, raw_value in poi_features.items():
        # Only genuine Python bools are mapped to 1/0; other values pass through.
        df[column_name] = int(raw_value) if isinstance(raw_value, bool) else raw_value

    # Only the first five names are logged to keep the log entry short.
    logged_names = list(poi_features)[:5]
    logger.info(
        "POI features added successfully",
        feature_count=len(poi_features),
        feature_names=logged_names
    )

    return df
def _prepare_prophet_format(self, df: pd.DataFrame) -> pd.DataFrame:
"""Prepare data in Prophet format with enhanced validation"""
prophet_df = df.copy()