Add improvements
This commit is contained in:
@@ -157,8 +157,13 @@ class AdvancedFeatureEngineer:
|
||||
# Week of year
|
||||
df['week_of_year'] = df[date_column].dt.isocalendar().week
|
||||
|
||||
# Payday indicators (15th and last day of month - high bakery traffic)
|
||||
df['is_payday'] = ((df['day_of_month'] == 15) | df[date_column].dt.is_month_end).astype(int)
|
||||
# Payday indicators for Spain (high bakery traffic)
|
||||
# Spain commonly pays on: 28th, 15th, or last day of month
|
||||
df['is_payday'] = (
|
||||
(df['day_of_month'] == 15) | # Mid-month payday
|
||||
(df['day_of_month'] == 28) | # Common Spanish payday (28th)
|
||||
df[date_column].dt.is_month_end # End of month
|
||||
).astype(int)
|
||||
|
||||
# Add to feature list
|
||||
for col in ['month', 'quarter', 'day_of_month', 'is_month_start', 'is_month_end',
|
||||
@@ -319,24 +324,27 @@ class AdvancedFeatureEngineer:
|
||||
"""Get list of all created feature column names."""
|
||||
return self.feature_columns.copy()
|
||||
|
||||
def fill_na_values(self, df: pd.DataFrame, strategy: str = 'forward_backward') -> pd.DataFrame:
|
||||
def fill_na_values(self, df: pd.DataFrame, strategy: str = 'forward_mean') -> pd.DataFrame:
|
||||
"""
|
||||
Fill NA values in lagged and rolling features.
|
||||
|
||||
IMPORTANT: Never uses backward fill to prevent data leakage in time series training.
|
||||
|
||||
Args:
|
||||
df: DataFrame with potential NA values
|
||||
strategy: 'forward_backward', 'zero', 'mean'
|
||||
strategy: 'forward_mean', 'zero', 'mean'
|
||||
|
||||
Returns:
|
||||
DataFrame with filled NA values
|
||||
"""
|
||||
df = df.copy()
|
||||
|
||||
if strategy == 'forward_backward':
|
||||
if strategy == 'forward_mean':
|
||||
# Forward fill first (use previous values)
|
||||
df = df.fillna(method='ffill')
|
||||
# Backward fill remaining (beginning of series)
|
||||
df = df.fillna(method='bfill')
|
||||
# Fill remaining with mean (typically at beginning of series)
|
||||
# NEVER use bfill as it leaks future information into training data
|
||||
df = df.fillna(df.mean())
|
||||
|
||||
elif strategy == 'zero':
|
||||
df = df.fillna(0)
|
||||
|
||||
Reference in New Issue
Block a user