Improve features
This commit is contained in:
@@ -7,6 +7,7 @@ import pandas as pd
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional
|
||||
import structlog
|
||||
from shared.ml.feature_calculator import HistoricalFeatureCalculator
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
@@ -19,10 +20,12 @@ class AdvancedFeatureEngineer:
|
||||
|
||||
def __init__(self):
|
||||
self.feature_columns = []
|
||||
self.feature_calculator = HistoricalFeatureCalculator()
|
||||
|
||||
def add_lagged_features(self, df: pd.DataFrame, lag_days: List[int] = None) -> pd.DataFrame:
|
||||
"""
|
||||
Add lagged demand features for capturing recent trends.
|
||||
Uses shared feature calculator for consistency with prediction service.
|
||||
|
||||
Args:
|
||||
df: DataFrame with 'quantity' column
|
||||
@@ -34,14 +37,20 @@ class AdvancedFeatureEngineer:
|
||||
if lag_days is None:
|
||||
lag_days = [1, 7, 14]
|
||||
|
||||
df = df.copy()
|
||||
# Use shared calculator for consistent lag calculation
|
||||
df = self.feature_calculator.calculate_lag_features(
|
||||
df,
|
||||
lag_days=lag_days,
|
||||
mode='training'
|
||||
)
|
||||
|
||||
# Update feature columns list
|
||||
for lag in lag_days:
|
||||
col_name = f'lag_{lag}_day'
|
||||
df[col_name] = df['quantity'].shift(lag)
|
||||
self.feature_columns.append(col_name)
|
||||
if col_name not in self.feature_columns:
|
||||
self.feature_columns.append(col_name)
|
||||
|
||||
logger.info(f"Added {len(lag_days)} lagged features", lags=lag_days)
|
||||
logger.info(f"Added {len(lag_days)} lagged features (using shared calculator)", lags=lag_days)
|
||||
return df
|
||||
|
||||
def add_rolling_features(
|
||||
@@ -52,6 +61,7 @@ class AdvancedFeatureEngineer:
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
Add rolling statistics (mean, std, max, min).
|
||||
Uses shared feature calculator for consistency with prediction service.
|
||||
|
||||
Args:
|
||||
df: DataFrame with 'quantity' column
|
||||
@@ -67,24 +77,22 @@ class AdvancedFeatureEngineer:
|
||||
if features is None:
|
||||
features = ['mean', 'std', 'max', 'min']
|
||||
|
||||
df = df.copy()
|
||||
# Use shared calculator for consistent rolling calculation
|
||||
df = self.feature_calculator.calculate_rolling_features(
|
||||
df,
|
||||
windows=windows,
|
||||
statistics=features,
|
||||
mode='training'
|
||||
)
|
||||
|
||||
# Update feature columns list
|
||||
for window in windows:
|
||||
for feature in features:
|
||||
col_name = f'rolling_{feature}_{window}d'
|
||||
if col_name not in self.feature_columns:
|
||||
self.feature_columns.append(col_name)
|
||||
|
||||
if feature == 'mean':
|
||||
df[col_name] = df['quantity'].rolling(window=window, min_periods=max(1, window // 2)).mean()
|
||||
elif feature == 'std':
|
||||
df[col_name] = df['quantity'].rolling(window=window, min_periods=max(1, window // 2)).std()
|
||||
elif feature == 'max':
|
||||
df[col_name] = df['quantity'].rolling(window=window, min_periods=max(1, window // 2)).max()
|
||||
elif feature == 'min':
|
||||
df[col_name] = df['quantity'].rolling(window=window, min_periods=max(1, window // 2)).min()
|
||||
|
||||
self.feature_columns.append(col_name)
|
||||
|
||||
logger.info(f"Added rolling features", windows=windows, features=features)
|
||||
logger.info(f"Added rolling features (using shared calculator)", windows=windows, features=features)
|
||||
return df
|
||||
|
||||
def add_day_of_week_features(self, df: pd.DataFrame, date_column: str = 'date') -> pd.DataFrame:
|
||||
@@ -203,6 +211,7 @@ class AdvancedFeatureEngineer:
|
||||
def add_trend_features(self, df: pd.DataFrame, date_column: str = 'date') -> pd.DataFrame:
|
||||
"""
|
||||
Add trend-based features.
|
||||
Uses shared feature calculator for consistency with prediction service.
|
||||
|
||||
Args:
|
||||
df: DataFrame with date and quantity
|
||||
@@ -211,27 +220,18 @@ class AdvancedFeatureEngineer:
|
||||
Returns:
|
||||
DataFrame with trend features
|
||||
"""
|
||||
df = df.copy()
|
||||
# Use shared calculator for consistent trend calculation
|
||||
df = self.feature_calculator.calculate_trend_features(
|
||||
df,
|
||||
mode='training'
|
||||
)
|
||||
|
||||
# Days since start (linear trend proxy)
|
||||
df['days_since_start'] = (df[date_column] - df[date_column].min()).dt.days
|
||||
|
||||
# Momentum indicators (recent change vs. older change)
|
||||
if 'lag_1_day' in df.columns and 'lag_7_day' in df.columns:
|
||||
df['momentum_1_7'] = df['lag_1_day'] - df['lag_7_day']
|
||||
self.feature_columns.append('momentum_1_7')
|
||||
|
||||
if 'rolling_mean_7d' in df.columns and 'rolling_mean_30d' in df.columns:
|
||||
df['trend_7_30'] = df['rolling_mean_7d'] - df['rolling_mean_30d']
|
||||
self.feature_columns.append('trend_7_30')
|
||||
|
||||
# Velocity (rate of change)
|
||||
if 'lag_1_day' in df.columns and 'lag_7_day' in df.columns:
|
||||
df['velocity_week'] = (df['lag_1_day'] - df['lag_7_day']) / 7
|
||||
self.feature_columns.append('velocity_week')
|
||||
|
||||
self.feature_columns.append('days_since_start')
|
||||
# Update feature columns list
|
||||
for feature_name in ['days_since_start', 'momentum_1_7', 'trend_7_30', 'velocity_week']:
|
||||
if feature_name in df.columns and feature_name not in self.feature_columns:
|
||||
self.feature_columns.append(feature_name)
|
||||
|
||||
logger.debug("Added trend features (using shared calculator)")
|
||||
return df
|
||||
|
||||
def add_cyclical_encoding(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
|
||||
Reference in New Issue
Block a user