SFS and BFS: Sequential Forward and Backward Feature Selection

Interactive tutorial on sequential forward selection (SFS) and backward feature selection (BFS), with practical implementations and visualizations
Author

Nipun Batra

Published

July 24, 2025

Open In Colab
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score as acc
# SequentialFeatureSelector (aliased `sfs`) is the only selector class this
# tutorial needs. mlxtend.feature_selection exposes no `Sequential` name, so
# importing it would raise ImportError before any cell could run.
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from sklearn.datasets import fetch_california_housing

# Load the California housing dataset and pull out features and target
data = fetch_california_housing()
X, y = data['data'], data['target']

# Hold out 25% of the samples for testing; the fixed seed keeps the split
# identical across runs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Flatten both target vectors to 1-D
y_train, y_test = y_train.ravel(), y_test.ravel()

print('Training dataset shape:', X_train.shape, y_train.shape)
print('Testing dataset shape:', X_test.shape, y_test.shape)
Training dataset shape: (15480, 8) (15480,)
Testing dataset shape: (5160, 8) (5160,)
# Base estimator used to score each candidate feature subset. The seed is
# pinned (as it is for the train/test split) so that repeated runs select the
# same features. For a classification target, use DecisionTreeClassifier
# together with scoring='accuracy' instead.
clf = DecisionTreeRegressor(random_state=42)

# Sequential forward selection (SFS): forward=True grows the feature subset
# until k_features=6 features are selected; candidate subsets are compared by
# 5-fold cross-validated negative RMSE. floating=False means this is plain
# SFS, not the floating variant (SFFS).
sfs1 = sfs(clf,
           k_features=6,
           forward=True,
           floating=False,
           verbose=2,
           scoring='neg_root_mean_squared_error',
           cv=5)

# Run the search on the training split only
sfs1 = sfs1.fit(X_train, y_train)
feat_cols = list(sfs1.k_feature_idx_)

# Names of the selected features
np.array(data['feature_names'])[feat_cols]
array(['MedInc', 'AveRooms', 'AveBedrms', 'Population', 'Latitude',
       'Longitude'], dtype='<U10')
# Full list of available feature names, for comparison with the selected subset
data['feature_names']
['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']
# Base estimator used to score each candidate feature subset. The seed is
# pinned so that repeated runs eliminate features in the same order. For a
# classification target, use DecisionTreeClassifier together with
# scoring='accuracy' instead.
clf = DecisionTreeRegressor(random_state=42)

# Sequential backward selection (SBS): forward=False shrinks the feature
# subset until only k_features=1 feature remains; candidate subsets are
# compared by 5-fold cross-validated negative RMSE. floating=False means this
# is the plain (non-floating) variant.
sbs = sfs(clf,
           k_features=1,
           forward=False,
           floating=False,
           verbose=2,
           scoring='neg_root_mean_squared_error',
           cv=5)

# Run the backward search on the training split only.
# NOTE(review): the result is bound to `sfs1`, which replaces the forward
# selector stored under that name earlier in the file. fit() returns the
# selector itself, so `sbs` already refers to the same fitted object; consider
# `sbs = sbs.fit(...)` once nothing downstream relies on `sfs1` here.
sfs1 = sbs.fit(X_train, y_train)