import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score as acc
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from sklearn.datasets import fetch_california_housing
# SFS and BFS
# Interactive tutorial on sequential forward selection (SFS) and sequential
# backward selection (BFS) with practical implementations and visualizations.
# NOTE(review): a second, truncated mlxtend import was removed here — it tried
# to import a nonexistent name `Sequential` (ImportError); the selector is
# already imported as `sfs` at the top of the file.

# Load the California housing dataset (regression target: median house value).
data = fetch_california_housing()
X = data['data']    # feature matrix, shape (20640, 8)
y = data['target']  # continuous target vector
# Train/test split: 75% train / 25% test, fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42)

# Flatten the targets to 1-D (mlxtend/sklearn estimators expect shape (n,)).
y_train = y_train.ravel()
y_test = y_test.ravel()

print('Training dataset shape:', X_train.shape, y_train.shape)
print('Testing dataset shape:', X_test.shape, y_test.shape)
# Output:
# Training dataset shape: (15480, 8) (15480,)
# Testing dataset shape: (5160, 8) (5160,)
# Base estimator for the selector (regression task).
clf = DecisionTreeRegressor()
# clf = DecisionTreeClassifier()

# Build step forward feature selection: greedily add features until 6 remain.
sfs1 = sfs(clf,
           k_features=6,
           forward=True,       # forward selection (add one feature per step)
           floating=False,     # plain SFS, no conditional exclusion step
           verbose=2,
           # scoring='accuracy',
           scoring='neg_root_mean_squared_error',
           cv=5)

# Perform SFS (fits the estimator on every candidate feature subset).
sfs1 = sfs1.fit(X_train, y_train)

# Column indices of the selected features.
feat_cols = list(sfs1.k_feature_idx_)

# Map the selected indices back to feature names.
np.array(data['feature_names'])[feat_cols]
# Output:
# array(['MedInc', 'AveRooms', 'AveBedrms', 'Population', 'Latitude',
#        'Longitude'], dtype='<U10')
# All 8 feature names of the California housing dataset, for reference.
data['feature_names']
# Output:
# ['MedInc',
#  'HouseAge',
#  'AveRooms',
#  'AveBedrms',
#  'Population',
#  'AveOccup',
#  'Latitude',
#  'Longitude']
# Fresh base estimator for the backward pass.
clf = DecisionTreeRegressor()
# clf = DecisionTreeClassifier()

# Build step BACKWARD feature selection: start from all features and greedily
# remove one per step down to k_features=1.  (The original comment said
# "forward" here, but forward=False makes this sequential backward selection.)
sbs = sfs(clf,
          k_features=1,
          forward=False,      # backward selection (remove one feature per step)
          floating=False,
          verbose=2,
          # scoring='accuracy',
          scoring='neg_root_mean_squared_error',
          cv=5)

# Perform SBS.
# NOTE(review): the original rebinds `sfs1` here, clobbering the forward-pass
# result above; kept for fidelity, but consider `sbs = sbs.fit(...)` instead.
sfs1 = sbs.fit(X_train, y_train)