import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score as acc
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from sklearn.datasets import fetch_california_housing
# Sequential Forward Selection (SFS) and Sequential Backward Selection (SBS)
# Interactive tutorial on SFS and SBS with practical implementations and
# visualizations. (SequentialFeatureSelector is imported above as `sfs`.)
# Load the California housing regression dataset: 8 numeric features,
# target is the median house value (in $100k units).
data = fetch_california_housing()
X = data['data']
y = data['target']

# Hold out 25% of samples for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42)

# Targets from fetch_california_housing are already 1-D; ravel() is a
# harmless safeguard in case a column-vector target is ever supplied.
y_train = y_train.ravel()
y_test = y_test.ravel()

print('Training dataset shape:', X_train.shape, y_train.shape)
print('Testing dataset shape:', X_test.shape, y_test.shape)
# Base estimator: a decision tree regressor (default, unpruned).
clf = DecisionTreeRegressor()
# clf = DecisionTreeClassifier()  # classification alternative

# Build step forward feature selection: greedily add one feature at a
# time until 6 are selected, scoring each candidate subset by 5-fold
# cross-validated RMSE (negated so that higher is better).
sfs1 = sfs(clf,
           k_features=6,
           forward=True,
           floating=False,
           verbose=2,
           # scoring='accuracy',  # use with a classifier
           scoring='neg_root_mean_squared_error',
           cv=5)

# Perform sequential forward selection on the training data.
sfs1 = sfs1.fit(X_train, y_train)

# Column indices of the selected features.
feat_cols = list(sfs1.k_feature_idx_)

# Map the selected column indices back to human-readable feature names,
# e.g. ['MedInc', 'AveRooms', 'AveBedrms', 'Population', 'Latitude',
# 'Longitude'].
print(np.array(data['feature_names'])[feat_cols])
# Fresh base estimator for the backward-elimination run.
clf = DecisionTreeRegressor()
# clf = DecisionTreeClassifier()  # classification alternative

# Build step backward feature selection: start with all 8 features and
# greedily drop one at a time down to a single feature, scoring each
# candidate subset by 5-fold cross-validated RMSE.
sbs = sfs(clf,
          k_features=1,
          forward=False,
          floating=False,
          verbose=2,
          # scoring='accuracy',  # use with a classifier
          scoring='neg_root_mean_squared_error',
          cv=5)

# Perform sequential backward selection; keep the fitted selector in its
# own variable rather than clobbering the forward-selection result.
sbs = sbs.fit(X_train, y_train)