import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score as acc
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from sklearn.datasets import fetch_california_housing
from mlxtend.feature_selection import Sequential
# Read data
= fetch_california_housing()
data = data['data']
X = data['target']
y
# Train/test split
= train_test_split(
X_train, X_test, y_train, y_test
X,
y,=0.25,
test_size=42)
random_state
= y_train.ravel()
y_train = y_test.ravel()
y_test
print('Training dataset shape:', X_train.shape, y_train.shape)
print('Testing dataset shape:', X_test.shape, y_test.shape)
Training dataset shape: (15480, 8) (15480,)
Testing dataset shape: (5160, 8) (5160,)
= DecisionTreeRegressor()
clf # clf = DecisionTreeClassifier()
# Build step forward feature selection
= sfs(clf,
sfs1 =6,
k_features=True,
forward=False,
floating=2,
verbose# scoring='accuracy',
='neg_root_mean_squared_error',
scoring=5)
cv
# Perform SFFS
= sfs1.fit(X_train, y_train) sfs1
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.2s remaining: 0.0s
[Parallel(n_jobs=1)]: Done 8 out of 8 | elapsed: 1.1s finished
[2020-01-18 12:14:41] Features: 1/6 -- score: -0.9799855743872914[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.2s remaining: 0.0s
[Parallel(n_jobs=1)]: Done 7 out of 7 | elapsed: 1.4s finished
[2020-01-18 12:14:42] Features: 2/6 -- score: -0.6329184295861372[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.3s remaining: 0.0s
[Parallel(n_jobs=1)]: Done 6 out of 6 | elapsed: 1.6s finished
[2020-01-18 12:14:44] Features: 3/6 -- score: -0.6522227062026185[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.3s remaining: 0.0s
[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 1.9s finished
[2020-01-18 12:14:45] Features: 4/6 -- score: -0.6627208539646012[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.4s remaining: 0.0s
[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 1.9s finished
[2020-01-18 12:14:47] Features: 5/6 -- score: -0.6800772168838566[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.5s remaining: 0.0s
[Parallel(n_jobs=1)]: Done 3 out of 3 | elapsed: 1.7s finished
[2020-01-18 12:14:49] Features: 6/6 -- score: -0.6988981779449276
= list(sfs1.k_feature_idx_)
feat_cols
# data['features']
'feature_names'])[feat_cols] np.array(data[
array(['MedInc', 'AveRooms', 'AveBedrms', 'Population', 'Latitude',
'Longitude'], dtype='<U10')
'feature_names'] data[
['MedInc',
'HouseAge',
'AveRooms',
'AveBedrms',
'Population',
'AveOccup',
'Latitude',
'Longitude']
= DecisionTreeRegressor()
clf # clf = DecisionTreeClassifier()
# Build step forward feature selection
= sfs(clf,
sbs =1,
k_features=False,
forward=False,
floating=2,
verbose# scoring='accuracy',
='neg_root_mean_squared_error',
scoring=5)
cv
# Perform SFFS
= sbs.fit(X_train, y_train) sfs1
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.7s remaining: 0.0s
[Parallel(n_jobs=1)]: Done 8 out of 8 | elapsed: 5.2s finished
[2020-01-18 12:26:50] Features: 7/1 -- score: -0.7097202737310716[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.6s remaining: 0.0s
[Parallel(n_jobs=1)]: Done 7 out of 7 | elapsed: 3.9s finished
[2020-01-18 12:26:54] Features: 6/1 -- score: -0.6985180206966245[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.5s remaining: 0.0s
[Parallel(n_jobs=1)]: Done 6 out of 6 | elapsed: 2.8s finished
[2020-01-18 12:26:57] Features: 5/1 -- score: -0.6766309576585353[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.4s remaining: 0.0s
[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 1.8s finished
[2020-01-18 12:26:58] Features: 4/1 -- score: -0.6683220592138266[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.3s remaining: 0.0s
[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 1.2s finished
[2020-01-18 12:27:00] Features: 3/1 -- score: -0.6613854217987167[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.2s remaining: 0.0s
[Parallel(n_jobs=1)]: Done 3 out of 3 | elapsed: 0.6s finished
[2020-01-18 12:27:00] Features: 2/1 -- score: -0.6320510743499647[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.0s remaining: 0.0s
[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.1s finished
[2020-01-18 12:27:00] Features: 1/1 -- score: -0.9799855743872914