import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score as acc
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from sklearn.datasets import fetch_california_housing
from mlxtend.feature_selection import Sequential

# Load the California housing dataset and pull out features and target
data = fetch_california_housing()
X, y = data['data'], data['target']

# Hold out 25% of the rows for testing; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Flatten both target arrays to 1-D
y_train, y_test = y_train.ravel(), y_test.ravel()

print('Training dataset shape:', X_train.shape, y_train.shape)
print('Testing dataset shape:', X_test.shape, y_test.shape)
Training dataset shape: (15480, 8) (15480,)
Testing dataset shape: (5160, 8) (5160,)
# Base estimator evaluated inside the selector. The seed is fixed (matching
# the train/test split) so repeated runs report the same CV scores.
clf = DecisionTreeRegressor(random_state=42)

# Sequential forward selection (SFS, non-floating): starting from no
# features, add one feature per round until 6 are selected. Candidate
# subsets are ranked by 5-fold cross-validated negative RMSE, so scores
# closer to 0 are better.
sfs1 = sfs(clf,
           k_features=6,
           forward=True,
           floating=False,
           verbose=2,
           scoring='neg_root_mean_squared_error',
           cv=5)

# Run the forward search on the training split
sfs1 = sfs1.fit(X_train, y_train)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    1.1s finished

[2020-01-18 12:14:41] Features: 1/6 -- score: -0.9799855743872914[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    1.4s finished

[2020-01-18 12:14:42] Features: 2/6 -- score: -0.6329184295861372[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    1.6s finished

[2020-01-18 12:14:44] Features: 3/6 -- score: -0.6522227062026185[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    1.9s finished

[2020-01-18 12:14:45] Features: 4/6 -- score: -0.6627208539646012[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    1.9s finished

[2020-01-18 12:14:47] Features: 5/6 -- score: -0.6800772168838566[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    1.7s finished

[2020-01-18 12:14:49] Features: 6/6 -- score: -0.6988981779449276
# Column indices of the feature subset held by the fitted selector `sfs1`
feat_cols = [*sfs1.k_feature_idx_]

# Translate those column indices back into dataset feature names
np.asarray(data['feature_names'])[feat_cols]
array(['MedInc', 'AveRooms', 'AveBedrms', 'Population', 'Latitude',
       'Longitude'], dtype='<U10')
# Full list of feature names in the dataset, for comparison with the subset
data.feature_names
['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']
# Fresh base estimator for the backward search. The seed is fixed (matching
# the train/test split) so repeated runs report the same CV scores.
clf = DecisionTreeRegressor(random_state=42)

# Sequential backward selection (SBS, non-floating): starting from all
# features, drop one feature per round until a single feature remains.
# Candidate subsets are ranked by 5-fold cross-validated negative RMSE, so
# scores closer to 0 are better.
sbs = sfs(clf,
          k_features=1,
          forward=False,
          floating=False,
          verbose=2,
          scoring='neg_root_mean_squared_error',
          cv=5)

# Run the backward search on the training split.
# NOTE(review): the fitted backward selector is bound to `sfs1`, which
# replaces the forward selector created earlier in this file. The binding is
# kept because later code may read `sfs1` -- confirm whether that is intended.
sfs1 = sbs.fit(X_train, y_train)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    5.2s finished

[2020-01-18 12:26:50] Features: 7/1 -- score: -0.7097202737310716[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    3.9s finished

[2020-01-18 12:26:54] Features: 6/1 -- score: -0.6985180206966245[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    2.8s finished

[2020-01-18 12:26:57] Features: 5/1 -- score: -0.6766309576585353[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    1.8s finished

[2020-01-18 12:26:58] Features: 4/1 -- score: -0.6683220592138266[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    1.2s finished

[2020-01-18 12:27:00] Features: 3/1 -- score: -0.6613854217987167[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.6s finished

[2020-01-18 12:27:00] Features: 2/1 -- score: -0.6320510743499647[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s finished

[2020-01-18 12:27:00] Features: 1/1 -- score: -0.9799855743872914