Logistic Regression - Basis

ML

Author: Nipun Batra
Published: January 1, 2024

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from latexify import *
from sklearn.linear_model import LogisticRegression
import matplotlib.patches as mpatches
%config InlineBackend.figure_format = 'retina'
# Sample 100 points per feature from a standard normal distribution
np.random.seed(0)
x1 = np.random.randn(1, 100)
x2 = np.random.randn(1, 100)
y = x1**2 + x2**2
x1
array([[ 1.76405235,  0.40015721,  0.97873798,  2.2408932 ,  1.86755799,
        -0.97727788,  0.95008842, -0.15135721, -0.10321885,  0.4105985 ,
         0.14404357,  1.45427351,  0.76103773,  0.12167502,  0.44386323,
         0.33367433,  1.49407907, -0.20515826,  0.3130677 , -0.85409574,
        -2.55298982,  0.6536186 ,  0.8644362 , -0.74216502,  2.26975462,
        -1.45436567,  0.04575852, -0.18718385,  1.53277921,  1.46935877,
         0.15494743,  0.37816252, -0.88778575, -1.98079647, -0.34791215,
         0.15634897,  1.23029068,  1.20237985, -0.38732682, -0.30230275,
        -1.04855297, -1.42001794, -1.70627019,  1.9507754 , -0.50965218,
        -0.4380743 , -1.25279536,  0.77749036, -1.61389785, -0.21274028,
        -0.89546656,  0.3869025 , -0.51080514, -1.18063218, -0.02818223,
         0.42833187,  0.06651722,  0.3024719 , -0.63432209, -0.36274117,
        -0.67246045, -0.35955316, -0.81314628, -1.7262826 ,  0.17742614,
        -0.40178094, -1.63019835,  0.46278226, -0.90729836,  0.0519454 ,
         0.72909056,  0.12898291,  1.13940068, -1.23482582,  0.40234164,
        -0.68481009, -0.87079715, -0.57884966, -0.31155253,  0.05616534,
        -1.16514984,  0.90082649,  0.46566244, -1.53624369,  1.48825219,
         1.89588918,  1.17877957, -0.17992484, -1.07075262,  1.05445173,
        -0.40317695,  1.22244507,  0.20827498,  0.97663904,  0.3563664 ,
         0.70657317,  0.01050002,  1.78587049,  0.12691209,  0.40198936]])
# Threshold y = x1^2 + x2^2 at the unit circle: outside -> 1, inside -> 0
y[y > 1] = 1
y[y < 1] = 0

# Flip the first 10 positive labels to 0 to inject some label noise
c = 0
for i in range(100):
    if y[0, i] == 1:
        y[0, i] = 0
        c += 1
    if c == 10:
        break
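The labels therefore follow the unit circle, $y = \mathbb{1}[x_1^2 + x_2^2 > 1]$, with the first ten positive labels flipped to 0 as noise, so the best achievable boundary is circular but the classes overlap slightly. As a quick sanity check (a sketch; output not shown), one can inspect the resulting class balance:

# Count how many points ended up in each class after
# thresholding and noise injection
np.unique(y, return_counts=True)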
latexify()
plt.scatter(x1, x2, c=y, s=5)
yellow_patch = mpatches.Patch(color='yellow', label='Oranges')
blue_patch = mpatches.Patch(color='darkblue', label='Tomatoes')
plt.legend(handles=[yellow_patch, blue_patch])
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$")
plt.gca().set_aspect('equal')
format_axes(plt.gca())
plt.savefig("../figures/logistic-regression/logisitic-circular-data.pdf", bbox_inches="tight", transparent=True)

np.vstack((x1, x2)).shape
(2, 100)
clf_1 = LogisticRegression(penalty=None, solver='newton-cg')
clf_1.fit(np.vstack((x1, x2)).T, y.ravel())
LogisticRegression(penalty=None, solver='newton-cg')
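Before plotting, it helps to quantify how well a purely linear boundary can do on this data. A quick check (a sketch using scikit-learn's score, which reports mean training accuracy; output not shown):

# Mean training accuracy of the linear-feature model
X_lin = np.vstack((x1, x2)).T
print(clf_1.score(X_lin, y.ravel()))

Since one class sits inside the circle and the other outside, no straight line can separate them well; the contour plot below makes this visible by evaluating the classifier on a dense grid.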
X = np.vstack((x1, x2)).T
# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 0.3, X[:, 0].max() + 0.3
y_min, y_max = X[:, 1].min() - 0.3, X[:, 1].max() + 0.3
h = 0.02
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

Z = clf_1.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
latexify()
yellow_patch = mpatches.Patch(color='yellow', label='Oranges')
blue_patch = mpatches.Patch(color='darkblue', label='Tomatoes')
pink_patch = mpatches.Patch(color='darksalmon', label='Predict oranges')
lblue_patch = mpatches.Patch(color='lightblue', label='Predict tomatoes')
plt.legend(handles=[yellow_patch, blue_patch, pink_patch, lblue_patch], loc='upper center',
           bbox_to_anchor=(0.5, 1.25),
          ncol=2, fancybox=True, shadow=True)

plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.4)
plt.scatter(x1, x2, c=y, s=5)
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$")
plt.gca().set_aspect('equal')
plt.savefig("../figures/logistic-regression/logisitic-linear-prediction.pdf", bbox_inches="tight", transparent=True)
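A linear boundary cannot carve out a circular region. The remedy is a basis expansion: augment each input with its squared terms, $\phi(x) = (x_1, x_2, x_1^2, x_2^2)$. A boundary that is linear in $\phi(x)$, i.e. $w_1 x_1 + w_2 x_2 + w_3 x_1^2 + w_4 x_2^2 + b = 0$, is quadratic in the original coordinates and can therefore describe a circle or an ellipse.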

new_x = np.zeros((4, 100))
new_x[0] = x1
new_x[1] = x2
new_x[2] = x1**2
new_x[3] = x2**2
clf = LogisticRegression(penalty=None, solver='newton-cg')
clf.fit(new_x.T, y.ravel())
LogisticRegression(penalty=None, solver='newton-cg')
clf.coef_
array([[-0.50464855, -0.30337009,  1.08937351,  0.73697949]])
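The squared terms carry the largest weights, which is what lets the model bend the boundary. As a rough check (a sketch; the exact numbers depend on the noisy labels), setting $x_2 = 0$ in $w^\top \phi(x) + b = 0$ leaves the quadratic $w_3 x_1^2 + w_1 x_1 + b = 0$, whose roots are where the boundary crosses the $x_1$-axis:

w = clf.coef_[0]
b = clf.intercept_[0]
# Roots of w3*x1^2 + w1*x1 + b = 0: boundary crossings along the x1-axis,
# expected to land roughly near +/-1 (the unit circle), shifted by label noise
print(np.roots([w[2], w[0], b]))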
new_x.T[:, 0]
array([ 1.76405235,  0.40015721,  0.97873798,  2.2408932 ,  1.86755799,
       -0.97727788,  0.95008842, -0.15135721, -0.10321885,  0.4105985 ,
        0.14404357,  1.45427351,  0.76103773,  0.12167502,  0.44386323,
        0.33367433,  1.49407907, -0.20515826,  0.3130677 , -0.85409574,
       -2.55298982,  0.6536186 ,  0.8644362 , -0.74216502,  2.26975462,
       -1.45436567,  0.04575852, -0.18718385,  1.53277921,  1.46935877,
        0.15494743,  0.37816252, -0.88778575, -1.98079647, -0.34791215,
        0.15634897,  1.23029068,  1.20237985, -0.38732682, -0.30230275,
       -1.04855297, -1.42001794, -1.70627019,  1.9507754 , -0.50965218,
       -0.4380743 , -1.25279536,  0.77749036, -1.61389785, -0.21274028,
       -0.89546656,  0.3869025 , -0.51080514, -1.18063218, -0.02818223,
        0.42833187,  0.06651722,  0.3024719 , -0.63432209, -0.36274117,
       -0.67246045, -0.35955316, -0.81314628, -1.7262826 ,  0.17742614,
       -0.40178094, -1.63019835,  0.46278226, -0.90729836,  0.0519454 ,
        0.72909056,  0.12898291,  1.13940068, -1.23482582,  0.40234164,
       -0.68481009, -0.87079715, -0.57884966, -0.31155253,  0.05616534,
       -1.16514984,  0.90082649,  0.46566244, -1.53624369,  1.48825219,
        1.89588918,  1.17877957, -0.17992484, -1.07075262,  1.05445173,
       -0.40317695,  1.22244507,  0.20827498,  0.97663904,  0.3563664 ,
        0.70657317,  0.01050002,  1.78587049,  0.12691209,  0.40198936])
X = np.vstack((x1, x2)).T
X.shape
(100, 2)
# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 0.3, X[:, 0].max() + 0.3
y_min, y_max = X[:, 1].min() - 0.3, X[:, 1].max() + 0.3
h = 0.02
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

Z = clf.predict(np.c_[xx.ravel(), yy.ravel(), np.square(xx.ravel()), np.square(yy.ravel())])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
latexify()
yellow_patch = mpatches.Patch(color='yellow', label='Oranges')
blue_patch = mpatches.Patch(color='darkblue', label='Tomatoes')
pink_patch = mpatches.Patch(color='darksalmon', label='Predict oranges')
lblue_patch = mpatches.Patch(color='lightblue', label='Predict tomatoes')
plt.legend(handles=[yellow_patch, blue_patch, pink_patch, lblue_patch], loc='upper center',
           bbox_to_anchor=(0.5, 1.25),
          ncol=2, fancybox=True, shadow=True)

plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.4)
plt.scatter(x1, x2, c=y, s=5)
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$")
plt.gca().set_aspect('equal')
plt.savefig("../figures/logistic-regression/logisitic-circular-prediction.pdf", bbox_inches="tight", transparent=True)

Z.shape
(261, 272)
np.c_[xx.ravel(), yy.ravel(), np.square(xx.ravel()), np.square(yy.ravel())]
array([[-2.85298982, -2.52340315,  8.13955089,  6.36756347],
       [-2.83298982, -2.52340315,  8.0258313 ,  6.36756347],
       [-2.81298982, -2.52340315,  7.9129117 ,  6.36756347],
       ...,
       [ 2.52701018,  2.67659685,  6.38578047,  7.16417069],
       [ 2.54701018,  2.67659685,  6.48726088,  7.16417069],
       [ 2.56701018,  2.67659685,  6.58954129,  7.16417069]])
xx.ravel()
array([-2.85298982, -2.83298982, -2.81298982, ...,  2.52701018,
        2.54701018,  2.56701018])
# create a mesh to plot in (tighter margin of h around the data this time)
h = 0.02
x_min, x_max = X[:, 0].min() - h, X[:, 0].max() + h
y_min, y_max = X[:, 1].min() - h, X[:, 1].max() + h
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel(), np.square(xx.ravel()), np.square(yy.ravel())])
# Put the result into a color plot
Z = Z[:, 0].reshape(xx.shape)
latexify()
plt.contourf(xx, yy, Z, levels=np.linspace(0, 1.1, num=10), cmap='Blues')
plt.gca().set_aspect('equal')
#plt.scatter(x1, x2, c=y)
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$")
plt.colorbar(label='P(Tomatoes)')
plt.savefig("../figures/logistic-regression/logisitic-circular-probability.pdf", bbox_inches="tight", transparent=True)
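A spot check of the fitted probabilities (a small sketch; the two test points are arbitrary): the origin lies well inside the circle and should get a high $P(\text{Tomatoes})$, while a point such as $(2, 2)$ lies far outside and should not.

# Evaluate the model at the origin and at (2, 2), using the same
# basis order (x1, x2, x1^2, x2^2) as in training
pts = np.array([[0.0, 0.0], [2.0, 2.0]])
print(clf.predict_proba(np.c_[pts, pts**2]))  # column 0 is P(Tomatoes)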

xx.shape
(233, 244)
Z.size
56852
np.linspace(0, 1.1, num=50)
array([0.        , 0.02244898, 0.04489796, 0.06734694, 0.08979592,
       0.1122449 , 0.13469388, 0.15714286, 0.17959184, 0.20204082,
       0.2244898 , 0.24693878, 0.26938776, 0.29183673, 0.31428571,
       0.33673469, 0.35918367, 0.38163265, 0.40408163, 0.42653061,
       0.44897959, 0.47142857, 0.49387755, 0.51632653, 0.53877551,
       0.56122449, 0.58367347, 0.60612245, 0.62857143, 0.65102041,
       0.67346939, 0.69591837, 0.71836735, 0.74081633, 0.76326531,
       0.78571429, 0.80816327, 0.83061224, 0.85306122, 0.8755102 ,
       0.89795918, 0.92040816, 0.94285714, 0.96530612, 0.9877551 ,
       1.01020408, 1.03265306, 1.05510204, 1.07755102, 1.1       ])