import numpy as np
import matplotlib.pyplot as plt
import torch
import seaborn as sns
import pandas as pd
=torch.distributions
dist
sns.reset_defaults()="talk", font_scale=1)
sns.set_context(context%matplotlib inline
%config InlineBackend.figure_format='retina'
Basic Imports
# Generate complex data: a sine whose frequency grows with x**2,
# i.e. f(x) = sin(2*pi*x**3), sampled on 100 evenly spaced points in [-2, 2]
x = np.linspace(-2, 2, 100)
freq = lambda x: x**2
f = lambda x: np.sin(2 * np.pi * x * freq(x))
# Noisy observations of the true function (Gaussian noise scaled by 0.1)
y = f(x) + np.random.randn(x.shape[0]) * 0.1
# Plot the noisy samples against the true function
plt.figure(figsize=(8, 5))
plt.plot(x, y, 'o', label='data')
plt.plot(x, f(x), label='true function')
plt.legend()
# Learn linear model on data
from sklearn.linear_model import LinearRegression, Ridge

# Ridge regression on the raw 1-D input; both arrays are reshaped to columns
lr1 = Ridge()
lr1.fit(x.reshape(-1, 1), y.reshape(-1, 1))
# Predict on linspace and plot
y_pred = lr1.predict(x.reshape(-1, 1))

plt.figure(figsize=(8, 5))
plt.plot(x, y, 'o', label='data')
plt.plot(x, f(x), label='true function')
plt.plot(x, y_pred, label='linear model')
# Add position encoding
# Gamma(x) = [2^0*pi*x, ..., 2^(k-1)*pi*x, sin(2^0*pi*x), ..., sin(2^(k-1)*pi*x), cos(2^0*pi*x), ..., cos(2^(k-1)*pi*x)]
def gamma(x, k):
    """
    Positional encoding of a 1-D input at k frequencies.

    x: array of N values (any shape; flattened to (N, 1))
    k: int, number of frequencies; frequency j scales x by 2**j * pi
       for j = 0, ..., k-1
    Output: (N, 3k), laid out column-wise as
            [scaled x (k cols), sin(scaled x) (k cols), cos(scaled x) (k cols)]
    """
    x = np.asarray(x).reshape(-1, 1)
    x = np.repeat(x, k, axis=1)
    # Float powers: integer 2 ** np.arange(k) overflows int64 once k > 63,
    # which silently corrupts the high-frequency columns.
    x = x * (2.0 ** np.arange(k) * np.pi)
    z = np.concatenate([np.sin(x), np.cos(x)], axis=1)
    # Concatenate the scaled inputs x with the sin/cos features z
    z = np.concatenate([x, z], axis=1)
    return z
# Visualise every column of the order-2 encoding against x
plt.plot(x, gamma(x, 2), alpha=0.2, color='k')
# Fit linear model with position encoding for k
def fit_plot(x, k):
    """
    Fit a Ridge model on the order-k positional encoding of x and plot the fit.

    Reads the module-level names `y` (targets), `f` (true function), `gamma`
    and `Ridge`; `y` must have the same number of samples as `x`, otherwise
    `lr.fit` raises a ValueError about inconsistent numbers of samples.

    x: 1-D array of inputs
    k: int, encoding order passed to gamma
    Returns None; draws a new matplotlib figure.
    """
    lr = Ridge()
    X_new = gamma(x, k)
    lr.fit(X_new, y.reshape(-1, 1))
    y_pred = lr.predict(X_new)

    plt.figure(figsize=(8, 5))
    plt.plot(x, y, 'o', label='data')
    plt.plot(x, f(x), label='true function')
    plt.plot(x, y_pred, label='linear model with position encoding of order {}'.format(k), lw=3)

    # Title is the score of the fit on the training data
    plt.title('Score: {:.2f}'.format(lr.score(X_new, y.reshape(-1, 1))))

    # Show extrapolation also: predict on a wider grid, [-3, 3], that extends
    # past the range of the inputs the model was fitted on
    x_extra = np.linspace(-3, 3, 100)
    X_extra = gamma(x_extra, k)
    y_extra = lr.predict(X_extra)
    plt.plot(x_extra, y_extra, '--', label='extrapolation', alpha=0.2)

    # Legend outside plot, drawn once after every line has been added
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
fit_plot(x, 1)
ValueError: Found input variables with inconsistent numbers of samples: [100, 768]
fit_plot(x, 2)
fit_plot(x, 3)
fit_plot(x, 7)
fit_plot(x, 10)
fit_plot(x, 70)
!wget https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_mlo.csv
--2023-06-09 15:39:49-- https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_mlo.csv
Resolving gml.noaa.gov (gml.noaa.gov)... 140.172.200.41, 2610:20:8800:6101::29
Connecting to gml.noaa.gov (gml.noaa.gov)|140.172.200.41|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 38018 (37K) [text/csv]
Saving to: ‘co2_mm_mlo.csv’
co2_mm_mlo.csv 100%[===================>] 37.13K 146KB/s in 0.3s
2023-06-09 15:39:51 (146 KB/s) - ‘co2_mm_mlo.csv’ saved [38018/38018]
# Load the downloaded CSV without a header row, skipping the first 72 lines
df = pd.read_csv('co2_mm_mlo.csv', header=None, skiprows=72)
# Use the row index as the input
X = df.index.values
# Column 3 is used as the target — presumably the monthly mean CO2; verify
# against the CSV header.
# NOTE(review): this rebinds the global `y` that fit_plot reads, so calling
# fit_plot(x, k) with the 100-point synthetic x after this cell fails with an
# inconsistent-sample-count ValueError.
y = df[3].values.reshape(-1, 1)
plt.plot(X, y)