import matplotlib.pyplot as plt
import numpy as np
import torch
import pandas as pd
# Retina mode
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
Set
ML
= np.linspace(0, 1, 100)
A = np.arange(0, 1, 0.01) A
# uncountable set
= np.random.rand(100000000)
x #print(x)
np.unique(x).size
100000000
# Set of lines in 2d
def line_fx(x, m, c):
return m*x + c
1, 2, 3) line_fx(
5
= np.linspace(-10, 10, 100)
x_lin = line_fx(x_lin, 2, 3)
y_lin_2_3
plt.plot(x_lin, y_lin_2_3)
from ipywidgets import interact
import ipywidgets as widgets
def plot_line(m, c):
= np.linspace(-10, 10, 100)
x_lin = line_fx(x_lin, m, c)
y_lin
=(8, 6))
plt.figure(figsize=f'Line: y = {m}x + {c}')
plt.plot(x_lin, y_lin, label0, color='black', linewidth=0.8, linestyle='--')
plt.axhline(0, color='black', linewidth=0.8, linestyle='--')
plt.axvline(=0.5)
plt.grid(alpha
plt.legend()"Interactive Line Plot")
plt.title("x")
plt.xlabel("y")
plt.ylabel(-10, 10)
plt.ylim(
plt.show()
# Interactive widget
interact(plot_line, =widgets.FloatSlider(value=1, min=-10, max=10, step=0.1, description='Slope (m)'),
m=widgets.FloatSlider(value=0, min=-10, max=10, step=0.1, description='Intercept (c)')); c
= np.random.rand(50000)
samples_uniform plt.hist(samples_uniform)
(array([5044., 4923., 5075., 4798., 4977., 4982., 4989., 5158., 4992.,
5062.]),
array([3.93041625e-05, 1.00032757e-01, 2.00026211e-01, 3.00019664e-01,
4.00013117e-01, 5.00006570e-01, 6.00000024e-01, 6.99993477e-01,
7.99986930e-01, 8.99980383e-01, 9.99973836e-01]),
<BarContainer object of 10 artists>)
samples_uniform
array([0.18674884, 0.81968297, 0.80087693, ..., 0.46548473, 0.47943922,
0.73968035])
= np.random.randn(50000)
samples_normal plt.hist(samples_normal)
(array([ 23., 285., 1985., 7467., 14803., 14914., 7988., 2201.,
309., 25.]),
array([-4.18722189, -3.35305381, -2.51888572, -1.68471764, -0.85054956,
-0.01638148, 0.8177866 , 1.65195468, 2.48612276, 3.32029084,
4.15445892]),
<BarContainer object of 10 artists>)
50] samples_normal[:
array([-1.02610077, 1.30693882, -0.60887913, -2.21509403, -0.49288884,
-0.24255606, -0.06006607, -0.43524481, 0.09849432, 0.65723845,
0.26736775, -0.23655818, -2.18103935, -0.49017392, 1.62213243,
0.38596106, 0.93529816, 1.08752614, -0.4461042 , -0.95299851,
1.38512913, 0.09622675, -0.72466762, -0.12871054, -0.50039256,
0.11997974, -1.54530777, 0.27708632, -1.59812337, 0.91816234,
-0.07142259, -1.00183667, 0.77816444, 0.24435284, 0.91035827,
0.60326872, 0.57121044, 1.26167048, -1.15016846, -0.69882365,
-1.07502868, -0.11305347, 0.82249031, 0.49697962, -1.21883061,
-1.96468898, -0.01928378, -0.56361649, 0.48693249, -0.27086149])
# Plot some 20 lines for random m and c
= np.random.rand(20)*20 - 10
m = np.random.rand(20)*20 - 10
c
for i in range(20):
= line_fx(x_lin, m[i], c[i])
y_lin =f'Line: y = {m[i]:.2f}x + {c[i]:.2f}') plt.plot(x_lin, y_lin, label
np.random.randn?
Signature: np.random.randn(*args)
Docstring:
randn(d0, d1, ..., dn)
Return a sample (or samples) from the "standard normal" distribution.
.. note::
This is a convenience function for users porting code from Matlab,
and wraps `standard_normal`. That function takes a
tuple to specify the size of the output, which is consistent with
other NumPy functions like `numpy.zeros` and `numpy.ones`.
.. note::
New code should use the
`~numpy.random.Generator.standard_normal`
method of a `~numpy.random.Generator` instance instead;
please see the :ref:`random-quick-start`.
If positive int_like arguments are provided, `randn` generates an array
of shape ``(d0, d1, ..., dn)``, filled
with random floats sampled from a univariate "normal" (Gaussian)
distribution of mean 0 and variance 1. A single float randomly sampled
from the distribution is returned if no argument is provided.
Parameters
----------
d0, d1, ..., dn : int, optional
The dimensions of the returned array, must be non-negative.
If no argument is given a single Python float is returned.
Returns
-------
Z : ndarray or float
A ``(d0, d1, ..., dn)``-shaped array of floating-point samples from
the standard normal distribution, or a single such float if
no parameters were supplied.
See Also
--------
standard_normal : Similar, but takes a tuple as its argument.
normal : Also accepts mu and sigma arguments.
random.Generator.standard_normal: which should be used for new code.
Notes
-----
For random samples from the normal distribution with mean ``mu`` and
standard deviation ``sigma``, use::
sigma * np.random.randn(...) + mu
Examples
--------
>>> np.random.randn()
2.1923875335537315 # random
Two-by-four array of samples from the normal distribution with
mean 3 and standard deviation 2.5:
>>> 3 + 2.5 * np.random.randn(2, 4)
array([[-4.49401501, 4.00950034, -1.81814867, 7.29718677], # random
[ 0.39924804, 4.68456316, 4.99394529, 4.84057254]]) # random
Type: method
# Plot some 20 lines for random m and c
= 5000
N = np.random.randn(N)*0.5
m = np.random.randn(N)*0.5
c
for i in range(N):
= line_fx(x_lin, m[i], c[i])
y_lin =f'Line: y = {m[i]:.2f}x + {c[i]:.2f}',
plt.plot(x_lin, y_lin, label='k', alpha=0.01) color
# Set of cosines with varying phase (fixed amplitude and frequency)
def cosine_fx(x, A=1, f=1, phi=0):
return A*np.cos(2*np.pi*f*x + phi)
= np.linspace(-10, 10, 1000)
x_lin = cosine_fx(x_lin, 1, 1, 0)
y_cos_1_1_0
plt.plot(x_lin, y_cos_1_1_0)
def plot_cosine(A, f, phi):
= np.linspace(0, 2, 500) # x range for visualization
x = cosine_fx(x, A, f, phi)
y
=(8, 6))
plt.figure(figsize=f'Cosine: y = {A}cos(2π{f}x + {phi})')
plt.plot(x, y, label0, color='black', linewidth=0.8, linestyle='--')
plt.axhline(=0.5)
plt.grid(alpha
plt.legend()"Interactive Cosine Plot")
plt.title("x")
plt.xlabel("y")
plt.ylabel(-2, 2)
plt.ylim(
# Interactive widget
interact(plot_cosine, =widgets.FloatSlider(value=1, min=0.1, max=2, step=0.1, description='Amplitude (A)'),
A=widgets.FloatSlider(value=1, min=0.1, max=5, step=0.1, description='Frequency (f)'),
f=widgets.FloatSlider(value=0, min=0, max=2*np.pi, step=0.1, description='Phase (φ)')); phi
# Set datastructure in Python
= {1, 2, 3, 4, 5}
A print(A, type(A))
{1, 2, 3, 4, 5} <class 'set'>
= set([1, 2, 3, 4, 5])
A print(A, type(A))
{1, 2, 3, 4, 5} <class 'set'>
# unique elements
= {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}
A print(A, len(A))
{1, 2, 3, 4, 5} 5
# Can set contain a set?
= {1, 2, 3}
A for a in A:
print(a)
1
2
3
# why below code doesn't work? homework
try:
= {1, 2, 3, {4, 5}}
A except Exception as e:
print(e)
unhashable type: 'set'
# subset
= {1, 2, 3, 4, 5}
A = {1, 2, 3}
B
print(B.issubset(A))
True
print(A.issubset(B))
False
print(A.issuperset(B))
True
A.issubset(A)
True
# Other methods in set
A
{1, 2, 3, 4, 5}
# check proper subset
def is_proper_subset(A, B):
return A.issubset(B) and A != B
1, 2, 3}, {1, 2, 3, 4}) is_proper_subset({
True
1, 2, 3, 4}, {1, 2, 3, 4}) is_proper_subset({
False
1, 2, 6}, {1, 2, 3, 4}) is_proper_subset({
False
# empty set subset of every set
= set()
empty_set = {1, 2, 3}
A
empty_set.issubset(A)
True
is_proper_subset(empty_set, A)
True
# be careful with empty set definition. Below is not an empty set but
# a dictionary
= {}
empty_setprint(type(empty_set))
<class 'dict'>
# Sets in NumPy
= np.array([1, 2, 3, 4, 5])
A = np.array([1, 2, 3])
B
# is B a subset of A?
print(np.in1d(B, A).all())
True
/var/folders/z8/gpvqr8mn3w9_f38byxhnsk780000gn/T/ipykernel_16373/3300908401.py:6: DeprecationWarning: `in1d` is deprecated. Use `np.isin` instead.
print(np.in1d(B, A).all())
np.in1d?
Signature: np.in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None)
Call signature: np.in1d(*args, **kwargs)
Type: _ArrayFunctionDispatcher
String form: <function in1d at 0x107c70fe0>
File: ~/mambaforge/lib/python3.12/site-packages/numpy/lib/_arraysetops_impl.py
Docstring:
Test whether each element of a 1-D array is also present in a second array.
.. deprecated:: 2.0
Use :func:`isin` instead of `in1d` for new code.
Returns a boolean array the same length as `ar1` that is True
where an element of `ar1` is in `ar2` and False otherwise.
Parameters
----------
ar1 : (M,) array_like
Input array.
ar2 : array_like
The values against which to test each value of `ar1`.
assume_unique : bool, optional
If True, the input arrays are both assumed to be unique, which
can speed up the calculation. Default is False.
invert : bool, optional
If True, the values in the returned array are inverted (that is,
False where an element of `ar1` is in `ar2` and True otherwise).
Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
to (but is faster than) ``np.invert(in1d(a, b))``.
kind : {None, 'sort', 'table'}, optional
The algorithm to use. This will not affect the final result,
but will affect the speed and memory use. The default, None,
will select automatically based on memory considerations.
* If 'sort', will use a mergesort-based approach. This will have
a memory usage of roughly 6 times the sum of the sizes of
`ar1` and `ar2`, not accounting for size of dtypes.
* If 'table', will use a lookup table approach similar
to a counting sort. This is only available for boolean and
integer arrays. This will have a memory usage of the
size of `ar1` plus the max-min value of `ar2`. `assume_unique`
has no effect when the 'table' option is used.
* If None, will automatically choose 'table' if
the required memory allocation is less than or equal to
6 times the sum of the sizes of `ar1` and `ar2`,
otherwise will use 'sort'. This is done to not use
a large amount of memory by default, even though
'table' may be faster in most cases. If 'table' is chosen,
`assume_unique` will have no effect.
.. versionadded:: 1.8.0
Returns
-------
in1d : (M,) ndarray, bool
The values `ar1[in1d]` are in `ar2`.
See Also
--------
isin : Version of this function that preserves the
shape of ar1.
Notes
-----
`in1d` can be considered as an element-wise function version of the
python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
equivalent to ``np.array([item in b for item in a])``.
However, this idea fails if `ar2` is a set, or similar (non-sequence)
container: As ``ar2`` is converted to an array, in those cases
``asarray(ar2)`` is an object array rather than the expected array of
contained values.
Using ``kind='table'`` tends to be faster than `kind='sort'` if the
following relationship is true:
``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
but may use greater memory. The default value for `kind` will
be automatically selected based only on memory usage, so one may
manually set ``kind='table'`` if memory constraints can be relaxed.
.. versionadded:: 1.4.0
Examples
--------
>>> import numpy as np
>>> test = np.array([0, 1, 2, 5, 0])
>>> states = [0, 2]
>>> mask = np.in1d(test, states)
>>> mask
array([ True, False, True, False, True])
>>> test[mask]
array([0, 2, 0])
>>> mask = np.in1d(test, states, invert=True)
>>> mask
array([False, True, False, True, False])
>>> test[mask]
array([1, 5])
Class docstring:
Class to wrap functions with checks for __array_function__ overrides.
All arguments are required, and can only be passed by position.
Parameters
----------
dispatcher : function or None
The dispatcher function that returns a single sequence-like object
of all arguments relevant. It must have the same signature (except
the default values) as the actual implementation.
If ``None``, this is a ``like=`` dispatcher and the
``_ArrayFunctionDispatcher`` must be called with ``like`` as the
first (additional and positional) argument.
implementation : function
Function that implements the operation on NumPy arrays without
overrides. Arguments passed calling the ``_ArrayFunctionDispatcher``
will be forwarded to this (and the ``dispatcher``) as if using
``*args, **kwargs``.
Attributes
----------
_implementation : function
The original implementation passed in.
np.isin?
Signature:
np.isin(
element,
test_elements,
assume_unique=False,
invert=False,
*,
kind=None,
)
Call signature: np.isin(*args, **kwargs)
Type: _ArrayFunctionDispatcher
String form: <function isin at 0x107c711c0>
File: ~/mambaforge/lib/python3.12/site-packages/numpy/lib/_arraysetops_impl.py
Docstring:
Calculates ``element in test_elements``, broadcasting over `element` only.
Returns a boolean array of the same shape as `element` that is True
where an element of `element` is in `test_elements` and False otherwise.
Parameters
----------
element : array_like
Input array.
test_elements : array_like
The values against which to test each value of `element`.
This argument is flattened if it is an array or array_like.
See notes for behavior with non-array-like parameters.
assume_unique : bool, optional
If True, the input arrays are both assumed to be unique, which
can speed up the calculation. Default is False.
invert : bool, optional
If True, the values in the returned array are inverted, as if
calculating `element not in test_elements`. Default is False.
``np.isin(a, b, invert=True)`` is equivalent to (but faster
than) ``np.invert(np.isin(a, b))``.
kind : {None, 'sort', 'table'}, optional
The algorithm to use. This will not affect the final result,
but will affect the speed and memory use. The default, None,
will select automatically based on memory considerations.
* If 'sort', will use a mergesort-based approach. This will have
a memory usage of roughly 6 times the sum of the sizes of
`element` and `test_elements`, not accounting for size of dtypes.
* If 'table', will use a lookup table approach similar
to a counting sort. This is only available for boolean and
integer arrays. This will have a memory usage of the
size of `element` plus the max-min value of `test_elements`.
`assume_unique` has no effect when the 'table' option is used.
* If None, will automatically choose 'table' if
the required memory allocation is less than or equal to
6 times the sum of the sizes of `element` and `test_elements`,
otherwise will use 'sort'. This is done to not use
a large amount of memory by default, even though
'table' may be faster in most cases. If 'table' is chosen,
`assume_unique` will have no effect.
Returns
-------
isin : ndarray, bool
Has the same shape as `element`. The values `element[isin]`
are in `test_elements`.
Notes
-----
`isin` is an element-wise function version of the python keyword `in`.
``isin(a, b)`` is roughly equivalent to
``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences.
`element` and `test_elements` are converted to arrays if they are not
already. If `test_elements` is a set (or other non-sequence collection)
it will be converted to an object array with one element, rather than an
array of the values contained in `test_elements`. This is a consequence
of the `array` constructor's way of handling non-sequence collections.
Converting the set to a list usually gives the desired behavior.
Using ``kind='table'`` tends to be faster than `kind='sort'` if the
following relationship is true:
``log10(len(test_elements)) >
(log10(max(test_elements)-min(test_elements)) - 2.27) / 0.927``,
but may use greater memory. The default value for `kind` will
be automatically selected based only on memory usage, so one may
manually set ``kind='table'`` if memory constraints can be relaxed.
.. versionadded:: 1.13.0
Examples
--------
>>> import numpy as np
>>> element = 2*np.arange(4).reshape((2, 2))
>>> element
array([[0, 2],
[4, 6]])
>>> test_elements = [1, 2, 4, 8]
>>> mask = np.isin(element, test_elements)
>>> mask
array([[False, True],
[ True, False]])
>>> element[mask]
array([2, 4])
The indices of the matched values can be obtained with `nonzero`:
>>> np.nonzero(mask)
(array([0, 1]), array([1, 0]))
The test can also be inverted:
>>> mask = np.isin(element, test_elements, invert=True)
>>> mask
array([[ True, False],
[False, True]])
>>> element[mask]
array([0, 6])
Because of how `array` handles sets, the following does not
work as expected:
>>> test_set = {1, 2, 4, 8}
>>> np.isin(element, test_set)
array([[False, False],
[False, False]])
Casting the set to a list gives the expected result:
>>> np.isin(element, list(test_set))
array([[False, True],
[ True, False]])
Class docstring:
Class to wrap functions with checks for __array_function__ overrides.
All arguments are required, and can only be passed by position.
Parameters
----------
dispatcher : function or None
The dispatcher function that returns a single sequence-like object
of all arguments relevant. It must have the same signature (except
the default values) as the actual implementation.
If ``None``, this is a ``like=`` dispatcher and the
``_ArrayFunctionDispatcher`` must be called with ``like`` as the
first (additional and positional) argument.
implementation : function
Function that implements the operation on NumPy arrays without
overrides. Arguments passed calling the ``_ArrayFunctionDispatcher``
will be forwarded to this (and the ``dispatcher``) as if using
``*args, **kwargs``.
Attributes
----------
_implementation : function
The original implementation passed in.
A, B
(array([1, 2, 3, 4, 5]), array([1, 2, 3]))
= np.in1d(B, A) response
/var/folders/z8/gpvqr8mn3w9_f38byxhnsk780000gn/T/ipykernel_16373/1008538269.py:1: DeprecationWarning: `in1d` is deprecated. Use `np.isin` instead.
response = np.in1d(B, A)
if False in response:
print("B is not a subset of A")
else:
print("B is a subset of A")
B is a subset of A
all() response.
np.True_
False, True]).astype(int).sum() np.array([
np.int64(1)
int).sum() == len(B) response.astype(
np.True_
all() np.isin(B, A).
np.True_
# Case where B is not a subset of A
= np.array([1, 2, 6])
B = np.array([1, 2, 3, 4, 5])
A
all() np.isin(B, A).
np.False_
= np.array([])
empty_set = np.array([1, 2, 3])
A
all() np.isin(empty_set, A).
np.True_
# Visualising sets using Venn diagrams
from matplotlib_venn import venn2
# Define the sets
= {1, 2, 3}
set1 = {2, 3, 5}
set2
# Create the Venn diagram
= venn2([set1, set2], ('Set 1', 'Set 2')) venn
# Define the sets
= {1, 2, 3}
setA = {2, 3, 5}
setB
# Create the Venn diagram
= venn2([setA, setB], ('A', 'B'))
venn
# Customize the labels to show the elements and sizes
try:
'10').set_text(
venn.get_label_by_id(f"A: {', '.join(map(str, setA - setB))}\n(Size: {len(setA - setB)})"
# Only in A
) '01').set_text(
venn.get_label_by_id(f"B: {', '.join(map(str, setB - setA))}\n(Size: {len(setB - setA)})"
# Only in B
) '11').set_text(
venn.get_label_by_id(f"A ∩ B: {', '.join(map(str, setA & setB))}\n(Size: {len(setA & setB)})"
# Intersection (A ∩ B)
) except:
pass
# Display the plot
"Venn Diagram with Labels A, B, and A ∩ B") plt.title(
Text(0.5, 1.0, 'Venn Diagram with Labels A, B, and A ∩ B')
= set([1,2,3])
Set_A = set([2,3,5])
Set_B
# Union
= Set_A.union(Set_B)
Union print('Union:', Union)
Union: {1, 2, 3, 5}
# numpy
= np.array(list(Set_A))
Set_A = np.array(list(Set_B))
Set_B
= np.union1d(Set_A, Set_B)
Union print('Union:', Union)
Union: [1 2 3 5]
# From scratch
= np.array([1, 2, 3])
a = np.array([2, 3, 5])
b
= a.copy()
union for i in b:
if i not in union:
= np.append(union, i)
union print('Union:', union)
Union: [1 2 3 5]
= []
union for element in a:
if element not in union:
union.append(element)for element in b:
if element not in union:
union.append(element)
= np.array(union)
union print('Union:', union)
Union: [1 2 3 5]
np.unique(np.concatenate([a, b]))
array([1, 2, 3, 5])
# Intersection
= set([1,2,3])
Set_A = set([2,3,5])
Set_B = Set_A.intersection(Set_B)
Intersection print('Intersection:', Intersection)
Intersection: {2, 3}
# Intersection using numpy
= np.array([1, 2, 3])
A = np.array([2, 3, 5])
B
= np.intersect1d(A, B)
Intersection print('Intersection:', Intersection)
Intersection: [2 3]
# From scratch
= []
intersection for i in a:
if i in b:
intersection.append(i)= np.array(intersection)
intersection print('Intersection:', intersection)
Intersection: [2 3]
# Difference
= Set_A.difference(Set_B)
Difference print('Difference:', Difference)
Difference: {1}
Set_B.difference(Set_A)
{5}
# Difference in numpy
= np.setdiff1d(A, B)
Difference_A_B print('Difference A/B:', Difference_A_B)
= np.setdiff1d(B, A)
Difference_B_A print('Difference B/A:', Difference_B_A)
Difference A/B: [1]
Difference B/A: [5]
# From scratch
= []
difference_A_B for i in a:
if i not in b:
difference_A_B.append(i)= np.array(difference_A_B)
difference_A_B print('Difference A/B:', difference_A_B)
Difference A/B: [1]
def difference(A, B):
"""
Function to find the difference between two sets A and B
A: numpy array 1d
B: numpy array 1d
Returns:
difference_A_B: numpy array 1d
"""
= []
difference_A_B for i in A:
if i not in B:
difference_A_B.append(i)= np.array(difference_A_B)
difference_A_B return difference_A_B
# Complement
= np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
universal_set
= np.array([1, 2, 3])
A
= np.setdiff1d(universal_set, A)
complement_A print('Complement of A:', complement_A)
Complement of A: [ 4 5 6 7 8 9 10]
# Disjoint sets
= set([1,2,3])
Set_A = set([4,5,6])
Set_B
= Set_A.intersection(Set_B)
Intersection print('Intersection:', Intersection)
if len(Intersection) == 0:
print('Sets are disjoint')
else:
print('Sets are not disjoint')
Intersection: set()
Sets are disjoint
= np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
collection_sets
collection_sets
# check if all sets are disjoint
= True disjoint
# Associative property of union
# (A ∪ B) ∪ C = A ∪ (B ∪ C)
= np.array([1, 2, 3])
A = np.array([2, 3, 4])
B = np.array([3, 4, 5])
C
# (A ∪ B) ∪ C
= np.union1d(np.union1d(A, B), C)
lhs print('(A ∪ B) ∪ C:', lhs)
# A ∪ (B ∪ C)
= np.union1d(A, np.union1d(B, C))
rhs print('A ∪ (B ∪ C):', rhs)
(A ∪ B) ∪ C: [1 2 3 4 5]
A ∪ (B ∪ C): [1 2 3 4 5]
# Associative property of intersection
# De Morgan's laws