import numpy as np
import matplotlib.pyplot as plt
# IPython/Jupyter magic (not plain-Python syntax): ask the inline matplotlib
# backend to emit 'retina' (high-DPI) figures. Only valid inside a notebook/IPython session.
%config InlineBackend.figure_format = 'retina'
def angle_between(v1, v2):
    """Return the angle between two vectors, in degrees.

    The angle is derived from the cosine similarity
    ``dot(v1, v2) / (||v1|| * ||v2||)``.

    Args:
        v1: 1-D array-like of numbers.
        v2: 1-D array-like of the same length as ``v1``.

    Returns:
        The angle in degrees, in the range [0, 180]. If either vector has
        zero norm the division yields NaN and NaN is returned.
    """
    cos_theta = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
    # Rounding can push the ratio marginally outside [-1, 1], where arccos
    # would return NaN; clip before inverting.
    return np.arccos(np.clip(cos_theta, -1.0, 1.0)) * 180 / np.pi
# Two 2-D example vectors.
v1 = np.array([1, 0])
v2 = np.array([1, 1])

# Draw both vectors as arrows starting at the origin. With angles='xy',
# scale_units='xy' and scale=1 the arrows are drawn in data coordinates,
# so they end exactly at (v1[0], v1[1]) and (v2[0], v2[1]).
plt.quiver([0, 0], [0, 0], [v1[0], v2[0]], [v1[1], v2[1]], angles='xy', scale_units='xy', scale=1)
plt.xlim(-1, 2)
plt.ylim(-1, 2)
print(f"Angle: {angle_between(v1, v2):.2f}°")
Angle: 45.00°
print(f"Angle: {angle_between(v1, np.array([0, 1])):.2f}°")
0, 0], [0, 0], [v1[0], 0], [v1[1], 1], angles='xy', scale_units='xy', scale=1)
plt.quiver([-1, 2)
plt.xlim(-1, 2) plt.ylim(
Angle: 90.00°
import gensim.downloader as api
from scipy.spatial.distance import cosine
# Load pretrained word vectors through gensim's downloader; indexing the
# result with a word (model["king"]) yields that word's embedding vector.
model = api.load("glove-wiki-gigaword-50")  # Small 50D GloVe model
def get_cosine_similarity(word1, word2):
    """Return the angle, in degrees, between the embeddings of two words.

    NOTE: despite the name, this returns the *angle* computed by
    ``angle_between`` (0 = same direction, 90 = orthogonal, 180 = opposite),
    not the cosine similarity itself. The behaviour is kept as-is so that
    existing callers are unaffected.

    Args:
        word1: First word; looked up in the module-level ``model``.
        word2: Second word; looked up in the module-level ``model``.

    Returns:
        The angle in degrees between ``model[word1]`` and ``model[word2]``.
    """
    v1 = model[word1]
    v2 = model[word2]
    return angle_between(v1, v2)
= "king"
word1 = model[word1] v1
v1
array([ 0.50451 , 0.68607 , -0.59517 , -0.022801, 0.60046 , -0.13498 ,
-0.08813 , 0.47377 , -0.61798 , -0.31012 , -0.076666, 1.493 ,
-0.034189, -0.98173 , 0.68229 , 0.81722 , -0.51874 , -0.31503 ,
-0.55809 , 0.66421 , 0.1961 , -0.13495 , -0.11476 , -0.30344 ,
0.41177 , -2.223 , -1.0756 , -1.0783 , -0.34354 , 0.33505 ,
1.9927 , -0.04234 , -0.64319 , 0.71125 , 0.49159 , 0.16754 ,
0.34344 , -0.25663 , -0.8523 , 0.1661 , 0.40102 , 1.1685 ,
-1.0137 , -0.21585 , -0.15155 , 0.78321 , -0.91241 , -1.6106 ,
-0.64426 , -0.51042 ], dtype=float32)
= "queen"
word2 = model[word2]
v2 print(v2)
[ 0.37854 1.8233 -1.2648 -0.1043 0.35829 0.60029
-0.17538 0.83767 -0.056798 -0.75795 0.22681 0.98587
0.60587 -0.31419 0.28877 0.56013 -0.77456 0.071421
-0.5741 0.21342 0.57674 0.3868 -0.12574 0.28012
0.28135 -1.8053 -1.0421 -0.19255 -0.55375 -0.054526
1.5574 0.39296 -0.2475 0.34251 0.45365 0.16237
0.52464 -0.070272 -0.83744 -1.0326 0.45946 0.25302
-0.17837 -0.73398 -0.20025 0.2347 -0.56095 -2.2839
0.0092753 -0.60284 ]
# Angle in degrees between the "king" (v1) and "queen" (v2) embeddings.
angle_between(v1, v2)
38.3805515334704
"uncle"], model["aunt"]) angle_between(model[
40.26145236751397
# Now some dissimilar words
angle_between(model["king"], model["python"])
79.36413285046953
"king") model.most_similar(
[('prince', 0.8236179351806641),
('queen', 0.7839044332504272),
('ii', 0.7746230363845825),
('emperor', 0.7736247777938843),
('son', 0.766719400882721),
('uncle', 0.7627150416374207),
('kingdom', 0.7542160749435425),
('throne', 0.7539914846420288),
('brother', 0.7492411136627197),
('ruler', 0.7434254288673401)]
# most dissimilar words to "king"
# Passing the word as a *negative* example ranks the vocabulary against the
# negated "king" vector, so the top hits point away from "king".
model.most_similar(negative=["king"])
[('4,835', 0.729358434677124),
('rules-based', 0.7123876810073853),
('renos', 0.7085371613502502),
('meawhile', 0.706490159034729),
('nanobiotechnology', 0.6925080418586731),
('m-42', 0.6916395425796509),
('poligny', 0.6882078051567078),
('onyekwe', 0.6877189874649048),
('asie', 0.6861312985420227),
('metabolomics', 0.682388961315155)]
"king"], model["rules-based"]) angle_between(model[
135.42952204160463