import networkx as nx
import itertools
import daft
import random
import requests
import numpy as np
from IPython.display import HTML
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rc

# Render all figure text with a LaTeX serif font
rc("font", family="serif", size=12)
rc("text", usetex=True)
# %matplotlib inline  # IPython magic — uncomment when running inside Jupyter
Imports
Defining the network
# The "student" Bayesian network.
# "V": variable names, "E": directed edges as [parent, child] pairs,
# "Vdata": per-variable metadata:
#   - "ord": topological order, "numoutcomes": number of states,
#   - "vals": state labels, "parents"/"children": graph neighbours,
#   - "cprob": conditional probability table. For root nodes this is a
#     flat prior over "vals"; otherwise it is a dict keyed by str() of
#     the parents' value list (in "parents" order).
network = {
    "V": ["Letter", "Grade", "Intelligence", "SAT", "Difficulty"],
    "E": [["Intelligence", "Grade"],
          ["Difficulty", "Grade"],
          ["Intelligence", "SAT"],
          ["Grade", "Letter"]],
    "Vdata": {
        "Letter": {
            "ord": 4,
            "numoutcomes": 2,
            "vals": ["weak", "strong"],
            "parents": ["Grade"],
            "children": None,
            # P(Letter | Grade)
            "cprob": {
                "['A']": [.1, .9],
                "['B']": [.4, .6],
                "['C']": [.99, .01]
            }
        },
        "SAT": {
            "ord": 3,
            "numoutcomes": 2,
            "vals": ["lowscore", "highscore"],
            "parents": ["Intelligence"],
            "children": None,
            # P(SAT | Intelligence)
            "cprob": {
                "['low']": [.95, .05],
                "['high']": [.2, .8]
            }
        },
        "Grade": {
            "ord": 2,
            "numoutcomes": 3,
            "vals": ["A", "B", "C"],
            "parents": ["Difficulty", "Intelligence"],
            "children": ["Letter"],
            # P(Grade | Difficulty, Intelligence)
            "cprob": {
                "['easy', 'low']": [.3, .4, .3],
                "['easy', 'high']": [.9, .08, .02],
                "['hard', 'low']": [.05, .25, .7],
                "['hard', 'high']": [.5, .3, .2]
            }
        },
        "Intelligence": {
            "ord": 1,
            "numoutcomes": 2,
            "vals": ["low", "high"],
            "parents": None,
            "children": ["SAT", "Grade"],
            # Root node: prior P(Intelligence)
            "cprob": [.7, .3]
        },
        "Difficulty": {
            "ord": 0,
            "numoutcomes": 2,
            "vals": ["easy", "hard"],
            "parents": None,
            "children": ["Grade"],
            # Root node: prior P(Difficulty)
            "cprob": [.6, .4]
        }
    }
}
Drawing the Bayesian Network
# Draw the network with daft: one labelled node per variable,
# then one edge per parent->child relationship taken from "Vdata".
pgm = daft.PGM([8, 8], origin=[0, 0])
# aspect=3 widens each ellipse so the label fits
pgm.add_node(daft.Node('Difficulty', r"Difficulty", 2, 6, aspect=3))
pgm.add_node(daft.Node('Intelligence', r"Intelligence", 5, 6, aspect=3))
pgm.add_node(daft.Node('Grade', r"Grade", 3, 4, aspect=3))
pgm.add_node(daft.Node('SAT', r"SAT", 6, 4, aspect=3))
pgm.add_node(daft.Node('Letter', r"Letter", 4, 2, aspect=3))
for node in network['Vdata']:
    parents = network['Vdata'][node]['parents']
    if parents is not None:
        for parent in parents:
            pgm.add_edge(parent, node)
pgm.render()
Finding the Markov blanket of a node
def find_markov_blanket(node, network):
    '''
    Find the Markov blanket of `node` in the given network.

    The Markov blanket is the union of:
    1. The parents of the node
    2. The children of the node
    3. The parents of the children of the node (co-parents)

    The node itself is never part of its own blanket.
    Returns a list of unique node names.
    '''
    mb = []
    # 1. Parents of the node
    parents = network['Vdata'][node]['parents']
    if parents is not None:
        mb.extend(parents)
    # 2. Children of the node
    children = network['Vdata'][node]['children']
    if children is not None:
        mb.extend(children)
        # 3. Parents of each child. (The original appended `parents`
        # here instead of the child's parents, which dropped co-parents
        # from the blanket; it also crashed when `children` was None
        # because this loop sat outside the None check.)
        for child in children:
            child_parents = network['Vdata'][child]['parents']
            if child_parents is not None:
                mb.extend(child_parents)
    # Deduplicate and exclude the node itself
    blanket = set(mb)
    blanket.discard(node)
    return list(blanket)
# Demo: the Markov blanket of Grade is its parents plus its child.
find_markov_blanket('Grade', network)
# -> ['Difficulty', 'Letter', 'Intelligence'] (set order may vary)
Gibbs Sampling Procedures
Assigning a random state to a node in the network
def pick_random(node, network):
    '''
    Assign a uniformly random state to `node`.

    Returns one of the node's possible values ("vals"),
    each chosen with equal probability.
    '''
    num_outcomes = network['Vdata'][node]["numoutcomes"]
    random_index = random.randint(0, num_outcomes - 1)
    return network['Vdata'][node]["vals"][random_index]
# Demo: draw a random state for SAT.
pick_random('SAT', network)
# -> e.g. 'lowscore'
Pick a random non-evidence node to update in the current iteration
def pick_random_non_evidence_node(non_evidence_nodes):
    """Return a uniformly random element of `non_evidence_nodes`.

    Uses random.choice, the idiomatic equivalent of indexing with
    randint(0, len-1).
    """
    return random.choice(non_evidence_nodes)
Update the value of a node given assignment in previous iteration
def get_next_value(node, network, simulation):
    """
    Sample a new value for `node` conditioned on its parents'
    assignment in the previous iteration (the last entry of
    `simulation`).

    NOTE(review): a full Gibbs update conditions on the node's whole
    Markov blanket; this conditions on the parents only — behaviour
    kept exactly as in the original notebook.
    """
    parents = network['Vdata'][node]['parents']
    if parents is None:
        # Root node: sample from its prior distribution
        cumsum = np.cumsum(network['Vdata'][node]["cprob"])
    else:
        # Look up the CPT row matching the parents' previous values.
        # Order must follow the "parents" list, since CPT keys are
        # str() of the value list in that order.
        values_parents = [simulation[-1][parent] for parent in parents]
        row = network['Vdata'][node]["cprob"][str(values_parents)]
        cumsum = np.cumsum(row)
    # Inverse-CDF sampling: first index where the CDF exceeds the draw
    choice = random.random()
    index = np.argmax(cumsum > choice)
    return network['Vdata'][node]["vals"][index]
Main procedure: Iteratively pick up a non evidence node to update
def gibbs_sampling(network, evidence, niter=2):
    """
    Run Gibbs sampling on `network` with observed `evidence`
    (a dict mapping node name -> observed value).

    Returns a list of niter + 1 assignments (dicts mapping every node
    to a value). The first assignment initialises all nodes at random
    and then clamps the evidence nodes; each subsequent assignment
    resamples one randomly chosen non-evidence node.
    """
    simulation = []
    nodes = network['V']
    non_evidence_nodes = [node for node in nodes if node not in evidence]

    # First iteration: random value for every node...
    d = {}
    for node in nodes:
        d[node] = pick_random(node, network)
    # ...then clamp evidence nodes to their observed values
    for node in evidence:
        d[node] = evidence[node]
    simulation.append(d.copy())

    # Each iteration updates a single random non-evidence node.
    # (The original used xrange, which is Python 2 only.)
    for _ in range(niter):
        node_to_update = pick_random_non_evidence_node(non_evidence_nodes)
        d[node_to_update] = get_next_value(node_to_update, network, simulation)
        simulation.append(d.copy())
    return simulation
Illustration 1
Distribution of Letter given that the student is Intelligent
# Sample the network conditioned on the student being intelligent.
iterations = int(1e4)
sim = gibbs_sampling(network, {"Intelligence": "high"}, iterations)
Removing first 10% samples
# Discard the first 10% of samples as burn-in.
# Integer division: a float slice index is a TypeError on Python 3.
after_removing_burnt_samples = sim[iterations // 10:]
count = {val: 0 for val in network['Vdata']['Letter']['vals']}

# Tally the sampled Letter values.
for assignment in after_removing_burnt_samples:
    count[assignment['Letter']] += 1

count
# -> e.g. {'strong': 7061, 'weak': 1940}

# Convert counts to probabilities, normalising by the number of
# retained samples (90% of `iterations`).
probabilites = {}
for l in count:
    probabilites[l] = count[l] * 1.0 / (.90 * iterations)
probabilites
# -> e.g. {'strong': 0.784..., 'weak': 0.215...}
Wait a minute! What about the marginal distribution of Letter given NO evidence?
# Repeat the experiment with NO evidence to get the marginal of Letter.
iterations = int(1e4)
sim = gibbs_sampling(network, {}, iterations)
after_removing_burnt_samples = sim[iterations // 10:]  # drop 10% burn-in
count = {val: 0 for val in network['Vdata']['Letter']['vals']}
for assignment in after_removing_burnt_samples:
    count[assignment['Letter']] += 1
probabilites_no_evidence = {}
for l in count:
    probabilites_no_evidence[l] = count[l] * 1.0 / (.90 * iterations)
probabilites_no_evidence
# -> e.g. {'strong': 0.476..., 'weak': 0.523...}
How does the evidence about “Intelligent” student affect the quality of letters?
# Side-by-side bar charts: letter quality with vs. without the
# "intelligent student" evidence.
plt.figure(figsize=(10, 8))
plt.subplot(2, 2, 1)
plt.bar(range(2), [probabilites['strong'], probabilites['weak']])
plt.xticks([0.5, 1.5], ['Strong', 'Weak'])
plt.title('Letter Quality given Intelligent Student')
plt.ylim((0, 1.0))
plt.subplot(2, 2, 2)
plt.bar(range(2), [probabilites_no_evidence['strong'], probabilites_no_evidence['weak']])
plt.xticks([0.5, 1.5], ['Strong', 'Weak'])
plt.title('Letter Quality given no prior information')
plt.ylim((0, 1.0));