Random Processes
Textbook link: https://inferentialthinking.com/chapters/09/Randomness.html
Coin Flipping
For one coin flip, P(Heads)=0.5, P(Tails)=0.5
Probability of five consecutive heads is
P(H)xP(H)xP(H)xP(H)xP(H)=1/32
This is the same as the probability of zero heads in five flips:
P(T)xP(T)xP(T)xP(T)xP(T)=1/32
# Probability that all five flips come up heads: (1/2)^5
(1 / 2) ** 5
0.03125
Probability of exactly one head in 5 flips is
P(H)xP(T)xP(T)xP(T)xP(T)+
P(T)xP(H)xP(T)xP(T)xP(T)+
P(T)xP(T)xP(H)xP(T)xP(T)+
P(T)xP(T)xP(T)xP(H)xP(T)+
P(T)xP(T)xP(T)xP(T)xP(H)
= 5/32
# Exactly one head: 5 possible positions for the head, each sequence has probability (1/2)^5
5 * (1 / 2) ** 5
0.15625
Probability of two or more heads in 5 flips is
1 - P(zero heads) - P(one head) = 26/32
# Two or more heads: complement of "zero heads" (1/32) plus "exactly one head" (5/32)
1 - (1 / 32 + 5 / 32)
0.8125
Coin Flipping Simulator
import numpy as np
# A single flip: pick one of the two faces at random
print(np.random.choice(['heads', 'tails']))
# Five flips, returned together as one array
print(np.random.choice(['heads', 'tails'], size=5))
heads ['heads' 'tails' 'tails' 'tails' 'heads']
# Ten runs, each consisting of 5 coin flips; every run is printed as it is drawn
for run_number in range(10):
    outcomes = np.random.choice(['heads', 'tails'], size=5)
    print(outcomes)
['tails' 'heads' 'tails' 'tails' 'heads'] ['heads' 'tails' 'tails' 'tails' 'tails'] ['tails' 'heads' 'heads' 'heads' 'tails'] ['tails' 'tails' 'tails' 'tails' 'heads'] ['heads' 'heads' 'heads' 'tails' 'tails'] ['heads' 'heads' 'heads' 'tails' 'tails'] ['tails' 'heads' 'tails' 'tails' 'tails'] ['heads' 'tails' 'heads' 'tails' 'tails'] ['tails' 'heads' 'heads' 'heads' 'tails'] ['tails' 'heads' 'tails' 'tails' 'tails']
# Count the heads in the most recent run: comparing the array with 'heads'
# gives a boolean mask, and counting its True entries gives the number of heads.
# np.count_nonzero does this in one vectorised call instead of iterating the
# array element by element with the builtin sum().
np.count_nonzero(outcomes == 'heads')
1
# Simulate 1000 runs of 5 flips and record the number of heads in each run
headcount_list = []
for each_flip in range(1000):
    outcomes = np.random.choice(['heads', 'tails'], 5)
    # Vectorised count of heads; int() stores a plain Python integer so the
    # printed list stays readable (NumPy scalars may print as np.int64(...))
    num_heads = int(np.count_nonzero(outcomes == 'heads'))
    headcount_list.append(num_heads)
print(headcount_list)
[2, 1, 0, 4, 4, 4, 4, 3, 4, 2, 3, 3, 2, 3, 4, 2, 4, 2, 2, 3, 3, 4, 2, 4, 4, 2, 2, 2, 2, 3, 3, 3, 2, 2, 3, 1, 4, 1, 2, 2, 2, 3, 3, 2, 3, 4, 5, 2, 4, 4, 2, 5, 2, 5, 5, 2, 3, 0, 1, 2, 2, 3, 3, 2, 3, 1, 3, 3, 1, 3, 2, 2, 3, 4, 3, 1, 2, 4, 1, 3, 0, 3, 3, 4, 1, 2, 3, 2, 2, 3, 5, 2, 3, 4, 3, 4, 4, 4, 2, 5, 2, 1, 2, 2, 4, 3, 3, 4, 3, 3, 2, 2, 4, 3, 3, 2, 1, 3, 4, 1, 0, 3, 4, 3, 2, 3, 1, 3, 3, 2, 3, 1, 3, 3, 3, 1, 2, 0, 2, 2, 3, 2, 2, 4, 3, 3, 3, 1, 2, 2, 3, 1, 3, 4, 2, 1, 2, 1, 2, 3, 1, 2, 1, 3, 2, 3, 3, 3, 2, 2, 3, 3, 2, 4, 2, 0, 2, 3, 2, 2, 3, 1, 4, 2, 3, 1, 3, 2, 2, 3, 3, 3, 3, 3, 3, 2, 1, 1, 2, 3, 1, 2, 2, 2, 5, 3, 4, 3, 2, 3, 5, 4, 1, 3, 3, 2, 2, 4, 3, 1, 2, 4, 2, 4, 1, 0, 3, 0, 1, 3, 0, 4, 2, 3, 4, 3, 2, 2, 3, 5, 1, 3, 2, 4, 2, 3, 4, 2, 2, 4, 4, 3, 4, 3, 5, 4, 1, 1, 4, 1, 4, 2, 1, 3, 5, 3, 2, 2, 3, 4, 2, 3, 3, 1, 1, 3, 1, 3, 2, 3, 3, 3, 3, 2, 3, 2, 1, 3, 2, 3, 2, 1, 4, 2, 2, 2, 2, 3, 3, 1, 1, 2, 1, 2, 2, 3, 3, 2, 1, 3, 0, 1, 3, 2, 1, 4, 2, 2, 3, 1, 1, 3, 2, 3, 0, 3, 2, 4, 3, 3, 3, 5, 4, 3, 2, 2, 3, 2, 2, 3, 2, 2, 2, 1, 3, 0, 2, 4, 2, 1, 3, 2, 2, 2, 3, 2, 3, 2, 2, 4, 3, 1, 2, 1, 4, 4, 3, 3, 1, 3, 4, 1, 3, 1, 4, 2, 3, 2, 2, 1, 2, 3, 2, 0, 4, 2, 1, 3, 2, 2, 1, 3, 3, 2, 3, 4, 3, 3, 2, 5, 3, 1, 2, 3, 1, 2, 2, 3, 1, 5, 4, 4, 2, 0, 2, 4, 2, 3, 2, 3, 3, 2, 4, 2, 2, 2, 0, 1, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 2, 2, 1, 0, 3, 4, 3, 1, 3, 3, 3, 3, 2, 1, 4, 2, 2, 2, 2, 2, 3, 5, 4, 2, 2, 2, 1, 2, 1, 3, 1, 4, 3, 2, 2, 0, 2, 3, 3, 1, 4, 2, 0, 3, 2, 3, 3, 0, 3, 1, 4, 3, 2, 2, 2, 3, 3, 2, 3, 3, 4, 3, 4, 4, 2, 2, 1, 3, 2, 0, 1, 4, 3, 3, 3, 1, 2, 3, 3, 4, 3, 4, 2, 3, 4, 1, 3, 2, 2, 3, 3, 3, 4, 2, 3, 4, 1, 3, 2, 2, 2, 2, 1, 3, 3, 2, 1, 2, 3, 2, 0, 5, 3, 2, 1, 2, 5, 1, 3, 3, 2, 2, 0, 4, 2, 1, 2, 4, 2, 1, 3, 2, 2, 3, 4, 3, 1, 2, 2, 5, 2, 2, 3, 2, 2, 3, 4, 3, 2, 5, 2, 3, 2, 4, 0, 2, 3, 3, 3, 2, 3, 2, 3, 2, 4, 5, 3, 2, 2, 4, 2, 4, 1, 2, 0, 2, 3, 2, 3, 2, 3, 3, 2, 1, 0, 3, 4, 2, 2, 2, 3, 4, 5, 3, 2, 2, 3, 2, 1, 3, 3, 1, 3, 2, 5, 2, 4, 2, 3, 3, 5, 3, 1, 3, 2, 2, 0, 3, 3, 4, 2, 4, 0, 3, 4, 3, 
1, 1, 2, 1, 1, 2, 1, 3, 3, 3, 3, 3, 2, 3, 2, 3, 3, 2, 2, 2, 2, 2, 4, 2, 1, 2, 2, 1, 4, 3, 3, 2, 2, 4, 2, 3, 1, 3, 3, 3, 2, 4, 4, 1, 3, 4, 2, 4, 4, 5, 3, 3, 2, 1, 3, 3, 1, 3, 2, 4, 2, 1, 4, 2, 2, 3, 1, 4, 1, 5, 2, 4, 3, 1, 4, 1, 3, 3, 3, 3, 5, 4, 1, 3, 2, 1, 3, 2, 3, 3, 3, 5, 2, 2, 2, 5, 4, 1, 0, 4, 3, 3, 3, 1, 2, 1, 1, 1, 2, 4, 2, 1, 2, 4, 3, 3, 3, 2, 4, 3, 3, 3, 2, 4, 2, 2, 2, 1, 3, 2, 4, 1, 2, 3, 1, 2, 3, 2, 2, 2, 1, 5, 4, 1, 1, 5, 2, 2, 4, 5, 1, 2, 0, 3, 2, 3, 3, 2, 1, 3, 4, 3, 4, 3, 3, 3, 1, 3, 1, 2, 2, 1, 3, 2, 0, 4, 1, 1, 2, 3, 2, 3, 4, 3, 3, 3, 2, 2, 2, 3, 2, 1, 3, 2, 4, 4, 1, 2, 2, 0, 3, 1, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 3, 4, 1, 1, 4, 3, 1, 3, 1, 3, 2, 1, 4, 3, 0, 4, 1, 1, 1, 1, 4, 5, 1, 2, 3, 2, 3, 3, 2, 2, 1, 3, 1, 4, 2, 1, 2, 3, 3, 2, 2, 5, 3, 4, 1, 3, 3, 3, 2, 2, 2, 0, 2, 2, 3, 1, 4, 1, 3, 3, 3, 2, 1, 3, 3, 3, 3, 4, 1, 2, 2, 3, 2, 3, 2, 3, 2, 3, 3, 2, 2, 4, 2, 2, 3, 3, 3, 2, 1, 3, 2, 3, 5, 3, 1, 2, 3, 3, 3, 4, 4, 5, 3, 4, 1, 3, 4, 2, 5, 2, 2, 3, 4, 2, 4, 2, 3, 3]
# Plotting the head counts
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.set_xlabel('number of heads in 5 flips')
ax.set_ylabel('number of runs')
# Head counts are the integers 0..5, so use one unit-wide bin centred on each
# integer. The default of 10 equal-width bins over [0, 5] produces half-unit
# bins, leaving every other bin empty and the bars off-centre.
ax.hist(headcount_list, bins=np.arange(-0.5, 6.5, 1))
(array([ 33., 0., 149., 0., 317., 0., 329., 0., 136., 36.]), array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. ]), <BarContainer object of 10 artists>)
# Proportion of runs in which all 5 flips were heads.
# Divide by the actual number of recorded runs rather than a hard-coded 1000,
# so the result stays correct if the number of simulated runs is changed.
np.count_nonzero(np.array(headcount_list) == 5) / len(headcount_list)
0.036
# Tally how many runs produced each distinct head count
values, counts = np.unique(headcount_list, return_counts=True)
print(values)
print(counts)
[0 1 2 3 4 5] [ 33 149 317 329 136 36]
More than two outcomes
Suppose a hat contains equal numbers of red, blue, purple, and cyan balls. What is the approximate probability of getting exactly two blue and exactly two purple balls if you draw 7 balls, replacing each ball after it is drawn?
num_simulations = 1000
match_count = 0
# Repeat the experiment num_simulations times
for each_draw in range(num_simulations):
    # One experiment: 7 draws with replacement from the four colours
    results = np.random.choice(['red', 'blue', 'purple', 'cyan'], size=7)
    # Tally the blue and the purple draws in this experiment
    blue_count = np.count_nonzero(results == 'blue')
    purple_count = np.count_nonzero(results == 'purple')
    # A success needs exactly two blue AND exactly two purple balls
    if blue_count == 2 and purple_count == 2:
        match_count += 1
# The fraction of successful experiments estimates the probability
prob_estimate = match_count / num_simulations
print(prob_estimate)
0.096
Birthday Problem
Assuming birthdays are evenly distributed across the 365 days of a year, what is the probability that at least two members of our class share a birthday?
def get_birthdays(group_size, days_in_year=365):
    """Draw a random birthday for every member of a group.

    Birthdays are encoded as integers from 0 to ``days_in_year - 1`` and are
    drawn with replacement, so the result may contain duplicates (it is an
    array, not a set).

    Args:
        group_size: Number of people in the group.
        days_in_year: Number of distinct possible birthdays (default 365).

    Returns:
        A NumPy integer array of length ``group_size``.
    """
    # An integer first argument samples from np.arange(days_in_year) directly,
    # which avoids converting range(...) into an array on every call.
    return np.random.choice(days_in_year, size=group_size)
def count_matched_birthdays(birthdays):
    """Count the distinct birthdays that occur more than once.

    Args:
        birthdays: Array-like of birthday values, one entry per person.

    Returns:
        The number of distinct values appearing at least twice, as a plain
        int. This is 0 when every birthday is unique or the input is empty.
    """
    # Only the per-value occurrence counts are needed; the unique values
    # themselves are discarded.
    _, counts = np.unique(birthdays, return_counts=True)
    # Vectorised count of the values seen more than once.
    return int(np.count_nonzero(counts > 1))
def calculate_probability(num_simulations, groupsize):
    """Estimate by simulation the probability of a shared birthday in a group.

    For each of ``num_simulations`` trials a fresh group of ``groupsize``
    birthdays is drawn with get_birthdays and checked with
    count_matched_birthdays.

    Returns:
        The fraction of trials in which at least one birthday was matched.
    """
    trials_with_match = sum(
        1
        for _ in range(num_simulations)
        if count_matched_birthdays(get_birthdays(groupsize))
    )
    return trials_with_match / num_simulations
# Estimate the match probability for a group of 25 from 1000 simulated groups
calculate_probability(num_simulations=1000, groupsize=25)
0.584
# Estimate the match probability for a range of group sizes
num_simulations = 1000
class_sizes = [2, 4, 8, 16, 32, 64, 128, 256]
# One estimate per group size, in the same order as class_sizes
prob_array = np.array(
    [calculate_probability(num_simulations, size) for size in class_sizes]
)
prob_array
array([0.001, 0.018, 0.086, 0.286, 0.765, 0.998, 1. , 1. ])
# Plot the estimated match probability against group size
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(4, 4))
# Connect the estimates with a line and mark each simulated group size
ax.plot(class_sizes, prob_array)
ax.scatter(class_sizes, prob_array, s=20)
ax.set_xlabel('group size')
ax.set_ylabel('probability of a matched birthday')
Text(0, 0.5, 'probability of a matched birthday')