by Samip Karki and Alexander Matoushek
Our project is inspired by the paper "The Sticker Collector's Problem" by Diniz et al. (https://www.tandfonline.com/doi/abs/10.4169/college.math.j.47.4.255)
In this paper, the authors investigate the following problem:
If there are 649 distinct stickers of players in the Fifa World Cup and a single package has 5 distinct stickers, what is the average number of packages you would need to buy to get the full collection of 649 distinct stickers?
Let M be the number of distinct stickers in the whole collection.
Let n be the number of stickers in one package.
The first thing we needed to do was import all of the necessary packages for our functions.
import random
import numpy as np
import matplotlib.pyplot as plt
import math
import time
from scipy import stats
Before creating a function that returns the desired number of packs needed to complete the set, we first wanted to try our method on a much smaller example than M = 649 and n = 5. We chose M = 5 and n = 2, and printed each step of the stickers being added to our list to make sure it was working properly. We labeled the stickers 0 through M-1 rather than 1 through M, because this was easier with Python's zero-based indexing.
distinctStickers = 5
packSize = 2
myStickers = [0] * distinctStickers
k = 1
# slot 0 already holds its own sticker value (0), so the set is treated as
# complete once only that slot still reads 0
while myStickers.count(0) != 1:
    package = random.sample(range(0, distinctStickers), packSize)
    print(myStickers)
    for i in package:
        if i not in myStickers:
            myStickers[i] = i
    k += 1
print(myStickers)
print(k)
[0, 0, 0, 0, 0]
[0, 0, 0, 3, 4]
[0, 0, 0, 3, 4]
[0, 0, 0, 3, 4]
[0, 1, 0, 3, 4]
[0, 1, 2, 3, 4]
6
The method tried above behaves as expected on our given random sample of sticker packages. (Note that k starts at 1 here, so the printed 6 overcounts the 5 packs actually opened; the function below initializes k = 0 so that it returns exactly the number of packs.) We should be able to easily implement this method as a function.
#input M and n, then returns the number of packages that completed the set
def stickers(distinctStickers, packSize):
    myStickers = [0] * distinctStickers
    k = 0
    while myStickers.count(0) != 1:
        package = random.sample(range(0, distinctStickers), packSize)
        for i in package:
            if i not in myStickers:
                myStickers[i] = i
        k += 1
    return k
Let's test the function above to make sure its estimate of how many packs the sticker collection requires is roughly in the right ballpark. The analytical solution to this problem given in the paper is about 913 packages. The number from our function won't be exactly 913 every time, but if we run this simulation many times and average the results, we should get a number very close to 913.
stickers(649, 5)
935
Our function told us that in this particular random simulation you would need 935 packs to get the complete set, which is close to the analytical mean. This suggests our function is giving an appropriate approximation. It will let us build large lists of simulations for various pack sizes, take the means of those lists to estimate the packs required, and analyze some other features of the lists.
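For reference, the analytical expectation can also be computed directly by inclusion-exclusion over the sets of missing stickers. The sketch below is our own derivation, not necessarily the exact formula used in the paper, and `expected_packs` is a name we chose. The key fact is that the probability a fixed set of j stickers is entirely absent from one pack of n distinct stickers is C(M-j, n)/C(M, n), and summing the tail probabilities of the completion time gives the expectation.

```python
from fractions import Fraction
from math import comb

def expected_packs(M, n):
    # E[T] = sum_{j=1}^{M} (-1)^(j+1) * C(M, j) / (1 - p_j),
    # where p_j = C(M - j, n) / C(M, n) is the probability that a fixed
    # set of j stickers is entirely absent from a single pack
    total = Fraction(0)
    for j in range(1, M + 1):
        p_j = Fraction(comb(M - j, n), comb(M, n))
        total += (-1) ** (j + 1) * comb(M, j) / (1 - p_j)
    return total
```

For M = 5 and n = 1 this reproduces the classic coupon-collector value 5·(1 + 1/2 + 1/3 + 1/4 + 1/5) = 137/12 ≈ 11.42, and for n = M it returns exactly 1. Evaluating it exactly at M = 649 is slow because the binomial coefficients involved are enormous, which is part of what makes a Monte Carlo approach attractive.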
A small additional thing we wanted to look into was how the runtime of our simulations changes with the number of stickers per pack. To do this, we recorded the time immediately before and immediately after creating each list of simulations, and saved the difference as the runtime. Building each list inside its own timing code keeps the measurement accurate, which is also why we did not wrap this step in a function.
Now, we're ready to start analyzing our various lists of packs for different pack sizes, and start looking for trends!
We started by creating our first list, where pack size is 5.
startTime = time.time()
stickerList = [stickers(649,5) for i in range(10000)]
endTime = time.time()
#print("runtime:", endTime - startTime)
runTime = endTime - startTime
print(runTime)
423.47702288627625
Note that the runtime for this list was about 423 seconds. We can use the built-in sum function to find the mean of the list, which is our sample mean number of packs required to complete the set at this pack size. This is what should give us an accurate approximation of the true mean.
listMean = sum(stickerList)/10000
print(listMean)
916.1791
We obtained a sample mean of 916.18, which is very close to the true mean of 913.1. If we ran even more than 10,000 simulations, we would get an even more accurate approximation, but this would take too much time given our limitations. Let's make a histogram of our 10,000 simulations to see what sort of distribution it follows.
plt.hist(stickerList, bins = 50)
plt.xlim(0,2000)
plt.xlabel("number of packs bought to complete the set")
plt.ylabel("frequency");
The resulting histogram is fairly normal, with a slight right skew. It is reassuring that the data is centered where it should be, in the 800-900 range.
Making a list of proportions
The next thing we wanted to implement was a list of proportions, specifically the proportion of the simulations that completed the set within each given number of packs. This list lets us find the probability of completing the set with any given number of packs bought, anywhere from the smallest number of packs any trial took to the largest. To figure out this function, we again started with a very small example list.
test = np.array([1,1,2,3,4])
props = np.zeros((5,2))
start = 0
stop = 5
count = 0  # running total (avoids shadowing the built-in sum used elsewhere)
for i in range(start,stop):
    props[i,0] = i
    count = count + np.count_nonzero(test == i)
    props[i,1] = count/(stop-start)
props
array([[0. , 0. ],
       [1. , 0.4],
       [2. , 0.6],
       [3. , 0.8],
       [4. , 1. ]])
The array in the output above shows that the probability of completing the set in 1 pack in our example is 0.4, which is correct because 2/5 numbers in our list are 1s. The probability is also 1 after 4 packs, because if you buy 4 packs, you will have completed the set in all 5 trials in our example. This method is working, so it is now time to implement this as a function.
#input a list of many simulations, output is coordinates for a probability graph
def get_proportions(stickerList):
    stickerList = np.sort(stickerList)
    start = stickerList.min()
    stop = stickerList.max()
    # note: range(start, stop) stops one short of the maximum value,
    # so the row for the largest observed pack count is never filled in
    props = np.zeros((stop, 2))
    count = 0
    for i in range(start, stop):
        props[i,0] = i
        count = count + np.count_nonzero(stickerList == i)
        props[i,1] = count/len(stickerList)
    return props
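As an aside, the same cumulative count can be computed without an explicit loop using NumPy. This is a sketch of our own (the name `get_proportions_np` is ours); it returns one row per pack count from the minimum to the maximum observed, including the maximum:

```python
import numpy as np

def get_proportions_np(sticker_list):
    arr = np.sort(np.asarray(sticker_list))
    xs = np.arange(arr.min(), arr.max() + 1)  # include the maximum value
    # empirical CDF: fraction of simulations finished within x packs
    probs = np.searchsorted(arr, xs, side="right") / arr.size
    return np.column_stack((xs, probs))
```

On the small example list [1, 1, 2, 3, 4] this reproduces the probabilities 0.4, 0.6, 0.8, 1.0 for 1 through 4 packs.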
Let's make a slightly larger test list using our sticker function and plot the proportion list to see if it is working properly. Based on the findings in the article, this plot should show an asymptotic trend where the probability is 0 for a while and then, somewhere near the mean, shoots up rapidly toward 1.
test_list = [stickers(100,5) for i in range(10000)]
test_plot=get_proportions(test_list)
plt.plot(test_plot[:,0],test_plot[:,1]);
Our plot for a sticker amount of 100 and a pack size of 5 does indeed show the relationship we expect: the probability of a full set is 0 until about 60 packs bought, and by about 160 packs it is essentially 1. Note that in these probability plots, the point where the probability is 0.5 (roughly 100 here) is the sample median, which for a distribution this close to symmetric lies near the sample mean: it is the number of packs at which you complete the set about half the time. Let's test our proportion function with our newly formed stickerList for M = 649 and n = 5.
Testing that get_proportions works correctly
test_plot = get_proportions(stickerList)
plt.plot(test_plot[:,0],test_plot[:,1])
plt.xlabel("packs bought")
plt.ylabel("probability of completing the full set");
We can again see the asymptotic relationship here, and we can also see that the point where probability = 0.5 is around 900 like it should be. We can have full confidence that our probability function is working properly.
We also wanted to find the standard error of each of our lists, to gauge how accurately they predict the true population mean. Standard error differs from standard deviation in that rather than describing the spread of individual values, it measures how far a sample mean is likely to be from the population mean. To compute it, we can use the stats module from SciPy.
error1 = stats.sem(stickerList)
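The standard error lets us put a rough 95% confidence interval around the sample mean. Using the normal approximation (mean ± 1.96·SE) and the values recorded above for M = 649, n = 5:

```python
mean = 916.1791   # sample mean from our 10,000 simulations
sem = 1.66933     # standard error from stats.sem(stickerList)
ci = (mean - 1.96 * sem, mean + 1.96 * sem)
print(ci)  # roughly (912.9, 919.5)
```

The paper's analytical value of about 913.1 falls inside this interval, which is reassuring.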
Knowing that our proportion function can tell us the probability of completing the set for any given number of packs bought, we wanted a way to find the number of packs that gives a probability of at least 0.99. The 916 we found earlier was the mean, but buying that many packs only gives you about a 50% chance of completing the set, which isn't very high. To be nearly certain of completing the set you'd need to buy more, but how many more? To answer this, we implemented a function that finds the first pack count in the proportion list whose probability is at least 0.99.
def find99(props):
    for i in range(len(props)):
        if props[i,1] >= 0.99:
            return i
find99(test_plot)
1448
You need 1,448 packs to be 99% sure of completing the set with 5 stickers per pack, which makes sense given where the plot we made appears to be approaching 1.
Now that we have confirmed the findings of Diniz, Lopes, Polpo, and Salasar, we wanted to explore some other numbers of stickers per pack, and analyze the trends as we change n. We chose several of the pack sizes used in the article, plus 50, which we checked instead of the article's 61 and 62 because we preferred a rounder number.
startTime = time.time()
stickerList2 = [stickers(649,1) for i in range(10000)]
endTime = time.time()
#print("runtime:", endTime - startTime)
runTime2 = endTime - startTime
print(runTime2)
841.7757334709167
The runtime for packs of size 1 was far higher than for pack size 5, taking over 14 minutes to complete. This makes sense: a pack size of 1 gives very high variability, and it can take thousands of packs to complete the set even when only a few stickers are missing.
listMean2 = sum(stickerList2)/10000
print(listMean2)
4585.2128
As mentioned above, the average number of packs needed for pack size 1 is over 4,500, which is about 5 times larger than the number needed for packs of size 5.
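For pack size 1 there is a simple closed form to check against: the classic coupon-collector expectation M·(1 + 1/2 + ... + 1/M). This is our own sanity check, not a figure from the paper:

```python
M = 649
# classic coupon collector: expected single-sticker draws to see all M stickers
expected = M * sum(1 / i for i in range(1, M + 1))
print(expected)  # about 4577.7
```

This lands within one standard error of our simulated 4585.2, so the n = 1 simulation behaves as theory predicts.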
plt.hist(stickerList2, bins = 50)
plt.xlim(0,12000)
plt.xlabel("number of packs bought to complete the set")
plt.ylabel("frequency");
The associated histogram shows the same roughly normal shape, again with a slight right skew. Interestingly, this histogram is much wider: most of the data falls within a range of about 3,000-7,000, while for pack size 5 it fell within roughly 500-1,500.
error2 = stats.sem(stickerList2)
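As a side check of our own (not from the paper), the classic n = 1 collector has asymptotic variance (π²/6)·M², so its standard deviation should be about π·M/√6. Recovering our sample standard deviation from the standard error as SE·√10000:

```python
import math

M = 649
theory_std = math.pi * M / math.sqrt(6)   # asymptotic std of the n = 1 collector
sample_std = 8.35813 * math.sqrt(10000)   # std = SE * sqrt(N), using the SE above
print(theory_std, sample_std)  # about 832.4 vs 835.8
```

The two agree to within about half a percent, another sign the simulation is behaving as theory predicts.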
test_plot2 = get_proportions(stickerList2)
plt.plot(test_plot2[:,0],test_plot2[:,1])
plt.xlabel("packs bought")
plt.ylabel("probability of completing the full set");
All of the probability plots will show the same trend, but we will eventually overlay all of them to compare. As mentioned above, this plot definitely appears stretched out more, and the transition region is wider than for pack size 5.
find99(test_plot2)
7219
You need 7,219 packs to be 99% sure of completing the set with 1 sticker per pack.
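This number also agrees with a standard asymptotic estimate for the n = 1 collector: the probability of not being done after t packs is at most M·e^(−t/M), and setting that bound to 0.01 gives t ≈ M·ln(100·M). This is our own back-of-the-envelope check, not a value from the paper:

```python
import math

M = 649
t99 = M * math.log(M / 0.01)  # = M * (ln M + ln 100)
print(t99)  # about 7191
```

That is within about 0.4% of the 7,219 we got from the simulation.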
startTime = time.time()
stickerList3 = [stickers(649,10) for i in range(10000)]
endTime = time.time()
#print("runtime:", endTime - startTime)
runTime3 = endTime - startTime
print(runTime3)
399.7271192073822
The runtime here is slightly less than for pack size 5, which is what we would expect.
listMean3 = sum(stickerList3)/10000
print(listMean3)
455.4349
The average number of packs needed that we got was 455.4, which is very close to the exact answer of 455.0 the authors of the article got.
plt.hist(stickerList3, bins = 50)
plt.xlim(0,1500)
plt.xlabel("number of packs bought to complete the set")
plt.ylabel("frequency");
We see another similar histogram, where the data is roughly normal with a slight right skew. This also appears to confirm what we theorized earlier: the distribution narrows as the pack size increases. With more stickers per pack, each pack fills a larger, guaranteed-distinct chunk of the collection, so the number of packs needed varies less from run to run.
error3 = stats.sem(stickerList3)
test_plot3 = get_proportions(stickerList3)
plt.plot(test_plot3[:,0],test_plot3[:,1])
plt.xlabel("packs bought")
plt.ylabel("probability of completing the full set");
Our probability plot again looks similar to what we've already seen.
find99(test_plot3)
709
You need 709 packs to be 99% sure of completing the set with 10 stickers per pack.
startTime = time.time()
stickerList4 = [stickers(649,50) for i in range(10000)]
endTime = time.time()
#print("runtime:", endTime - startTime)
runTime4 = endTime - startTime
print(runTime4)
308.27709674835205
The runtime continues to decrease as we increase the number of stickers per pack, likely because each simulation needs fewer and fewer packs to complete the set.
listMean4 = sum(stickerList4)/10000
print(listMean4)
88.3635
For packs of size 50, on average you need 88.4 packs to complete the set. The article did not check pack size 50 so we do not have a reference for this sample mean.
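Since the article has no entry for n = 50, a rough cross-check of our own (reasonable while n is much smaller than M) is to treat each pack as n independent single-sticker draws and divide the classic coupon-collector expectation by n:

```python
M, n = 649, 50
# crude approximation: (M * H_M) / n, ignoring that stickers within a pack
# are guaranteed distinct
approx = M * sum(1 / i for i in range(1, M + 1)) / n
print(approx)  # about 91.6
```

This is within roughly 4% of our simulated 88.4; it slightly overestimates because distinct stickers within a pack make each pack a bit better than n independent draws.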
plt.hist(stickerList4, bins = 30)
plt.xlim(0,300)
plt.xlabel("number of packs bought to complete the set")
plt.ylabel("frequency");
The histogram looks the same yet again, with a narrow distribution around the mean and a slight right skew.
error4 = stats.sem(stickerList4)
test_plot4 = get_proportions(stickerList4)
plt.plot(test_plot4[:,0],test_plot4[:,1])
plt.xlabel("packs bought")
plt.ylabel("probability of completing the full set");
All of the probability plots look essentially the same by themselves, with the exception of the scale. It will be interesting to compare them all to each other at the end.
find99(test_plot4)
138
You need 138 packs to be 99% sure of completing the set with 50 stickers per pack.
startTime = time.time()
stickerList5 = [stickers(649,100) for i in range(10000)]
endTime = time.time()
#print("runtime:", endTime - startTime)
runTime5 = endTime - startTime
print(runTime5)
349.13524198532104
It took 349.135 seconds to do 10,000 simulations of M = 649, n = 100.
listMean5 = sum(stickerList5)/10000
print(listMean5)
42.5329
The sample mean for 100 stickers per pack is 42.53.
plt.hist(stickerList5, bins = 25)
plt.xlim(0,100)
plt.xlabel("number of packs bought to complete the set")
plt.ylabel("frequency");
We can see that the spread of the histograms gets smaller as we increase n.
error5 = stats.sem(stickerList5)
test_plot5 = get_proportions(stickerList5)
plt.plot(test_plot5[:,0],test_plot5[:,1])
plt.xlabel("packs bought")
plt.ylabel("probability of completing the full set");
Probability of completing the collection with x number of packages bought, with M = 649, n = 100.
find99(test_plot5)
66
You need 66 packs to be 99% sure of completing the set with 100 stickers per pack.
startTime = time.time()
stickerList6 = [stickers(649,500) for i in range(10000)]
endTime = time.time()
#print("runtime:", endTime - startTime)
runTime6 = endTime - startTime
print(runTime6)
203.04116225242615
It took 203.041 seconds to run 10,000 simulations of M = 649, n = 500.
listMean6 = sum(stickerList6)/10000
print(listMean6)
5.3163
The sample mean of the 10,000 simulations is 5.316.
plt.hist(stickerList6, bins = 10)
plt.xlim(0,20)
plt.xlabel("number of packs bought to complete the set")
plt.ylabel("frequency");
We see the same thing mentioned before: the spread of the observations gets smaller as n increases.
error6 = stats.sem(stickerList6)
test_plot6 = get_proportions(stickerList6)
plt.plot(test_plot6[:,0],test_plot6[:,1])
plt.xlabel("packs bought")
plt.ylabel("probability of completing the full set");
Probability of completing the set for a given number of packs bought, with packs of size 500.
find99(test_plot6)
8
You need 8 packs to be 99% sure of completing the set with 500 stickers per pack.
n = 649 is a trivial case. Obviously, if a package contains all 649 distinct stickers, then a single package gives you the whole collection. The paper also looked at this case, so we will too, for completeness' sake.
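Because random.sample draws without replacement, a pack of size M is guaranteed to be a permutation of the entire collection, which we can verify directly:

```python
import random

pack = random.sample(range(649), 649)  # one pack containing M distinct stickers
assert sorted(pack) == list(range(649))  # every such pack is the complete set
print(len(set(pack)))  # 649
```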
startTime = time.time()
stickerList7 = [stickers(649,649) for i in range(10000)]
endTime = time.time()
#print("runtime:", endTime - startTime)
runTime7 = endTime - startTime
print(runTime7)
75.48822951316833
It took 75.488 seconds to do 10,000 simulations of n = 649.
listMean7 = sum(stickerList7)/10000
print(listMean7)
1.0
The sample mean is 1.0, as expected.
plt.hist(stickerList7, bins = 5)
plt.xlim(0,5)
plt.xlabel("number of packs bought to complete the set")
plt.ylabel("frequency");
All of the simulations completed the collection with a single pack, as expected.
error7 = stats.sem(stickerList7)
test_plot7 = get_proportions(stickerList7)
plt.plot(test_plot7[:,0],test_plot7[:,1])
plt.xlabel("packs bought")
plt.ylabel("probability of completing the full set");
print(find99(test_plot7))
None
You need 1 pack to be 99% (indeed 100%) sure of completing the set with 649 stickers per pack. find99 returned None here because get_proportions loops over range(start, stop), which excludes the maximum value of the list; since every simulation finished in exactly 1 pack, the row for 1 pack is never filled in and no recorded probability ever reaches 0.99.
plt.plot(test_plot2[:,0],test_plot2[:,1], label = "1")
plt.plot(test_plot[:,0],test_plot[:,1], label = "5")
plt.plot(test_plot3[:,0],test_plot3[:,1], label = "10")
plt.plot(test_plot4[:,0],test_plot4[:,1], label = "50")
plt.plot(test_plot5[:,0],test_plot5[:,1], label = "100")
plt.plot(test_plot6[:,0],test_plot6[:,1], label = "500")
plt.plot(test_plot7[:,0],test_plot7[:,1], label = "649")
plt.xlabel("packs bought")
plt.ylabel("probability of completing the full set")
plt.legend(loc="lower right", title = "pack size");
The plot above shows the probability of completing the collection for x number of packs bought, for various pack sizes.
As we have mentioned before, as n increases it becomes easier to complete the whole collection within a given number of packs bought. We can also see that as n increases, the curve makes a sharper transition between probability 0 and probability 1. This makes sense, because we saw from the histograms that the spread of the samples got smaller as n increased; a small spread in the histogram corresponds to a sharper transition between probability 0 and probability 1.
#list to serve as the x values for all the points plotted in the plots below
packSizeList = [1,5,10,50,100,500,649]
packsBoughtList = [listMean2, listMean, listMean3, listMean4, listMean5, listMean6, listMean7]
print(packsBoughtList)
[4585.2128, 916.1791, 455.4349, 88.3635, 42.5329, 5.3163, 1.0]
plt.plot(packSizeList, packsBoughtList)
plt.xlabel("stickers per pack")
plt.ylabel("average packs needed to complete the set");
Average packs needed to complete the set for a given number of stickers per pack.
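One way to quantify the trend in this plot (our own observation): if each pack behaved like n independent single-sticker draws, the product n × (mean packs needed) would be constant. Using the sample means recorded above:

```python
pack_sizes = [1, 5, 10, 50, 100, 500, 649]
means = [4585.2128, 916.1791, 455.4349, 88.3635, 42.5329, 5.3163, 1.0]
# if packs were n independent draws, these products would all be equal
products = [n * m for n, m in zip(pack_sizes, means)]
print(products)
```

The product stays within about 1% of 4,585 for n up to 10, then falls off, showing that the guarantee of distinct stickers within a pack becomes a real advantage as n approaches M.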
#list without 1 so trend is clearer (pack size of 1 so much higher than rest of data)
packSizeList2 = [5,10,50,100,500,649]
packsBoughtList2 = [listMean, listMean3, listMean4, listMean5, listMean6, listMean7]
print(packsBoughtList2)
[916.1791, 455.4349, 88.3635, 42.5329, 5.3163, 1.0]
#zoom in on section of plot where most of the change is happening
plt.plot(packSizeList2, packsBoughtList2)
plt.xlim(0,150)
plt.xlabel("stickers per pack")
plt.ylabel("average packs needed to complete the set");
Same plot as before, excluding the first point (pack size 1) to see the trend better.
runTimeList = [round(t, 5) for t in [runTime2, runTime, runTime3, runTime4, runTime5, runTime6, runTime7]]
print(runTimeList)
[841.77573, 423.47702, 399.72712, 308.2771, 349.13524, 203.04116, 75.48823]
The above list shows the associated runtimes for each successive n value we tested.
plt.plot(packSizeList, runTimeList)
plt.xlabel("stickers per pack")
plt.ylabel("runtime (seconds)");
Relationship of runtime and stickers per pack. Note there appears to be a slight outlier at the n = 50 point, where the runtime is 308.28 seconds when the overall trend suggests something around 370-380 seconds. This shows up visually as the dip in the plot at x = 50.
errorList = [round(e, 5) for e in [error2, error1, error3, error4, error5, error6, error7]]
print(errorList)
[8.35813, 1.66933, 0.81337, 0.1585, 0.07505, 0.00921, 0.0]
The above list shows the associated standard errors for each successive n value we tested.
plt.plot(packSizeList, errorList)
plt.xlabel("stickers per pack")
plt.ylabel("standard error");
#list without 1 so trend is clearer (pack size of 1 so much higher than rest of data)
errorList2 = [round(e, 5) for e in [error1, error3, error4, error5, error6, error7]]
print(errorList2)
[1.66933, 0.81337, 0.1585, 0.07505, 0.00921, 0.0]
#zoom in on section of plot where most of the change is happening
plt.plot(packSizeList2, errorList2)
plt.xlim(0,150)
plt.xlabel("stickers per pack")
plt.ylabel("standard error");
The larger the pack size, the more accurate our approximation of the true mean. This culminates with pack size 649, which, since packs cannot contain duplicate stickers, completes the set in exactly 1 pack every single time. Our sample means for the smaller pack sizes are a bit further from the true expected number of packs.
Our findings can be summarized with a single table (values collected from the runs above):

n      sample mean    std. error    packs for 99%    runtime (s)
1      4585.21        8.358         7219             841.78
5      916.18         1.669         1448             423.48
10     455.43         0.813         709              399.73
50     88.36          0.159         138              308.28
100    42.53          0.075         66               349.14
500    5.32           0.009         8                203.04
649    1.00           0.000         1                75.49
This can be compared to the table in the paper by Diniz et al
The table from the paper compared the average number of packs needed to complete the collection for various pack sizes. They included both an exact analytical answer and an answer from their Monte Carlo Simulation.
Comparing some of our values to theirs, we can confidently say we recreated their data quite well. For n = 1, 5, 10, 100, and 500, the value we obtained from our Monte Carlo simulation was within one standard error of the exact value.
If we had more time with this project, here are some other things we could investigate: