Simulation cheat sheet

# Generating random numbers -------------------------
rnorm(100, mean=10, sd=5)
rpois(10, 10)

# * normal = rnorm
# * poisson = rpois
# * binomial = rbinom
# * gamma = rgamma
# * look at"distribution") for more
# Always check with the documentation that the distribution
# is parameterised the way you expect.  Eg. for normal, standard
# deviation, not variance.

# Repeating a task -------------------------
# replicate(n, task) repeats task n times and joins the 
# result into a single vector.

replicate(100, mean(rnorm(100)))
replicate(100, mean(rnorm(10)))

# Display an empirical distribution ----------------
# An empirical distribution is one derived from data,
# as opposed to a theoretical distribution.  

qplot(replicate(100, mean(runif(10))), type="histogram")

# More replicates will give a better estimate of the distribution
# (but will take more time, obviously)

qplot(replicate(100, mean(runif(10))), type="histogram")
qplot(replicate(1000, mean(runif(10))), type="histogram")
qplot(replicate(10000, mean(runif(10))), type="histogram")
qplot(replicate(100000, mean(runif(10))), type="histogram")

# Central limit theorem -------------------------------
# Let use these tools to explore the central limit theorem.
# We will also use functions to reduce duplication in our code.

# First get the simple case working
qplot(replicate(100, mean(runif(10))), type="histogram")
# Easy! - we have already done this

# Next, figure out what parameters we want.  We'll start with:
#  * n = number of replicates
#  * m = number of samples to average over
# Create variables, and then use those in the expression:

n <- 100
m <- 10
qplot(replicate(n, mean(runif(m))), type="histogram")

# This is _exactly_ what we had before, we've just replaced 
# a couple of numbers with variables

# Next, we wrap it our expression in a function with the 
# parameters we figured out:

clt <- function(n, m) {
  x <- replicate(n, mean(runif(m)))
  qplot(x, type="histogram")
# Notice that I've indented the inside of the function to
# make it easier to see where it begins and ends.  You'll want
# to create and edit functions in a text editor (or the script editor 
# in R) - it's just too hard to do it line by line.

# Now check that it works:
clt(1000, 1)
clt(1000, 2)
clt(1000, 3)
clt(1000, 4)

# Let's make our function a bit more flexible, so that
# we can specify the distribution function as well. 

# So we need one new parameter: r, the random number
# generating function

r <- runif
qplot(replicate(n, mean(r(m))), type="histogram")

# It works, so now for the next step:

clt <- function(n, m, r) {
  x <- replicate(n, mean(r(m)))
  qplot(x, type="histogram")

clt(1000, 1, runif)
clt(1000, 1, rnorm)
clt(1000, 1, rpois)

# Uh oh!  That last one didn't work.  Why not?
# The rpois function needs another parameter, lambda, which
# we haven't supplied.  

# The easiest way to get around this is to create a new 
# random number generate that generates random poisson number
# with given lambda

clt(1000, 1, function(n) rpois(n, lambda=5))
clt(1000, 1, function(n) rpois(n, lambda=10))
clt(1000, 1, function(n) rpois(n, lambda=100))

# Here you can see we didn't name the function we created,
# we just passed it into another function.  A function with
# no name is a called an anonymous function.

# Here we see that as the mean of the lambda increases, it 
# becomes more symmetric.  

# We can also improve our clt function in another way, by 
# adding default values.  When we don't specify the parameter in the 
# function call, the default will be used instead.

clt <- function(n=1000, m=1, r=runif) {
  x <- replicate(n, mean(r(m)))
  qplot(x, type="histogram")

# Here our defaults are: n = 100, m = 1 and r = runif