# Generating random numbers -------------------------
rnorm(10)
rnorm(100)
rnorm(100, mean=10, sd=5)
rpois(10, 10)
# * normal = rnorm
# * poisson = rpois
# * binomial = rbinom
# * gamma = rgamma
# * look at help.search(keyword="distribution") for more
#
# Always check with the documentation that the distribution
# is parameterised the way you expect. Eg. for normal, standard
# deviation, not variance.
# Repeating a task -------------------------
#
# replicate(n, task) repeats task n times and joins the
# result into a single vector.
replicate(100, mean(rnorm(100)))
replicate(100, mean(rnorm(10)))
# Display an empirical distribution ----------------
#
# An empirical distribution is one derived from data,
# as opposed to a theoretical distribution.
qplot(replicate(100, mean(runif(10))), type="histogram")
# More replicates will give a better estimate of the distribution
# (but will take more time, obviously)
qplot(replicate(100, mean(runif(10))), type="histogram")
qplot(replicate(1000, mean(runif(10))), type="histogram")
qplot(replicate(10000, mean(runif(10))), type="histogram")
qplot(replicate(100000, mean(runif(10))), type="histogram")
# Central limit theorem -------------------------------
#
# Let use these tools to explore the central limit theorem.
# We will also use functions to reduce duplication in our code.
# First get the simple case working
qplot(replicate(100, mean(runif(10))), type="histogram")
# Easy! - we have already done this
# Next, figure out what parameters we want. We'll start with:
# * n = number of replicates
# * m = number of samples to average over
# Create variables, and then use those in the expression:
n <- 100
m <- 10
qplot(replicate(n, mean(runif(m))), type="histogram")
# This is _exactly_ what we had before, we've just replaced
# a couple of numbers with variables
# Next, we wrap it our expression in a function with the
# parameters we figured out:
(n, m) {
x <- replicate(n, mean(runif(m)))
qplot(x, type="histogram")
}
# Notice that I've indented the inside of the function to
# make it easier to see where it begins and ends. You'll want
# to create and edit functions in a text editor (or the script editor
# in R) - it's just too hard to do it line by line.
# Now check that it works:
clt(1000, 1)
clt(1000, 2)
clt(1000, 3)
clt(1000, 4)
# Let's make our function a bit more flexible, so that
# we can specify the distribution function as well.
# So we need one new parameter: r, the random number
# generating function
r <- runif
qplot(replicate(n, mean(r(m))), type="histogram")
# It works, so now for the next step:
(n, m, r) {
x <- replicate(n, mean(r(m)))
qplot(x, type="histogram")
}
clt(1000, 1, runif)
clt(1000, 1, rnorm)
clt(1000, 1, rpois)
# Uh oh! That last one didn't work. Why not?
# The rpois function needs another parameter, lambda, which
# we haven't supplied.
# The easiest way to get around this is to create a new
# random number generate that generates random poisson number
# with given lambda
clt(1000, 1, function(n) rpois(n, lambda=5))
clt(1000, 1, function(n) rpois(n, lambda=10))
clt(1000, 1, function(n) rpois(n, lambda=100))
# Here you can see we didn't name the function we created,
# we just passed it into another function. A function with
# no name is a called an anonymous function.
# Here we see that as the mean of the lambda increases, it
# becomes more symmetric.
# We can also improve our clt function in another way, by
# adding default values. When we don't specify the parameter in the
# function call, the default will be used instead.
(n=1000, m=1, r=runif) {
x <- replicate(n, mean(r(m)))
qplot(x, type="histogram")
}
# Here our defaults are: n = 100, m = 1 and r = runif
clt()
clt(10000)
clt(m=10)
clt(r=rnorm)