# Graphics cheat sheet

# Install the ggplot2 package, but only if it is not already available
# (so nothing is reinstalled on lab machines, where you don't need to do this).
# (if you are asked to choose a CRAN mirror, scroll down to the bottom of the
# list: there's a mirror in Iowa that should be fast and reliable)
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}

# Load the ggplot2 package
# - do this every time you want to use the ggplot2 package
# - the name must match the installed package: "ggplot2", not "ggplot"
library(ggplot2)

# Examining distributions
# ======================================================

# Histograms ---------------------------------------
# First look: no binwidth is given, so the default binning is used
qplot(x = price, geom = "histogram", data = diamonds)

# ALWAYS EXPERIMENT WITH THE BIN SIZE!
# The same histogram with progressively narrower bins: 500, then 100,
# then 50 (in units of price)
qplot(x = price, geom = "histogram", binwidth = 500, data = diamonds)
qplot(x = price, geom = "histogram", binwidth = 100, data = diamonds)
qplot(x = price, geom = "histogram", binwidth = 50, data = diamonds)

# Investigating relationships
# ======================================================

# Two continuous variables -----------------------------------
# Draw a scatterplot: the first variable goes on the x axis and the
# second on the y axis. Transformed variables can be plotted directly.
qplot(x = price, y = carat, data = diamonds)
qplot(x = log(carat), y = log(price), data = diamonds)
qplot(x = carat, y = price / carat, data = diamonds)

# Additional variables can be shown by mapping them to other aesthetic
# attributes of the points: colour, size or shape
qplot(x = carat, y = price, colour = color, data = diamonds)
qplot(x = carat, y = price, size = carat, data = diamonds)
qplot(x = carat, y = price, shape = cut, data = diamonds)

# Facetting draws the same plot once for each subset of the data.
# Pass a formula to the facets argument: the variable on the left-hand
# side defines the rows, the one on the right-hand side the columns
# (a "." means "do not split in this direction").
qplot(x = price, y = carat, facets = . ~ color, data = diamonds)
qplot(x = price, y = carat, facets = color ~ clarity, data = diamonds)

# One continuous, one categorical -----------------------------------

# There is too much overplotting in a scatterplot to be very useful
# we can basically only see the range of the data
qplot(color, price / carat, data = diamonds)

# We could spread the points out a little bit, by adding random jitter
# This helps a little bit, but it's still hard to see what's going on
# (use the jitter geom: the position argument of qplot() is deprecated)
qplot(color, price / carat, data = diamonds, geom = "jitter")

# The amount of horizontal jitter is controlled with the width argument of
# position_jitter() (there is no x argument). Width is measured in x-axis
# units, where neighbouring colours are 1 unit apart.
# NOTE(review): 0.2 is a reasonable value, not a translation of the old
# setting -- adjust to taste.
ggplot(diamonds, aes(color, price / carat)) +
  geom_point(position = position_jitter(width = 0.2))

# Another technique is to visualise the conditional distribution, i.e.
# for a given colour, what is the distribution of price/carat.
# A boxplot provides a simple summary of the distribution:
qplot(color, price / carat, data = diamonds, geom = "boxplot")

# Or we can use a histogram for each colour to look at the shape of
# the distribution in more detail (one row of facets per colour).
# The argument must be spelled facets in full: it cannot be abbreviated.
qplot(price / carat, data = diamonds, facets = color ~ ., geom = "histogram")

# ALWAYS EXPERIMENT WITH THE BIN SIZE!
qplot(
  price / carat,
  data = diamonds, facets = color ~ ., geom = "histogram", binwidth = 100
)

# Two categorical variables -----------------------------------
# Use a fluctuation diagram - a visualisation of the contingency table:
# one symbol per cut/colour combination, sized by the number of diamonds.
# The plot is built directly from the table because ggfluctuation() is not
# available in current releases of ggplot2.

# as.data.frame() turns the table into one row per combination, with the
# count stored in a column called Freq
cut_by_color <- as.data.frame(
  table(cut = diamonds$cut, color = diamonds$color)
)
ggplot(cut_by_color, aes(color, cut)) +
  geom_point(aes(size = Freq))

# Zooming ---------------------------------------
# You can zoom in on an interesting area of the plot using the
# xlim and ylim arguments (each takes a vector of two limits, lower
# then upper):

qplot(price, data = diamonds, geom = "histogram", xlim = c(0, 5000))