set.seed(1410) # Make the sample reproducible
# NOTE(review): qplot() and the diamonds data come from ggplot2, which is not
# attached anywhere in this section -- presumably loaded earlier; confirm.
dsmall <- diamonds[sample(nrow(diamonds), 100), ]

qplot(carat, price, data = diamonds)
qplot(log(carat), log(price), data = diamonds)
qplot(carat, x * y * z, data = diamonds)

# Mapping point colour to diamond colour (left), and point shape to cut
# quality (right).
qplot(carat, price, data = dsmall, colour = color)
qplot(carat, price, data = dsmall, shape = cut)

# Reducing the alpha value from 1/10 (left) to 1/100 (middle) to 1/200
# (right) makes it possible to see where the bulk of the points lie.
# I() passes the value through as a literal setting instead of mapping it.
qplot(carat, price, data = diamonds, alpha = I(1 / 10))
qplot(carat, price, data = diamonds, alpha = I(1 / 100))
qplot(carat, price, data = diamonds, alpha = I(1 / 200))

# Smooth curves added to scatterplots of carat vs. price. The dsmall
# dataset (left) and the full dataset (right).
qplot(carat, price, data = dsmall, geom = c("point", "smooth"))
qplot(carat, price, data = diamonds, geom = c("point", "smooth"))

# The effect of the span parameter. (Left) `span = 0.2`, and
# (right) `span = 1`.
qplot(
  carat, price,
  data = dsmall, geom = c("point", "smooth"),
  span = 0.2
)
qplot(
  carat, price,
  data = dsmall, geom = c("point", "smooth"),
  span = 1
)

# The effect of the formula parameter, using a generalised additive
# model as a smoother. (Left) `formula = y ~ s(x)`, the default;
# (right) `formula = y ~ s(x, bs = "cs")`.
library(mgcv)
qplot(
  carat, price,
  data = dsmall, geom = c("point", "smooth"),
  method = "gam", formula = y ~ s(x)
)
qplot(
  carat, price,
  data = dsmall, geom = c("point", "smooth"),
  method = "gam", formula = y ~ s(x, bs = "cs")
)

# The effect of the formula parameter, using a linear model as a
# smoother. (Left) `formula = y ~ x`, the default; (right)
# `formula = y ~ ns(x, 5)`.
library(splines)
qplot(
  carat, price,
  data = dsmall, geom = c("point", "smooth"),
  method = "lm"
)
qplot(
  carat, price,
  data = dsmall, geom = c("point", "smooth"),
  method = "lm", formula = y ~ ns(x, 5)
)

# Using jittering (left) and boxplots (right) to investigate the
# distribution of price per carat, conditional on colour. As the
# colour improves (from left to right) the spread of values decreases,
# but there is little change in the centre of the distribution.
qplot(color, price / carat, data = diamonds, geom = "jitter")
qplot(color, price / carat, data = diamonds, geom = "boxplot")

# Varying the alpha level. From left to right: 1/5, 1/50, 1/200.
# As the opacity decreases we begin to see where the bulk of the data
# lies. However, the boxplot still does much better.
qplot(
  color, price / carat,
  data = diamonds, geom = "jitter",
  alpha = I(1 / 5)
)
qplot(
  color, price / carat,
  data = diamonds, geom = "jitter",
  alpha = I(1 / 50)
)
qplot(
  color, price / carat,
  data = diamonds, geom = "jitter",
  alpha = I(1 / 200)
)

# Displaying the distribution of diamonds. (Left) `geom = "histogram"`
# and (right) `geom = "density"`.
qplot(carat, data = diamonds, geom = "histogram")
qplot(carat, data = diamonds, geom = "density")

# Varying the bin width on a histogram of carat reveals interesting
# patterns. Binwidths from left to right: 1, 0.1 and 0.01 carats. Only
# diamonds between 0 and 3 carats shown.
qplot(
  carat,
  data = diamonds, geom = "histogram",
  binwidth = 1, xlim = c(0, 3)
)
qplot(
  carat,
  data = diamonds, geom = "histogram",
  binwidth = 0.1, xlim = c(0, 3)
)
qplot(
  carat,
  data = diamonds, geom = "histogram",
  binwidth = 0.01, xlim = c(0, 3)
)

# Mapping a categorical variable to an aesthetic will automatically
# split up the geom by that variable. (Left) Density plots are
# overlaid and (right) histograms are stacked.
qplot(carat, data = diamonds, geom = "density", colour = color)
qplot(carat, data = diamonds, geom = "histogram", fill = color)

# Bar charts of diamond colour.
# The left plot shows counts and the right plot is weighted by
# `weight = carat` to show the total weight of diamonds of each colour.
qplot(color, data = diamonds, geom = "bar")
qplot(color, data = diamonds, geom = "bar", weight = carat) +
  scale_y_continuous("carat")

# Two time series measuring amount of unemployment. (Left) Percent of
# population that is unemployed and (right) median number of weeks
# unemployed. Plots created with `geom = "line"`.
qplot(date, unemploy / pop, data = economics, geom = "line")
qplot(date, uempmed, data = economics, geom = "line")

# Path plots illustrating the relationship between percent of people
# unemployed and median length of unemployment. (Left) Scatterplot
# with overlaid path. (Right) Pure path plot coloured by year.

# Calendar year of a date-like value; POSIXlt stores the year as an
# offset from 1900, hence the correction.
year <- function(x) as.POSIXlt(x)$year + 1900

qplot(
  unemploy / pop, uempmed,
  data = economics, geom = c("point", "path")
)
# The former trailing `+ scale_area()` was dropped: it is a size scale, no
# size aesthetic is mapped here, and the function no longer exists in
# current ggplot2 releases, so the call only produced an error.
qplot(
  unemploy / pop, uempmed,
  data = economics, geom = "path",
  colour = year(date)
)

# Histograms showing the distribution of carat conditional on colour.
# (Left) Bars show counts and (right) bars show densities (proportions
# of the whole). The density plot makes it easier to compare
# distributions ignoring the relative abundance of diamonds within each
# colour. High-quality diamonds (colour D) are skewed towards small
# sizes, and as quality declines the distribution becomes more flat.
qplot(
  carat,
  data = diamonds, facets = color ~ .,
  geom = "histogram", binwidth = 0.1, xlim = c(0, 3)
)
qplot(
  carat, ..density..,
  data = diamonds, facets = color ~ .,
  geom = "histogram", binwidth = 0.1, xlim = c(0, 3)
)

# Custom axis labels and title. carat is on the x axis and price on the
# y axis, so the labels are assigned accordingly (they were swapped before).
qplot(
  carat, price,
  data = dsmall,
  xlab = "Weight (carats)",
  ylab = "Price ($)",
  main = "Price-weight relationship"
)

# A plotmath expression as the y label, plus a restricted x range.
qplot(
  carat, price / carat,
  data = dsmall,
  ylab = expression(frac(price, carat)),
  xlab = "Weight (carats)",
  main = "Small diamonds",
  xlim = c(0.2, 1)
)

# Log-transform both axes.
qplot(carat, price, data = dsmall, log = "xy")