# stat645
# Week 1
# Data
library(ggplot2)
# Keep character columns as strings rather than auto-converting to factors.
options(stringsAsFactors = FALSE)

# Read the religion/income table, then turn `income` into a factor whose
# levels run from the lowest bracket to the highest, with the non-response
# category last.
relinc <- read.csv(file = "religion-income.csv")
relinc[["income"]] <- factor(
  relinc[["income"]],
  levels = c(
    "<$10k", "$10-20k", "$20-30k", "$30-40k", "$40-50k",
    "$50-75k", "$75-100k", "$100-150k", ">150k",
    "Don't know/refused"
  )
)
# Code sample 1
# 1
# Within-religion proportion: each row's count divided by the total count
# for its religion. ave() returns the per-group sum aligned with the
# original rows, so this needs only base R (ddply() lives in plyr, which
# this script never attaches) and leaves the row order untouched.
relinc$prop.rel <- relinc$freq / ave(relinc$freq, relinc$religion, FUN = sum)
# 2
# Lookup table from each original income bracket (the names) to a coarser
# bracket (the values); the non-response category maps to NA.
combine <- setNames(
  c("<$30k", "<$30k", "<$30k", "$30-50k", "$30-50k",
    "$50-75k", "$75-100k", ">$100k", ">$100k", NA),
  c("<$10k", "$10-20k", "$20-30k", "$30-40k", "$40-50k",
    "$50-75k", "$75-100k", "$100-150k", ">150k", "Don't know/refused")
)

# Translate every row's bracket through the lookup. Levels follow the order
# in which the coarse brackets first appear in `combine`; factor() drops the
# NA entry from the level set by default.
relinc[["income2"]] <- factor(
  combine[as.character(relinc[["income"]])],
  levels = unique(unname(combine))
)
# 3
# Horizontal stacked bars: one bar per religion, with segments sized by the
# within-religion proportion (`prop.rel`) and filled by the collapsed income
# bracket (`income2`). Built with ggplot() rather than the deprecated qplot().
ggplot(relinc, aes(x = religion, weight = prop.rel, fill = income2)) +
  geom_bar() +
  coord_flip() +
  # Spell out `palette`: it comes after `...` in the signature, so the
  # abbreviated `pal` is never matched to it and the palette is not applied.
  scale_fill_brewer(palette = "Greens")