stat645

Week 1

Data

library(ggplot2)
# Keep character columns as plain strings when reading (session-wide option).
options(stringsAsFactors = FALSE)

# Load the religion-by-income table from the working directory.
relinc <- read.csv("religion-income.csv")

# Turn income into a factor whose levels follow the bracket order listed here
# (lowest bracket first, "Don't know/refused" last) instead of alphabetical.
relinc$income <- factor(
  relinc$income,
  levels = c(
    "<$10k",
    "$10-20k",
    "$20-30k",
    "$30-40k",
    "$40-50k",
    "$50-75k",
    "$75-100k",
    "$100-150k",
    ">150k",
    "Don't know/refused"
  )
)

Code sample 1

# 1
# Within each religion, convert the raw counts in `freq` to proportions of
# that religion's total, stored in the new column `prop.rel`.
# Uses base R `ave()` (group-wise sum broadcast back to every row) so the step
# does not require plyr to be attached; row order of `relinc` is left as-is.
relinc$prop.rel <- relinc$freq / ave(relinc$freq, relinc$religion, FUN = sum)

# 2
# Lookup table from the original income brackets (names) to coarser
# brackets (values). "Don't know/refused" maps to NA.
combine <- c(
  "<$10k"              = "<$30k",
  "$10-20k"            = "<$30k",
  "$20-30k"            = "<$30k",
  "$30-40k"            = "$30-50k",
  "$40-50k"            = "$30-50k",
  "$50-75k"            = "$50-75k",
  "$75-100k"           = "$75-100k",
  "$100-150k"          = ">$100k",
  ">150k"              = ">$100k",
  "Don't know/refused" = NA_character_
)

# Recode each row's income through the lookup table. Levels are the distinct
# coarse brackets in the order they first appear in `combine`; the NA entry
# is not kept as a level (factor() excludes NA by default).
relinc$income2 <- factor(
  combine[as.character(relinc$income)],
  levels = unique(unname(combine))
)

# 3
# Bar chart with one bar per religion, filled by the coarse income bracket.
# `weight = prop.rel` makes each bar segment's length the sum of the
# within-religion proportions rather than a row count. coord_flip() turns the
# bars horizontal, and the fill uses the ColorBrewer "Greens" palette.
# Written with ggplot() + geom_bar() instead of the deprecated qplot(), and
# with the `palette` argument spelled out in full.
ggplot(relinc, aes(x = religion, weight = prop.rel, fill = income2)) +
  geom_bar() +
  coord_flip() +
  scale_fill_brewer(palette = "Greens")