# Tidy and explore the weekly chart data stored in pop-2000.csv.
#
# The input has one row per track and one column per week on the chart
# (presumably the chart position for that week; verify against the data).
# The script reshapes it to one row per track per week, attaches a calendar
# date to every observation and draws a few exploratory plots.

library(reshape2)
library(lubridate)
library(ggplot2)
# install.packages("directlabels")
library(directlabels)

options(stringsAsFactors = FALSE)

# Load and reshape ------------------------------------------------------------

pop <- read.csv("pop-2000.csv")

# Columns that identify a track; every other column is melted into a
# (variable, value) pair.
id_vars <- c(
  "year", "artist.inverted", "track", "time", "genre",
  "date.entered", "date.peaked"
)
popm <- melt(pop, id_vars)

# Convert week to a number: keep only the digits of the melted column name.
# Base gsub() is used so the script does not depend on stringr, which is
# never attached here.
popm$week <- as.numeric(gsub("[^0-9]+", "", popm$variable))
popm$variable <- NULL

# Remove values from songs that didn't make it that long. The dim() calls
# print the size before and after so the effect of the filter is visible.
dim(popm)
popm <- subset(popm, !is.na(value))
dim(popm)

# Calculate the actual date: week 1 is the week the track entered.
popm$date <- ymd(popm$date.entered) + weeks(popm$week - 1)
popm <- subset(popm, year(date) == 2000)

# Sort by date, then value, using base order() (plyr/dplyr are not attached).
# Row names are reset so they do not keep the pre-sort numbering.
popm <- popm[order(popm$date, popm$value), , drop = FALSE]
rownames(popm) <- NULL

# Exploratory graphics --------------------------------------------------------

ggplot(popm, aes(date, value)) +
  geom_point()

ggplot(popm, aes(date, value, group = track)) +
  geom_line()

# Reverse the y axis so that value 1 is drawn at the top.
ggplot(popm, aes(date, value, group = track)) +
  geom_line() +
  scale_y_reverse()

# Focus only on tracks that made it to number one
top1 <- unique(popm$track[popm$value == 1])
ggplot(subset(popm, track %in% top1), aes(date, value, group = track)) +
  geom_line() +
  scale_y_reverse() +
  facet_wrap(~track)

# Just look at Destiny's Child
dc <- subset(popm, artist.inverted == "Destiny's Child")
ggplot(dc, aes(date, value, colour = track)) +
  geom_line() +
  scale_y_reverse()

# Replace the colour legend with labels placed at the last point of each line.
direct.label(last_plot(), last.points)