stat405

Function drills, by Garrett Grolemund

Mathematical operations

  1. Return the circumference of a circle with the given radius.

    c_circ <- function(r){
      2 * pi * r
    }
  2. Return the area of a circle with the given radius.

    c_area <- function(r){
      pi * r ^ 2
    }
  3. Return the volume of a circle with the given radius.

    c_vol <- function(r){
      4 / 3 * pi * r ^ 3
    }
  4. Return the circumference, area (of the largest cross-section), and volume of a sphere with the given radius. Each should be labelled in the functions output.

    c_stats <- function(r){
      c(circumference = c_circ(r),
        area = c_area(r),
        volume = c_vol(r)
      )
    }
    
    # note: c_circ(), c_area(), and c_vol() were all defined in the previous drills
    
  5. Given the coefficients of a quadratic polynomial, return the roots.

    quad_formula <- function(a, b, c){
      stopifnot(b ^ 2 >= 4 * a * c)
      c((-b - sqrt(b ^ 2 - 4 * a * c)) / (2 * a),
        (-b + sqrt(b ^ 2 - 4 * a * c)) / (2 * a))
    }
  6. Find the least common multiple of two numbers.

    LCM <- function(a, b){
      test <- a * c(1:b)
      
      #note: my_integer is a function created in the 
      # drill Manipulating Individual Elements 4
      multiples <- test[my_integer(test / b)] 
      min(multiples)
    }
  7. Find the determinant of a two by two matrix.

    determinant <- function(matrix){
      matrix[1, 1] * matrix[2, 2] - matrix[1, 2] * matrix[2, 1]
    }
  8. Display the number of groups of size n can be made from the inputted vector of length k.

    n_choose_k <- function(n, k){
      factorial(n) / (factorial(k) * factorial(n - k))
    }
  9. Return the number of unique permutations that can be from a given vector. Assume that each element in the vector is unique. (caution: don't use large vectors).

    num_seqs <- function(vec){
      n <- length(vec)
      factorial(n)
    }
  10. Return the number of unique sets that can be made from an inputted vector.

    num_sets <- function(vec){
      vec <- unique(vec)
      n <- length(vec)
      my_sum(2 ^ c(0:(n-1)))
    }

Statistical operations

  1. Calculate the mean of a vector

    my_mean <- function(vec){
      my_sum(vec) / length(vec)
    }
  2. Calculate the mean of a vector, removing missing values.

    my_mean2 <- function(vec){
      my_mean(na.omit(vec))
    }
  3. Calculate the sample variance of a vector.

    my_var <- function(vec){
      xbar <- my_mean(vec)
      sum <- my_sum((v - xbar) ^ 2)
      
      # let's use n-1 so we can compare with R's var(). 
      # This is the sample variance
      n <- length(vec)
      sum / (n - 1)
     }
  4. Find the range of a vector.

    my_range <- function(vec){
      max(vec) - min(vec)
    }
  5. Index a series of observations by the first observation (hint: express each observation as a percentage of the first observation).

    how_to_index <- function(vec){
      vec / vec[1] * 100
    }
  6. Randomly return one of the following phrases, "Ace", "King" or "Queen" with equal probability of returning each.

    random <- function(){
      sample(c("Ace", "King", "Queen"), 1)
    }
  7. Randomly return one of the following phrases, "Ace", "King" or "Queen" with twice as much probability of returning "Ace" as either "King" or "Queen."

     random2 <- function(){
      sample(c("Ace", "King", "Queen"), 1, prob = c(2, 1, 1))
    }
  8. Return whether a vector of numbers is right skewed or left skewed by comparing its mean and median.

    # note: this concept only works for large amounts of skew - Garrett
    skew <- function(vec){
      if (mean(vec) < median(vec))
        return("left-skewed")
      if (mean(vec) > median(vec))
        return("right-skewed")
      return("symmetric")
    }
  9. Find the (statistical) mode of a vector.

    mode_vec <- function(vec){
      counts <- as.data.frame(table(vec))
      most_freq <- which(counts[, 2] == max(counts[, 2]))
      counts[most_freq, 2]
    }
  10. Given a numeric vector of length 100, determine which element occurs at the 70th percentile.

    ptile100 <- function(vec){
      vec[order(vec)]
      vec[70]
    }
  11. Given a numeric vector of length 10, determine which element occurs at the 70th percentile.

    ptile10 <- function(vec){
      vec[order(vec)]
      vec[7]
    }
  12. Given a numeric vector of length 10, determine which element occurs at the 70th percentile.

    ptile <- function(vec){
      vec[order(vec)]
      vec[round(.7 * length(vec), 1)]
    }

Manipulating individual elements

  1. Test whether a number is even.

    is.even <- function(num){
      num %% 2 == 0
    }
  2. Test whether a number is odd.

    is.odd <- function(num){
      num %% 2 == 1
    }
  3. If a number is odd add one to it.

    one_to_odd <- function(num){
      if (is.odd(num))
        return(num + 1)
      num
    }
  4. Test whether a number is an integer.

    my_integer <- function(a){
      trunc(a) == a
    }
  5. Separate the integer and decimal parts of a number, return them in a vector of length two.

    split_num <- function(num){
      c(trunc(num), num - trunc(num))
    }
  6. Take any character string and add "...in Stat 405" to the end.

    fortune_cookie <- function(fortune){
      paste(fortune, "...in Stat405.")
    }
  7. Take any character string and add "...in Stat 405" to the end. Check that the input is a character string. Return an error if it is not.

    fortune_cookie <- function(fortune){
      stopifnot(is.character(fortune))
      paste(fortune, "...in Stat405.")
    }
  8. Identify whether an object is a logical, character, or numeric object.

    type <- function(obj){
      mode(obj)
    }
  9. Return whichever the entered number is closest to: 0 or 1000.

    closest <- function(a){
      ifelse(a > 500, 1000, 0)  
    }

Manipulating vectors

  1. Return the lowest positive value of a vector.

    min_pos <- function(vec){
      pos <- vec[vec > 0]
      min(pos)
    }
  2. Return the second lowest positive value of a vector.

    min2_pos <- function(vec){
      pos <- vec[vec > 0]
      sort(pos)[2]
    }
  3. Divide each element in a numeric vector by the vector's length.

    div_vec <- function(vec){
      vec / length(vec)
    }
  4. If any number in a numeric vector is odd, add one to it.

    one_to_odd2 <- function(vec){
      vec[is.odd(vec)] <- vec[is.odd(vec)] + 1
      vec
    }	
    
    # note: this function is better than the previous one
    # It works for numbers AND vectors, and it is simpler
  5. Find the sum of a vector (without using the built-in sum function)

    	my_sum <- function(vec){
    	  total <- 0
    	  for (i in 1:length(vec)){
    	  	total <- total + vec[i]
    	  }
    	  total
    	}
  6. Return the given vector with all NA's removed.

    clean <- function(vec){
      na.omit(vec)
    }
  7. Create a new vector by repeating a given vector a given number of times.

    rep_vec <- function(vec, n){
      rep(vec, n)	
    }
  8. Double each element in a vector (e.g., turn {a,b,c,...} into {a, a, b, b, c,...}).

    rep_vec2 <- function(vec, n){
      rep(vec, each = 2)
    }
  9. Create a new vector where each ith element is the sum of the first i elements of the given vector.

    cumsum <- function(vec){
      for(i in 1:length(vec)){
      	vec[i] <- sum(vec[1:i])
      }
      vec
    } 
    
  10. Select the number in a vector that is the greatest distance from the first element of the vector

    select <- function(vec){
      distance <- abs(vec - vec[1])
      vec[which(distance == max(distance))]
    }
  11. Return a vector with its elements reordered in a random manner.

    shuffle <- function(vec){
      sample(vec, length(vec), replace = F) 
    }
  12. Return a vector with its elements ordered from smallest to largest.

    order1 <- function(vec){
      sort(vec)
    }
  13. Return a vector with its elements ordered largest to smallest.

    order2 <- function(vec){
      -sort(-vec)
    }

Manipulating dataframes

  1. Return the row numbers of rows in a data frame that contain NA's.

    get_NAs <- function(df){
      new <- na.omit(df)
      setdiff(row.names(df), row.names(new))
    }
  2. Return the actual rows of a data frame that contain NA's.

    get_NAs2 <- function(df){
      new <- na.omit(df)
      rows <- setdiff(row.names(df), row.names(new))
      df[rows, ]
    }
  3. Save a copy of a data frame as a comma separated values file whose filename is the name of the data frame plus ".csv".

    save_file <- function(df){
      filename <- paste(substitute(df), "csv", sep = ".")
    
      # best method to avoid adding row numbers
      write.table(file, filename, sep = ",", row = F)
    }
  4. Re-order the rows of a data frame so that the entries in the first column go from smallest to largest as you read down the column.

    order_df <- function(df){
      df[order(df[, 1]), ]
    }
  5. Given a data frame with two columns, return all of the combinations of the two variables that occur once or more.

    combos <- function(df){
      combinations <- table(df[, 1], df[, 2])
      df_counts <- as.data.frame(combinations)
      names(df_counts) <- c(names(df), "count")
      df_counts <- subset(df_counts, count > 0)
      df_counts
    }
  6. Automatically plot the above results with each variable on an axis and the number of occurrences (counts) represented by color.

    colorful_counts <- function(df){
      df_counts <- combos(df)
      df_counts <- within(df_counts, {
        x <- as.numeric(as.character(df_counts[, 1]))
        y <- as.numeric(as.character(df_counts[, 1]))
      })
      qplot(x, y, data = df_counts, colour = count)
    }

Plotting data

  1. Automatically create a histogram of a vector.

    my_hist <- function(vec){
      hist(vec)
    }
  2. Automatically create a histogram of a vector with the given number of bins.

    better_hist <- function(vec, n){
      hist(vec, breaks = n)
    }
  3. Automatically create a scatterplot matrix with the variables in a given data frame.

    scatterplot <- function(df){
      library(ggplot2)
      plotmatrix(df)
    }
  4. Save the current graph with width = 6 and height = 6 as a pdf with the inputted name.

    save_plot <- function(name){
      filename <- paste("name", "pdf", sep = ".")
      ggsave(filename, width = 6, height = 6)
    }
  5. Save the current graph with a given width and height as a pdf with the inputted name.

    save_plot2 <- function(name, width, height){
      filename <- paste("name", "pdf", sep = ".")
      ggsave(filename, width = width, height = height)
    }