Microbenchmarking Feather

First, fill a data frame with columns of several different types:

library(devtools)
# install_github("wesm/feather/R")
library(feather)
library(microbenchmark)

# Fix the RNG state so the generated data is reproducible.
# 3 - 29 - 16 is ordinary subtraction, so the seed actually used is -42.
set.seed(3 - 29 - 16)

# Number of rows in the benchmark data frame.
rows <- 1e5

# Build the data frame one column at a time. The columns are created in this
# exact order because every one of them draws from the same random stream.

# Whole numbers in [-100, 100]; round() returns doubles, so this column is
# numeric rather than R's integer type.
x <- data.frame(
  ints = round(runif(rows, min = -100, max = 100)),
  stringsAsFactors = FALSE
)

# Uniform doubles in [-100, 100].
x[["floats"]] <- runif(rows, min = -100, max = 100)

# Random logical values.
x[["bools"]] <- sample(c(TRUE, FALSE), size = rows, replace = TRUE)

# Random timestamps, given as seconds since the Unix epoch.
x[["dates"]] <- as.POSIXct(
  runif(rows, min = 100000000, max = 1459293171),
  origin = "1970-01-01"
)

# Factor with 36 possible levels: the upper-case letters and the digits 0-9.
x[["categories"]] <- as.factor(
  sample(c(LETTERS, 0:9), size = rows, replace = TRUE)
)

# Random lower-case strings, each between 1 and 10 characters long.
x[["strings"]] <- vapply(
  seq_len(rows),
  function(i) {
    paste0(sample(letters, sample(1:10, 1), replace = TRUE), collapse = "")
  },
  character(1)
)

Benchmark writes:

# Time 10 runs of writing x to "x.csv" with write.csv().
# NOTE(review): write.csv() keeps row names unless row.names = FALSE is
# passed, so the CSV carries an extra column the other formats do not --
# confirm this is intended for the size and timing comparison.
microbenchmark(
  write.csv(x, file = "x.csv"), times = 10
)

## Unit: milliseconds
##                          expr      min       lq     mean   median      uq
##  write.csv(x, file = "x.csv") 961.7503 991.5472 1059.493 1011.836 1046.36
##       max neval
##  1425.711    10

# Time 10 runs of writing x to "x.rda" with save().
microbenchmark(
  save(x, file = "x.rda"), times = 10
)

## Unit: milliseconds
##                     expr      min       lq     mean   median       uq
##  save(x, file = "x.rda") 366.4059 384.4919 394.2606 397.1226 406.9492
##       max neval
##  411.7216    10

# Time 10 runs of writing x to "x.rds" with saveRDS().
microbenchmark(
  saveRDS(x, file = "x.rds"), times = 10
)

## Unit: milliseconds
##                        expr      min       lq     mean   median      uq
##  saveRDS(x, file = "x.rds") 369.1186 378.6542 386.3716 385.2458 397.955
##       max neval
##  402.8885    10

# Time 10 runs of writing x to "x.feather" with feather's write_feather().
microbenchmark(
  write_feather(x, "x.feather"), times = 10
)

## Unit: milliseconds
##                           expr      min       lq     mean   median      uq
##  write_feather(x, "x.feather") 20.33291 22.99778 29.42579 26.81864 39.6417
##       max neval
##  41.56623    10

File size:

# Remove the data frame and the row count from the workspace; from here on
# only the files written above are used.
rm(x, rows)

# Size (in bytes) of the CSV file on disk.
file.size("x.csv")

## [1] 6667705

# Size (in bytes) of the file written by save().
file.size("x.rda")

## [1] 2172037

# Size (in bytes) of the file written by saveRDS().
file.size("x.rds")

## [1] 2172038

# Size (in bytes) of the file written by write_feather().
file.size("x.feather")

## [1] 3761724

Benchmark reads:

# Time 10 runs of reading "x.csv" back with read.csv(); the result is
# assigned to y on every run.
microbenchmark(
  y <- read.csv("x.csv"), times = 10
)

## Unit: seconds
##                    expr      min       lq     mean   median       uq
##  y <- read.csv("x.csv") 2.410053 2.422003 2.491217 2.444062 2.483281
##       max neval
##  2.881501    10

# Discard the y assigned during the CSV read timing.
rm(y)

# Time 10 runs of reading "x.rda" with load(). No assignment is needed:
# load() restores the saved object under the name it was saved with (x).
microbenchmark(
  load("x.rda"), times = 10
)

## Unit: milliseconds
##           expr      min       lq     mean   median       uq      max neval
##  load("x.rda") 57.38035 57.96498 62.40219 60.32777 62.55126 78.52523    10

# Discard the x that load() restored during the previous timing.
rm(x)

# Time 10 runs of reading "x.rds" with readRDS(); the result is assigned to
# y on every run.
microbenchmark(
  y <- readRDS("x.rds"), times = 10
)

## Unit: milliseconds
##                   expr      min       lq     mean   median       uq
##  y <- readRDS("x.rds") 57.07299 61.96834 68.87885 65.53774 77.75659
##       max neval
##  86.40591    10

# Discard the y assigned during the readRDS() timing.
rm(y)

# Time 10 runs of reading "x.feather" with feather's read_feather(); the
# result is assigned to y on every run.
microbenchmark(
  y <- read_feather("x.feather"), times = 10
)

## Unit: milliseconds
##                            expr      min       lq     mean   median
##  y <- read_feather("x.feather") 14.75314 15.68475 22.78204 17.50723
##        uq     max neval
##  28.85749 43.1859    10

# Print the R version, platform and package versions the timings above were
# collected with.
session_info()

## Session info --------------------------------------------------------------

##  setting  value                       
##  version  R version 3.2.4 (2016-03-10)
##  system   x86_64, darwin13.4.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  tz       America/New_York            
##  date     2016-03-29

## Packages ------------------------------------------------------------------

##  package        * version    date       source                       
##  colorspace       1.2-6      2015-03-11 CRAN (R 3.2.0)               
##  devtools       * 1.10.0     2016-01-23 CRAN (R 3.2.3)               
##  digest           0.6.9      2016-01-08 CRAN (R 3.2.3)               
##  evaluate         0.8.3      2016-03-05 CRAN (R 3.2.4)               
##  feather        * 0.0.0.9000 2016-03-29 Github (wesm/feather@a58e3be)
##  ggplot2          2.1.0      2016-03-01 CRAN (R 3.2.4)               
##  gtable           0.2.0      2016-02-26 CRAN (R 3.2.3)               
##  htmltools        0.3.5      2016-03-21 CRAN (R 3.2.4)               
##  knitr            1.12.3     2016-01-22 CRAN (R 3.2.3)               
##  magrittr         1.5        2014-11-22 CRAN (R 3.2.0)               
##  memoise          1.0.0      2016-01-29 CRAN (R 3.2.3)               
##  microbenchmark * 1.4-2.1    2015-11-25 CRAN (R 3.2.2)               
##  munsell          0.4.3      2016-02-13 CRAN (R 3.2.3)               
##  plyr             1.8.3      2015-06-12 CRAN (R 3.2.0)               
##  Rcpp             0.12.4     2016-03-26 CRAN (R 3.2.4)               
##  rmarkdown        0.9.5      2016-02-22 CRAN (R 3.2.3)               
##  scales           0.4.0      2016-02-26 CRAN (R 3.2.3)               
##  stringi          1.0-1      2015-10-22 CRAN (R 3.2.0)               
##  stringr          1.0.0      2015-04-30 CRAN (R 3.2.0)               
##  yaml             2.1.13     2014-06-12 CRAN (R 3.2.0)

Pull requests are welcome!
If you run this on your machine, I’d love to see the results: @seankross

Microbenchmarking Feather

Sean Kross

March 29, 2016

License: CC0