First, fill a data frame with 100,000 rows of mixed-type columns:
library(devtools)
# install_github("wesm/feather/R")
library(feather)
library(microbenchmark)
# Build the benchmark data frame `x`: `rows` rows and six columns, one per
# column type exercised by the benchmarks.
# The seed is ordinary arithmetic: 3 - 29 - 16 evaluates to -42.
set.seed(3 - 29 - 16)
rows <- 1e5

# Whole-number values in [-100, 100]. round() returns doubles, so this column
# is stored as numeric rather than integer.
x <- data.frame(
  ints = round(runif(rows, min = -100, max = 100)),
  stringsAsFactors = FALSE
)

# Continuous values over the same range.
x[["floats"]] <- runif(rows, min = -100, max = 100)

# Logical column drawn from TRUE/FALSE.
x[["bools"]] <- sample(c(TRUE, FALSE), size = rows, replace = TRUE)

# Random second offsets from the 1970-01-01 origin, converted to POSIXct.
x[["dates"]] <- as.POSIXct(
  runif(rows, min = 100000000, max = 1459293171),
  origin = "1970-01-01"
)

# Factor column over 36 possible labels (A-Z plus "0"-"9").
x[["categories"]] <- factor(
  sample(c(LETTERS, 0:9), size = rows, replace = TRUE)
)

# Lower-case strings of random length 1-10; the length is drawn first, then
# the letters, once per row.
x[["strings"]] <- vapply(
  seq_len(rows),
  function(i) {
    len <- sample(1:10, 1)
    paste0(sample(letters, len, replace = TRUE), collapse = "")
  },
  character(1)
)
Benchmark writes:
# Time ten runs of writing `x` to "x.csv" with base R's write.csv().
# write.csv() is called with its defaults, so row names are written as well
# (row.names defaults to TRUE).
microbenchmark(
  write.csv(x, file = "x.csv"),
  times = 10
)
## Unit: milliseconds
## expr min lq mean median uq
## write.csv(x, file = "x.csv") 961.7503 991.5472 1059.493 1011.836 1046.36
## max neval
## 1425.711 10
# Time ten runs of serialising `x` to "x.rda" with save().
microbenchmark(
  save(x, file = "x.rda"),
  times = 10
)
## Unit: milliseconds
## expr min lq mean median uq
## save(x, file = "x.rda") 366.4059 384.4919 394.2606 397.1226 406.9492
## max neval
## 411.7216 10
# Time ten runs of serialising `x` to "x.rds" with saveRDS().
microbenchmark(
  saveRDS(x, file = "x.rds"),
  times = 10
)
## Unit: milliseconds
## expr min lq mean median uq
## saveRDS(x, file = "x.rds") 369.1186 378.6542 386.3716 385.2458 397.955
## max neval
## 402.8885 10
# Time ten runs of writing `x` to "x.feather" with feather's write_feather().
microbenchmark(
  write_feather(x, "x.feather"),
  times = 10
)
## Unit: milliseconds
## expr min lq mean median uq
## write_feather(x, "x.feather") 20.33291 22.99778 29.42579 26.81864 39.6417
## max neval
## 41.56623 10
File sizes (in bytes):
# Drop the in-memory data before the read benchmarks; only the files written
# above are needed from here on.
rm(list = c("x", "rows"))

# On-disk size of each output file, one call per format.
file.size("x.csv")
## [1] 6667705
file.size("x.rda")
## [1] 2172037
file.size("x.rds")
## [1] 2172038
file.size("x.feather")
## [1] 3761724
Benchmark reads:
# Time ten runs of reading "x.csv" back with read.csv(). The assignment to
# `y` is part of the timed expression.
microbenchmark(
  y <- read.csv("x.csv"),
  times = 10
)
## Unit: seconds
## expr min lq mean median uq
## y <- read.csv("x.csv") 2.410053 2.422003 2.491217 2.444062 2.483281
## max neval
## 2.881501 10

# Discard the result before the next benchmark.
rm(y)
# Time ten runs of restoring "x.rda" with load().
microbenchmark(
  load("x.rda"),
  times = 10
)
## Unit: milliseconds
## expr min lq mean median uq max neval
## load("x.rda") 57.38035 57.96498 62.40219 60.32777 62.55126 78.52523 10

# load() restores the object under the name it was saved with (`x`), so that
# is the name removed here.
rm(x)
# Time ten runs of reading "x.rds" back with readRDS(). The assignment to
# `y` is part of the timed expression.
microbenchmark(
  y <- readRDS("x.rds"),
  times = 10
)
## Unit: milliseconds
## expr min lq mean median uq
## y <- readRDS("x.rds") 57.07299 61.96834 68.87885 65.53774 77.75659
## max neval
## 86.40591 10

# Discard the result before the next benchmark.
rm(y)
# Time ten runs of reading "x.feather" back with feather's read_feather().
# The assignment to `y` is part of the timed expression.
microbenchmark(
  y <- read_feather("x.feather"),
  times = 10
)
## Unit: milliseconds
## expr min lq mean median
## y <- read_feather("x.feather") 14.75314 15.68475 22.78204 17.50723
## uq max neval
## 28.85749 43.1859 10
# Record the R version, platform and package versions the timings above were
# produced with.
devtools::session_info()
## Session info --------------------------------------------------------------
## setting value
## version R version 3.2.4 (2016-03-10)
## system x86_64, darwin13.4.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## tz America/New_York
## date 2016-03-29
## Packages ------------------------------------------------------------------
## package * version date source
## colorspace 1.2-6 2015-03-11 CRAN (R 3.2.0)
## devtools * 1.10.0 2016-01-23 CRAN (R 3.2.3)
## digest 0.6.9 2016-01-08 CRAN (R 3.2.3)
## evaluate 0.8.3 2016-03-05 CRAN (R 3.2.4)
## feather * 0.0.0.9000 2016-03-29 Github (wesm/feather@a58e3be)
## ggplot2 2.1.0 2016-03-01 CRAN (R 3.2.4)
## gtable 0.2.0 2016-02-26 CRAN (R 3.2.3)
## htmltools 0.3.5 2016-03-21 CRAN (R 3.2.4)
## knitr 1.12.3 2016-01-22 CRAN (R 3.2.3)
## magrittr 1.5 2014-11-22 CRAN (R 3.2.0)
## memoise 1.0.0 2016-01-29 CRAN (R 3.2.3)
## microbenchmark * 1.4-2.1 2015-11-25 CRAN (R 3.2.2)
## munsell 0.4.3 2016-02-13 CRAN (R 3.2.3)
## plyr 1.8.3 2015-06-12 CRAN (R 3.2.0)
## Rcpp 0.12.4 2016-03-26 CRAN (R 3.2.4)
## rmarkdown 0.9.5 2016-02-22 CRAN (R 3.2.3)
## scales 0.4.0 2016-02-26 CRAN (R 3.2.3)
## stringi 1.0-1 2015-10-22 CRAN (R 3.2.0)
## stringr 1.0.0 2015-04-30 CRAN (R 3.2.0)
## yaml 2.1.13 2014-06-12 CRAN (R 3.2.0)