The functions included in staticimports are meant to be fast. For many of them, several alternative implementations were considered. This document contains performance tests of those candidate implementations.

purrr-like functions

walk

# Baseline candidate: purrr's own walk(), bound to a local name so all
# candidates are called the same way in the benchmark.
walk_purrr <- purrr::walk

# lapply()-based walk: calls .f on every element of .x, forwarding ... to
# each call. The list that lapply() builds is thrown away; the function
# always returns NULL.
walk_lapply <- function(.x, .f, ...) {
  lapply(.x, .f, ...)
  NULL
}

# Loop-based walk: calls .f once per element of .x, purely for side effects.
#
# .x   Vector or list to iterate over.
# .f   Function called as .f(.x[[k]], ...).
# ...  Extra arguments forwarded unchanged to every call of .f.
#
# The values returned by .f are discarded; the function always returns NULL.
walk_for <- function(.x, .f, ...) {
  for (idx in seq_along(.x)) {
    .f(.x[[idx]], ...)
  }
  NULL
}

# Benchmark input: 100 integers and an identity function, so f() itself does
# almost no work.
x <- 1:100
f <- function(a) a
# check = FALSE because the candidates do not all return the same value
# (walk_purrr() returns its input, the other two return NULL).
# The outer parentheses print the result as well as storing it in `times`.
(times <- bench::mark(
  walk_purrr(x, f),
  walk_lapply(x, f),
  walk_for(x, f),
  check = FALSE
))
#> # A tibble: 3 × 6
#>   expression             min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>        <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 walk_purrr(x, f)     113µs  120.8µs     8180.   125.6KB     12.4
#> 2 walk_lapply(x, f)   65.4µs   72.2µs    13547.      848B     14.9
#> 3 walk_for(x, f)      47.9µs   51.9µs    18928.    17.9KB     18.8

Of the three implementations, walk_for() is the fastest. By median time, it takes about 57% less time than walk_purrr() (roughly 2.3 times as fast), and about 28% less time than walk_lapply() (roughly 1.4 times as fast).

Note that walk_purrr() returns the input .x object, whereas the other two implementations return NULL.

map

# Baseline candidate: purrr's own map().
map_purrr <- purrr::map

# lapply()-based map: a direct pass-through to lapply(), so it returns a list
# with the result of .f for each element of .x. Extra arguments in ... are
# forwarded to every call of .f.
map_lapply <- function(.x, .f, ...) {
  lapply(.x, .f, ...)
}

# Loop-based map: applies .f to each element of .x and collects the results.
#
# .x   Vector or list to iterate over.
# .f   Function called as .f(.x[[i]], ...).
# ...  Extra arguments forwarded unchanged to every call of .f.
#
# Returns a list with exactly one element per element of .x, carrying
# names(.x). A NULL returned by .f is kept as a NULL element, as lapply()
# would do.
map_for <- function(.x, .f, ...) {
  res <- vector("list", length(.x))
  for (i in seq_along(.x)) {
    val <- .f(.x[[i]], ...)
    # `res[[i]] <- NULL` would delete slot i and shrink the list. The slot
    # already holds NULL, so a NULL result needs no assignment at all.
    if (!is.null(val)) {
      res[[i]] <- val
    }
  }
  names(res) <- names(.x)
  res
}

# Same input as the walk benchmark: 100 integers and an identity function.
# bench::mark() is left at its default settings here, unlike the walk
# benchmark, which passes check = FALSE.
x <- 1:100
f <- function(a) a
bench::mark(
  map_purrr(x, f),
  lapply(x, f),     # Bare lapply() for comparison
  map_lapply(x, f),
  map_for(x, f),
)
#> # A tibble: 4 × 6
#>   expression            min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>       <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 map_purrr(x, f)   111.2µs  119.7µs     8220.    3.69KB     14.6
#> 2 lapply(x, f)       63.8µs   71.9µs    13801.      848B     19.1
#> 3 map_lapply(x, f)   65.1µs   71.7µs    13785.      848B     18.9
#> 4 map_for(x, f)      68.6µs   73.1µs    13498.   32.77KB     12.6

Apart from map_purrr(), all of these implementations are within a pretty close range. map_for() is not measurably faster than map_lapply() in this run (and it allocates more memory), so we’ll just use map_lapply() for simplicity.

map2

# Baseline candidate: purrr's own map2().
map2_purrr <- purrr::map2

# mapply()-based map2: iterates over .x and .y in parallel.
# - MoreArgs = list(...) hands the extra arguments to every call of .f as-is,
#   rather than iterating over them.
# - SIMPLIFY = FALSE keeps the result a list instead of letting mapply()
#   collapse it into a vector or matrix.
map2_mapply <- function(.x, .y, .f, ...) {
  mapply(.f, .x, .y, MoreArgs = list(...), SIMPLIFY = FALSE)
}

# Loop-based map2: applies .f to corresponding elements of .x and .y.
#
# .x, .y  Vectors or lists iterated in parallel. The loop runs over the
#         positions of .x and indexes .y at the same positions; .y is not
#         recycled.
# .f      Function called as .f(.x[[i]], .y[[i]], ...).
# ...     Extra arguments forwarded unchanged to every call of .f.
#
# Returns a list with exactly one element per element of .x, carrying
# names(.x). A NULL returned by .f is kept as a NULL element.
map2_for <- function(.x, .y, .f, ...) {
  res <- vector("list", length(.x))
  for (i in seq_along(.x)) {
    val <- .f(.x[[i]], .y[[i]], ...)
    # `res[[i]] <- NULL` would delete slot i and shrink the list. The slot
    # already holds NULL, so a NULL result needs no assignment at all.
    if (!is.null(val)) {
      res[[i]] <- val
    }
  }
  names(res) <- names(.x)
  res
}

# Unnamed inputs: 100 integers in x, 100 doubles in y, and a two-argument
# function that simply adds its inputs.
x <- 1:100
y <- x * 1000
f <- function(a, b) a+b
bench::mark(
  map2_purrr(x, y, f),
  map2_mapply(x, y, f),
  map2_for(x, y, f),
)
#> # A tibble: 3 × 6
#>   expression                min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>           <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 map2_purrr(x, y, f)     168µs    180µs     5483.    26.3KB     16.8
#> 2 map2_mapply(x, y, f)    124µs    135µs     7288.      848B     23.5
#> 3 map2_for(x, y, f)       100µs    107µs     9195.    31.8KB     16.4

# With named vector: repeat the benchmark after giving x names, so the cost
# of carrying names over to the result is included.
names(x) <- as.character(x)
# The outer parentheses print the result as well as storing it in `times`.
(times <- bench::mark(
  map2_purrr(x, y, f),
  map2_mapply(x, y, f),
  map2_for(x, y, f),
))
#> # A tibble: 3 × 6
#>   expression                min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>           <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 map2_purrr(x, y, f)     167µs    173µs     5712.    1.09KB     16.7
#> 2 map2_mapply(x, y, f)    126µs    132µs     7382.      848B     23.6
#> 3 map2_for(x, y, f)       101µs    110µs     8648.      848B     16.8

map2_for() is roughly 17–21% faster than map2_mapply() by median time (about 21% for the unnamed input and about 17% for the named one), so we’ll use map2_for().

map2_lgl, map2_int, map2_dbl, map2_chr

The map2* functions return an atomic vector of the specified type.

# Use map2_for implementation from previous section.
# The list-based map2_dbl_* candidates below call map2() and then convert
# its list result to a double vector.
map2 <- map2_for

# Baseline candidate: purrr's own map2_dbl().
map2_dbl_purrr <- purrr::map2_dbl

# map2_dbl candidate: run map2() to get a list, then convert that list to a
# double vector by assigning its mode.
#
# .x, .y, .f, ... are passed straight through to map2().
map2_dbl_mode <- function(.x, .y, .f, ...) {
  out <- map2(.x, .y, .f, ...)
  mode(out) <- "double"
  out
}

# map2_dbl candidate: run map2() to get a list, then convert that list to a
# double vector by assigning its storage mode.
#
# .x, .y, .f, ... are passed straight through to map2().
#
# The local name `res` is kept as-is: it appears verbatim in the coercion
# warning shown in the wrong-type example output later in this document.
map2_dbl_storagemode <- function(.x, .y, .f, ...) {
  res <- map2(.x, .y, .f, ...)
  storage.mode(res) <- "double"
  res
}

# as.numeric() drops names, so this version has to restore them from .x
# afterwards; with that step its result matches the other versions.
map2_dbl_asnumeric <- function(.x, .y, .f, ...) {
  # Convert the list produced by map2() to a double vector in one step.
  out <- as.numeric(map2(.x, .y, .f, ...))
  # Put the names of .x back on the converted vector.
  names(out) <- names(.x)
  out
}

# map2_dbl candidate: fill a preallocated double vector in an explicit loop.
#
# .x, .y  Vectors or lists iterated in parallel over the positions of .x.
# .f      Function called as .f(.x[[k]], .y[[k]], ...).
# ...     Extra arguments forwarded unchanged to every call of .f.
#
# Returns a vector with one element per element of .x, carrying names(.x).
# The element assignment does not check the type of what .f returns: a
# non-double value (e.g. a string) changes the type of the whole vector
# without any warning.
map2_dbl_for <- function(.x, .y, .f, ...) {
  out <- vector("double", length(.x))
  for (idx in seq_along(.x)) {
    out[[idx]] <- .f(.x[[idx]], .y[[idx]], ...)
  }
  names(out) <- names(.x)
  out
}

# Benchmark input: two 1000-element lists. x holds integers and y holds
# doubles, so f() returns a double for every pair.
x <- 1:1000
y <- x * 10000
x <- as.list(x)
y <- as.list(y)
f <- function(a, b) a+b

# This is what the output should look like
map2_dbl_purrr(1:3, 101:103, f)
#> [1] 102 104 106

# Unnamed inputs.
bench::mark(
  map2_dbl_purrr(x, y, f),
  map2_dbl_mode(x, y, f),
  map2_dbl_storagemode(x, y, f),
  map2_dbl_asnumeric(x, y, f),
  map2_dbl_for(x, y, f),
)
#> # A tibble: 5 × 6
#>   expression                         min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                    <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 map2_dbl_purrr(x, y, f)         1.04ms   1.11ms      887.    8.12KB     21.6
#> 2 map2_dbl_mode(x, y, f)        924.89µs 982.86µs     1011.   31.05KB     14.7
#> 3 map2_dbl_storagemode(x, y, f) 907.21µs 956.48µs     1038.   15.72KB     14.7
#> 4 map2_dbl_asnumeric(x, y, f)   910.46µs 955.38µs     1035.   15.72KB     14.9
#> 5 map2_dbl_for(x, y, f)         858.79µs 906.78µs     1099.   38.85KB     16.9

# Same test, with names.
# x is a list at this point; as.character() turns each element into its
# string form ("1", "2", ...) to serve as the names.
names(x) <- as.character(x)
# The outer parentheses print the result as well as storing it in `times`.
(times <- bench::mark(
  map2_dbl_purrr(x, y, f),
  map2_dbl_mode(x, y, f),
  map2_dbl_storagemode(x, y, f),
  map2_dbl_asnumeric(x, y, f),
  map2_dbl_for(x, y, f),
))
#> # A tibble: 5 × 6
#>   expression                         min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                    <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 map2_dbl_purrr(x, y, f)         1.05ms   1.14ms      875.    8.12KB     19.3
#> 2 map2_dbl_mode(x, y, f)        925.98µs 988.46µs      998.   15.72KB     14.8
#> 3 map2_dbl_storagemode(x, y, f) 905.77µs 954.97µs     1044.   15.72KB     17.0
#> 4 map2_dbl_asnumeric(x, y, f)   910.38µs 965.19µs     1028.   15.72KB     14.8
#> 5 map2_dbl_for(x, y, f)         863.83µs 929.21µs     1054.    7.86KB     15.0

map2_dbl_for() is the fastest by a bit. However, one drawback is that if .f() returns a value of the incorrect type, it simply promotes the result vector to that type, and emits no warnings; the returned vector is not guaranteed to be of the specified type. This is not acceptable behavior.

# Wrong-type demo: .f returns a string instead of a double. Assigning it
# into the preallocated double vector converts the whole result to
# character, with no warning or error.
x <- c(1, 2)
map2_dbl_for(x, x, function(a, b) "test")
#> [1] "test" "test"

The ideal behavior in this situation is for the function to throw an error when an incorrect type is returned. This is what map2_dbl_purrr() does.

# Same wrong-type call against purrr: it stops with an error at the first
# element instead of returning a character vector.
map2_dbl_purrr(x, x, function(a, b) "test")
#> Error in `map2_dbl_purrr()`:
#>  In index: 1.
#> Caused by error:
#> ! Can't coerce from a string to a double.

The other three versions, map2_dbl_mode(), map2_dbl_storagemode(), and map2_dbl_asnumeric(), emit warnings and return NA values instead of throwing an error, which isn’t ideal but is acceptable.

# Same wrong-type call against the three list-based candidates. Each one
# coerces the strings to double, producing NA plus one coercion warning per
# element rather than an error.
# Note: `mde` in the first pair of warnings is printed by R itself (it is a
# name used inside base R's `mode<-`), not a typo in this document.
map2_dbl_mode(x, x, function(a, b) "test")
#> Warning in mde(x): NAs introduced by coercion

#> Warning in mde(x): NAs introduced by coercion
#> [1] NA NA
map2_dbl_storagemode(x, x, function(a, b) "test")
#> Warning in storage.mode(res) <- "double": NAs introduced by coercion
#> Warning in storage.mode(res) <- "double": NAs introduced by coercion
#> [1] NA NA
map2_dbl_asnumeric(x, x, function(a, b) "test")
#> Warning in map2_dbl_asnumeric(x, x, function(a, b) "test"): NAs introduced by
#> coercion
#> Warning in map2_dbl_asnumeric(x, x, function(a, b) "test"): NAs introduced by
#> coercion
#> [1] NA NA

map2_dbl_asnumeric() has the best balance of speed, understandability, and warning/error behavior when the function returns the wrong type.