2

Given this tibble:

tibble(x = c(1:9))

I want to add a column x_lag_1 = c(NA,1:8), a column x_lag_2 = c(NA,NA,1:7), etc.

Up to x_lag_n.

3 Answers3

3

This can be quick with data.table:

library(data.table)

n <- seq(4)
setDT(df)[, paste0('x_lag_', n) := shift(x, n)]

df
  x x_lag_1 x_lag_2 x_lag_3 x_lag_4
1: 1      NA      NA      NA      NA
2: 2       1      NA      NA      NA
3: 3       2       1      NA      NA
4: 4       3       2       1      NA
5: 5       4       3       2       1
6: 6       5       4       3       2
7: 7       6       5       4       3
8: 8       7       6       5       4
9: 9       8       7       6       5
onyambu
  • 49,350
  • 3
  • 19
  • 45
1

You may use map_dfc to add n new columns.

library(dplyr)
library(purrr)

df <- tibble(x = c(1:9))
n <- 3
bind_cols(df, map_dfc(seq_len(n), ~df %>% 
                      transmute(!!paste0('x_lag', .x) := lag(x, .x))))

#      x x_lag1 x_lag2 x_lag3
#  <int>  <int>  <int>  <int>
#1     1     NA     NA     NA
#2     2      1     NA     NA
#3     3      2      1     NA
#4     4      3      2      1
#5     5      4      3      2
#6     6      5      4      3
#7     7      6      5      4
#8     8      7      6      5
#9     9      8      7      6
Ronak Shah
  • 355,584
  • 18
  • 123
  • 178
0

Edit 2: Reworked the answer to contemplate the case of a grouped df.

library(tidyverse)
set.seed(123)

df <- tibble(group = sample(letters[1:3], 30, replace = TRUE), x = c(1:30))

formulas <- seq(3, 12, 3) %>%
  map(~ as.formula(str_glue("~lag(.,n={.x})"))) %>%
  set_names(str_c("lag", seq(3, 12, 3)))

df %>% 
  summarise(x, across(x, lst(!!!formulas)))
#> # A tibble: 30 × 5
#>        x x_lag3 x_lag6 x_lag9 x_lag12
#>    <int>  <int>  <int>  <int>   <int>
#>  1     1     NA     NA     NA      NA
#>  2     2     NA     NA     NA      NA
#>  3     3     NA     NA     NA      NA
#>  4     4      1     NA     NA      NA
#>  5     5      2     NA     NA      NA
#>  6     6      3     NA     NA      NA
#>  7     7      4      1     NA      NA
#>  8     8      5      2     NA      NA
#>  9     9      6      3     NA      NA
#> 10    10      7      4      1      NA
#> # … with 20 more rows

df %>%
  group_by(group) %>%
  summarise(x, across(x, lst(!!!formulas)), .groups = "drop")
#> # A tibble: 30 × 6
#>    group     x x_lag3 x_lag6 x_lag9 x_lag12
#>    <chr> <int>  <int>  <int>  <int>   <int>
#>  1 a        10     NA     NA     NA      NA
#>  2 a        13     NA     NA     NA      NA
#>  3 a        16     NA     NA     NA      NA
#>  4 a        19     10     NA     NA      NA
#>  5 a        20     13     NA     NA      NA
#>  6 a        21     16     NA     NA      NA
#>  7 a        22     19     10     NA      NA
#>  8 a        27     20     13     NA      NA
#>  9 b         4     NA     NA     NA      NA
#> 10 b         6     NA     NA     NA      NA
#> # … with 20 more rows

Created on 2021-12-30 by the reprex package (v2.0.1)

jpdugo17
  • 5,531
  • 2
  • 9
  • 22