df1
acc_id || s1_2005 || s1_2006
87 1 1
df2
acc_id || s2_2005 || s2_2006
87 2 3
output
df3
acc_id ||s1_2005 + s2_2005 || s1_2006 + s2_2006
87 3 4
df1
acc_id || s1_2005 || s1_2006
87 1 1
df2
acc_id || s2_2005 || s2_2006
87 2 3
output
df3
acc_id ||s1_2005 + s2_2005 || s1_2006 + s2_2006
87 3 4
There are a a bunch of ways to do this. One way is to use rbind() to stack the data together into one dataframe, and then to aggregate() the data to the acc_id level.
# example data
df1 <- data.frame(
acc_id = c(87, 88),
s1_2005 = c(1, 2),
s1_2006 = c(1, 2))
df2 <- data.frame(
acc_id = c(87, 89),
s1_2005 = c(2, 4),
s1_2006 = c(3, 4))
# combine data by stacking
df <- rbind(df1, df2)
# sum s1_2005 and s1_2006 by acc_id
res <- aggregate(df[ , c("s1_2005", "s1_2006")], by=df["acc_id"], FUN=sum)
# print result
res
The result:
acc_id s1_2005 s1_2006
87 3 4
88 2 2
89 4 4
Using dplyr
library(dplyr)
bind_rows(df1, df2) %>%
group_by(acc_id) %>%
summarise(across(everything(), sum), .groups = 'drop')
-output
# A tibble: 3 x 3
# acc_id s1_2005 s1_2006
# <dbl> <dbl> <dbl>
#1 87 3 4
#2 88 2 2
#3 89 4 4
df1 <- data.frame(
acc_id = c(87, 88),
s1_2005 = c(1, 2),
s1_2006 = c(1, 2))
df2 <- data.frame(
acc_id = c(87, 89),
s1_2005 = c(2, 4),
s1_2006 = c(3, 4))