0

Consider following data:

> df
   nt_1 NB1949j NB2021j NB3366j NB19491f NB2021f NB3366f nt_2
1     1       1       1       1        1       0       1    1
2     0       0       1       0        1       1       0    0
3     1       1       0       0        0       0       0    1
4     0       0       1       1        1       1       1    0
5     0       0       1       1        1       1       0    0
6     1       1       1       0        1       0       0    1
7     1       1       0       0        0       1       0    1
8     0       0       1       0        0       0       1    0
9     0       0       0       0        0       1       0    0
10   -1       1       1       1        1       1       1   -1

How to calculate following variables:

NB1949jf=NB1949j+NB1949f
NB2021jf=NB2021j+NB2021f
NB3366jf=NB3366j+NB3366f

I have too many variables that start with "NB" and end with "j" or "f". The names of variables is "NB"+ a unique code like 1949+ 'j' or 'f'. I want to sum of columns based on the unique code(like 1949).(It require to get the unique codes from dataframe col names)

library(dplyr)
df=data.frame(
  nt_1=c(1,0,1,0,0,1,1,0,0,-1),
  NB1949j=c(1,0,1,0,0,1,1,0,0,1),
  NB2021j=c(1,1,0,1,1,1,0,1,0,1),
 NB3366j=c(1,0,0,1,1,0,0,0,0,1),
  NB1949f=c(1,1,0,1,1,1,0,0,0,1),
  NB2021f=c(0,1,0,1,1,0,1,0,1,1),
  NB3366f=c(1,0,0,1,0,0,0,1,0,1),
  nt_2=c(1,0,1,0,0,1,1,0,0,-1)
 )


NB1949jf=NB1949j+NB1949f
NB2021jf=NB2021j+NB2021f
NB3366jf=NB3366j+NB3366f

I think the first step to select the columns:

df2<-df %>% select(starts_with("NB") , ends_with("j") |ends_with("f"))
 df2
   NB1949j NB2021j NB3366j NB19491f NB2021f NB3366f
1        1       1       1        1       0       1
2        0       1       0        1       1       0
3        1       0       0        0       0       0
4        0       1       1        1       1       1
5        0       1       1        1       1       0
6        1       1       0        1       0       0
7        1       0       0        0       1       0
8        0       1       0        0       0       1
9        0       0       0        0       1       0
10       1       1       1        1       1       1

Now extract the unique codes as follows:

code1<-substr(names(df2),3,nchar(names(df2))-1)
Masoud
  • 247
  • 2
  • 16

0 Answers0