0

I am working with R. I wrote the following program which creates a table for a series of data manipulations steps:

#load library 
library(dplyr)
 set.seed(123)
 
 # data
 a1 = rnorm(1000,100,10)
 b1 = rnorm(1000,100,5)
 c1 = sample.int(1000, 1000, replace = TRUE)
 train_data = data.frame(a1,b1,c1)

#generate random numbers
 random_1 =  runif(1, 80, 120)
random_2 =  runif(1, random_1, 120)
 random_3 =  runif(1, 85, 120)
 random_4 =  runif(1, random_3, 120)

#bin data according to random criteria
train_data <- train_data %>% mutate(cat = ifelse(a1 <= random_1 & b1 <= random_3, "a", ifelse(a1 <= random_2 & b1 <= random_4, "b", "c"))) 

#calculate 60th quantile ("quant") for each bin
final_table = data.frame(train_data %>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = .6)))

#create a new variable ("diff") that measures if the quantile is bigger tha the value of "c1"
final_table$diff = ifelse(final_table$quant > final_table$c1,1,0)

#create a table: for each bin, calculate the average of "diff"
final_table_2 = data.frame(final_table %>% 
  group_by(cat) %>% 
  summarize(
   mean = mean(diff)
  ))

#add "total mean" to this table
final_table_2 = data.frame(final_table_2 %>% add_row(cat = "total", mean = mean(final_table$diff)))

#format this table: add the random criteria to this table for reference
final_table_2$random_1 = random_1

final_table_2$random_2 = random_2

final_table_2$random_3 = random_3

final_table_2$random_4 = random_4

 #optional: view table
 head(final_table_2)

    cat      mean random_1 random_2 random_3 random_4
1     a 0.5897436 95.67371 111.8133 94.00313 102.0569
2     b 0.5992366 95.67371 111.8133 94.00313 102.0569
3     c 0.5995423 95.67371 111.8133 94.00313 102.0569
4 total 0.5990000 95.67371 111.8133 94.00313 102.0569

Now, I am trying to create a loop which repeats this process 10 times - for each iteration, it should "stack" the new results on top of the older results (i.e. keep everything).

I tried to set up the basic loop structure:

for (i in 1:10 ) { 
for (j in 1:10 ) { 
    for (k in 1:10) {
        for (l in 1:10)  {

for (m in 1:10)  {

 random_i =  runif(1, 80, 120)
random_j =  runif(1, random_1, 120)


 random_k =  runif(1, 85, 120)
 random_l =  runif(1, random_3, 120)


train_data <- train_data %>% mutate(cat = ifelse(a1 <= random_i & b1 <= random_k, "a", ifelse(a1 <= random_j & b1 <= random_l, "b", "c"))) 

final_table = data.frame(train_data %>% group_by(cat) %>%
mutate(quant = quantile(c1, prob = .6)))

final_table$diff = ifelse(final_table$quant > final_table$c1,1,0)


final_table_m = data.frame(final_table %>% 
  group_by(cat) %>% 
  summarize(
   mean = mean(diff)
  ))

final_table_m = data.frame(final_table_m %>% add_row(cat = "total", mean = mean(final_table$diff)))

final_table_m$random_1 = random_i

final_table_m$random_2 = random_j


final_table_m$random_3 = random_k


final_table_m$random_4 = random_l

}
    }
        }
            }

But this only keeps the "latest" iteration.

Does anyone know how to keep all of the iterations, and store/keep all the results in the same table?

Thanks

stats_noob
  • 3,127
  • 2
  • 8
  • 27
  • 1
    The name `random_i` will always be `random_i`. R does not inject variable values into variable names. Creating variables with indexes in their names is generally something you should avoid in R. Better to store results in named list. See the duplicate for possible options. – MrFlick Jul 02 '21 at 04:29

0 Answers0