0

I am simply trying to fill a dataframe based on indexing and for loops, but it's not working. The error I get is: Error in [<-.data.frame(*tmp*, j, (2 * i) - 1, value = 0.2) : new columns would leave holes after existing columns. What can I do?

It should have something to do with the indexing and the way the dataframe of hitrates is made, but I can't figure out what it is.

#Here, we make different working directories as we need to get results for 20 sets.
wd = "~/Study/BSc Thesis/Resultaten/R"
sets = 1:20
wds = paste("~/Study/BSc Thesis/Resultaten/R/var_eq_err_5_n_200_g_5_tab_", sets, sep = "")

getmode <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

#Dataframe for hitrates per bicluster
hitrates = data.frame(matrix(ncol = 40, nrow = 5))

#Time it:
start_time = Sys.time()

#CHENG CHURCH
for (i in sets) {
  setwd(wds[i])
  colnames(hitrates)[(2*i) - 1] = paste0("Rows of set ", sets[i])
  colnames(hitrates)[2*i] = paste0("Columns of set ", sets[i])
  list = read.table("data.txt", header = FALSE)
  data = matrix(unlist(list), ncol = 200, nrow = 200, byrow = TRUE)
  res = biclust(data, method = BCCC(), delta = 1, alpha=2, number = 5)
  res
  ccbiclrows = t(res@RowxNumber)
  ccbiclcols = res@NumberxCol
  
  real_columns = read.table("column_cluster.txt", header = FALSE)
  #The mode is used to find the cluster that is the most probable to be the true cluster.
  true_columns = vector()
  for (i in 1:200) {
    true_columns[i] = getmode(real_columns[,i])
  }
  #Do the same thing for the rows:
  real_rows = read.table("row_cluster.txt", header = FALSE)
  #The mode is used to find the cluster that is the most probable to be the true cluster.
  true_rows = vector()
  for (j in 1:200) {
    true_rows[j] = getmode(real_rows[,j])
  }
  
  #Now we have the true column/row indices for the data. 
  
  #Correctness:
  for (j in 1:5) {
    #Rows:
    cor = intersect(which(true_rows %in% j), which(ccbiclrows[j,] %in% 1))
    hitraterow = length(cor)/length(which(ccbiclrows[j,] %in% 1))
    hitrates[j,(2*i)-1] = hitraterow
    
    #Columns:
    cor = intersect(which(true_columns %in% j), which(ccbiclcols[j,] %in% 1))
    hitratecol = length(cor)/length(which(ccbiclcols[j,] %in% 1))
    hitrates[j,(2*i)] = hitratecol
  }
  
}

hitrates
eavb123
  • 11
  • 2
  • Hi! It is difficult to start generating ideas just from what you have provided. Please provide us with a small, reproducible code snippet that we can copy and paste to better understand the issue and test possible solutions. You can share datasets with or smaller samples with . (See [this answer](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example#5963610) for detailed instructions.) You might also consider changing the title of your post to something more descriptive than an error message to attract more focused attention. – ktiu Jun 04 '21 at 09:36

0 Answers0