I am trying to fill a data frame using indexing and for loops, but it is not working. The error I get is: Error in `[<-.data.frame`(`*tmp*`, j, (2 * i) - 1, value = 0.2) :
new columns would leave holes after existing columns. What can I do?
It probably has something to do with the indexing and the way the data frame of hit rates is created, but I can't figure out what it is.
# Base results directory, plus one working directory per data set
# (results are needed for 20 sets).
wd <- "~/Study/BSc Thesis/Resultaten/R"
sets <- 1:20
# Derive the per-set directories from `wd` so the base path is written once.
wds <- file.path(wd, paste0("var_eq_err_5_n_200_g_5_tab_", sets))
# Return the most frequent value of `v`. When several values are equally
# frequent, the one that appears first in `v` is returned.
getmode <- function(v) {
  distinct_values <- unique(v)
  # Map every element to the position of its value in `distinct_values`,
  # then count how often each position occurs.
  occurrence_counts <- tabulate(match(v, distinct_values))
  most_frequent <- which.max(occurrence_counts)
  distinct_values[most_frequent]
}
# Data frame of hit rates per bicluster: one row per bicluster and, for every
# set, a "rows" column (odd position) followed by a "columns" column (even
# position).
n_biclusters <- 5
n_rows <- 200
n_cols <- 200
hitrates <- data.frame(matrix(ncol = 2 * length(sets), nrow = n_biclusters))
# Time it:
start_time <- Sys.time()
# CHENG CHURCH
for (i in seq_along(sets)) {
  # The input files below are read relative to the directory of set i.
  setwd(wds[i])
  rows_col <- 2 * i - 1
  cols_col <- 2 * i
  colnames(hitrates)[rows_col] <- paste0("Rows of set ", sets[i])
  colnames(hitrates)[cols_col] <- paste0("Columns of set ", sets[i])

  raw_values <- read.table("data.txt", header = FALSE)
  # NOTE(review): unlist() flattens the data frame column by column, while
  # byrow = TRUE refills row by row; if data.txt is already a 200 x 200 table
  # this transposes it -- confirm that this is intended.
  data <- matrix(unlist(raw_values), ncol = n_cols, nrow = n_rows,
                 byrow = TRUE)
  # A bare `res` does not print inside a for loop; use print(res) to inspect.
  res <- biclust(data, method = BCCC(), delta = 1, alpha = 2,
                 number = n_biclusters)
  ccbiclrows <- t(res@RowxNumber)
  ccbiclcols <- res@NumberxCol

  # The mode of each column of the label files is taken as the most probable
  # true cluster. The helper index `k` is deliberately not `i`: the original
  # inner loop reused `i`, which left it at 200 and made
  # hitrates[j, (2 * i) - 1] address column 399 of a 40-column data frame
  # ("new columns would leave holes after existing columns").
  real_columns <- read.table("column_cluster.txt", header = FALSE)
  true_columns <- unlist(lapply(
    seq_len(n_cols),
    function(k) getmode(real_columns[, k])
  ))
  # Do the same thing for the rows:
  real_rows <- read.table("row_cluster.txt", header = FALSE)
  true_rows <- unlist(lapply(
    seq_len(n_rows),
    function(k) getmode(real_rows[, k])
  ))

  # Now we have the true column/row indices for the data.
  # Correctness: the share of each found bicluster's members that carry true
  # cluster label j.
  # NOTE(review): this assumes found bicluster j corresponds to true
  # cluster j -- confirm the labelling is aligned.
  # Only biclusters that were actually returned are scored; rows for missing
  # biclusters keep their initial NA.
  n_found <- min(n_biclusters, nrow(ccbiclrows))
  for (j in seq_len(n_found)) {
    # Rows:
    found_rows <- which(ccbiclrows[j, ] %in% 1)
    hits <- intersect(which(true_rows %in% j), found_rows)
    hitrates[j, rows_col] <- length(hits) / length(found_rows)
    # Columns:
    found_cols <- which(ccbiclcols[j, ] %in% 1)
    hits <- intersect(which(true_columns %in% j), found_cols)
    hitrates[j, cols_col] <- length(hits) / length(found_cols)
  }
}
hitrates