0

How can I find the most frequent value in a given column in data.frame with multiple values in a cell?

Sample data:

structure(list(time = c("act1_1", "act1_10", "act1_11", "act1_12", 
"act1_13", "act1_14", "act1_15", "act1_16", "act1_17", "act1_18", 
"act1_19", "act1_2", "act1_20", "act1_21", "act1_22", "act1_23", 
"act1_24", "act1_3", "act1_4", "act1_5", "act1_6", "act1_7", 
"act1_8", "act1_9"), `Most frequent` = c("110", "310,110,1110", 
"310,110,1110", "310,110,111,1110", "110,310,9120,111,1110", 
"110,310,111,3110,1110", "9120,110,310,210,111,1110", "9120,110,1110,210,310,111,3110", 
"1110,9120,110,310,111,210", "1110,111,110,310,210", "1110,310,110,111,3110,210,9120", 
"110", "1110,210,110,310,3110,9120", "1110,110,111,310,210,9120,3110,3210", 
"1110,9120,110,3110,310,111,3210,210,3819", "1110,9120,110,111,310,3110,210", 
"1110,9120,110,310,210,3110,8210,111", "110", "110", "110,1110", 
"110,111,1110", "110,310,1110", "110,1110", "110,210,1110")), row.names = c(NA, 
-24L), class = c("tbl_df", "tbl", "data.frame"))
user11418708
  • 882
  • 3
  • 10

4 Answers4

2

You can use table to count the cases after you have strsplit the column Most frequent.

names(sort(-table(unlist(strsplit(x$"Most frequent", ",")))))[1]
#[1] "110"
GKi
  • 27,870
  • 2
  • 18
  • 35
1

Using the Mode function from here :

Mode <- function(x) {
    ux <- unique(x)
    ux[which.max(tabulate(match(x, ux)))]
}

We can split the string on comma and pass unlisted vector to Mode function to get most frequent value.

Mode(unlist(strsplit(df$`Most frequent`, ',')))
#[1] "110"
Ronak Shah
  • 355,584
  • 18
  • 123
  • 178
1

Using dplyr:

df %>% separate_rows(`Most frequent`) %>% group_by(`Most frequent`) %>% 
summarise(Freq = n()) %>% arrange(desc(Freq)) %>% slice(1)
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 1 x 2
  `Most frequent`  Freq
  <chr>           <int>
1 110                24
> 
Karthik S
  • 10,282
  • 2
  • 9
  • 23
1

or you could work it with a nested list:

library(dplyr)
library(tidyr)
library(stringr)

df %>% 
  dplyr::mutate(X = stringr::str_split(`Most frequent`, ",")) %>% 
  tidyr::unnest(X) %>%  
  dplyr::count(X) %>% 
  dplyr::slice_max(order_by = n)
DPH
  • 3,362
  • 1
  • 6
  • 16