5

I have the following dataset:

df <- data.frame(dens = rnorm(5000),
             split = as.factor(sample(1:2, 5000, replace = T)),
             method = as.factor(sample(c("A","B"), 5000, replace = T)),
             counts = sample(c(1, 10, 100, 1000, 10000), 5000, replace = T))

I have the following split violin plots for splits 1 and 2 within groups A and B for each count. We have four groups for each setting but there is a nested aspect to it:

library(ggplot2)
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, 
                           draw_group = function(self, data, ..., draw_quantiles = NULL){
                               ## By @YAK: https://stackoverflow.com/questions/35717353/split-violin-plot-with-ggplot2
                               data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
                               grp <- data[1,'group']
                               newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
                               newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
                               newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x']) 
                               if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
                                   stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
                                   quantiles <- create_quantile_segment_frame(data, draw_quantiles, split = TRUE, grp = grp)
                                   aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
                                   aesthetics$alpha <- rep(1, nrow(quantiles))
                                   both <- cbind(quantiles, aesthetics)
                                   quantile_grob <- GeomPath$draw_panel(both, ...)
                                   ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
                               }
                               else {
                                   ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
                               }
                           }
                           )

create_quantile_segment_frame <- function (data, draw_quantiles, split = FALSE, grp = NULL) {
    dens <- cumsum(data$density)/sum(data$density)
    ecdf <- stats::approxfun(dens, data$y)
    ys <- ecdf(draw_quantiles)
    violin.xminvs <- (stats::approxfun(data$y, data$xminv))(ys)
    violin.xmaxvs <- (stats::approxfun(data$y, data$xmaxv))(ys)
    violin.xs <- (stats::approxfun(data$y, data$x))(ys)
    if (grp %% 2 == 0) {
        data.frame(x = ggplot2:::interleave(violin.xs, violin.xmaxvs), 
                   y = rep(ys, each = 2), group = rep(ys, each = 2)) 
    } else {
        data.frame(x = ggplot2:::interleave(violin.xminvs, violin.xs), 
                   y = rep(ys, each = 2), group = rep(ys, each = 2)) 
    }
}

geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
    layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}



df$key <- factor(paste(df$split, df$method))

levels(df$split) <- factor(0:2)
library(ggplot2)
ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) +geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + theme_light() + theme(legend.position="bottom") + scale_x_discrete(limits=levels(interaction(df$split,df$counts))[-length(levels(interaction(df$split,df$counts)))],drop = FALSE, name = "Counts")

And I get the following:

enter image description here

Which is great, except that I would like to only have labels of counts 1, 10, 100, 1000, 10000 on the x-axis and in between the blue and the green violin plots. So label 1 in between the first blue and the green violin plots, 10 in between the second blue and the green violin plots, 100 in between the second blue and the green violin plots and so on.

Thanks for any suggestions on how to do this.

Mike Wise
  • 20,587
  • 7
  • 79
  • 101
user3236841
  • 1,000
  • 11
  • 32

2 Answers2

3

Instead of changing the break point for a discrete scale, you can try adding a text layer to the plot itself, which is able to accept non-integer values for discrete scale positions:

ggplot(df,
       aes(x = x, y = dens, fill = key)) + 
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +

  # annotate layer with non-integer positions
  annotate(geom = "text", x = c(1.5, 4.5, 7.5, 10.5, 13.5), y = -3.75,
           label = c("1", "10", "100", "1000", "10000")) +
  scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired", n=4)) + 
  scale_x_discrete(name = "Counts", drop = FALSE) +
  theme_minimal() + 

  # hide the actual discrete labels / ticks
  theme(legend.position="bottom",
        axis.ticks.x = element_blank(),
        axis.text.x = element_blank())

plot

Z.Lin
  • 25,090
  • 5
  • 44
  • 85
3

I usually solve these issues with facets, then format the strips as though they are axis labels. This also naturally puts the pairs closer together, without any hacks, and you can change the distance by changing theme(panel.spacing = .....), if needed. E.g.:

ggplot(df, aes(x = split, y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + 
  xlab('count') +
  facet_grid(~counts, scales = 'free_x', switch = 'x') +
  theme_light() + 
  theme(legend.position = "bottom", axis.text.x = element_blank(), axis.ticks.x = element_blank(),
        strip.background = element_blank(), strip.text = element_text(color = 'black'))

enter image description here

Or a different theme with less obvious facets:

ggplot(df, aes(x = split, y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + 
  xlab('count') +
  facet_grid(~counts, scales = 'free_x', switch = 'x') +
  theme_minimal() + 
  theme(legend.position = "bottom", axis.text.x = element_blank(), axis.ticks.x = element_blank())

enter image description here

Axeman
  • 29,327
  • 7
  • 77
  • 89
  • Can there be a larger gap between the "counts" for the second plot? Also, "bounding box" around axes would be worth trying. " – user3236841 Mar 12 '18 at 13:52
  • Yes, by changing `theme(panel.spacing = .....)`, as stated in the answer... – Axeman Mar 12 '18 at 13:53
  • Thanks! I don't like the fact that the lines do not continue acrorss the figure, however. I guess we can not put an overall bounding box? – user3236841 Mar 12 '18 at 14:12
  • I have grown to like your second answer more and more out of the three answers. Btw, is it possible to have colour of blue and green for the 1 and 2, and light and dark hue (I guess this would be light gray for A and dark gray for B) in the legend? – user3236841 Mar 14 '18 at 04:27