代码之家  ›  专栏  ›  技术社区  ›  voiDnyx

连续x轴ggplot boxplot的方框宽度不正确?

  •  1
  • voiDnyx  · 技术社区  · 6 年前

    我正在绘制与geom_point()和geom_boxplot()相同的数据,但是我的boxplots的宽度似乎是关闭的。最大的x为292,但相应的框小于285。我怎样才能把这个弄到合适的尺寸?

    enter image description here

    以下是我的数据的最小示例:

    x = data.frame(cluster = c("c1","c2","c3","c4","c5","c6"), 
                   elements = c(292,277,170,160,153,141), 
                   divergence = c(0.08344564,0.12130600,0.05564219,0.12826086,0.05386341,0.09620389))
    
    x.160 = x[x$elements >= 160,]
    x.160$Size = "160+"
    
    x.60 = x[x$elements >= 60 & x$elements < 160,]
    x.60$Size = "60-160"
    
    binnedX = rbind(x.60,x.160)
    
    
    p1a = ggplot(data = binnedX, aes(x = elements, y =  divergence, group = Size))+
      geom_hline(yintercept = mean(binnedX$divergence), color = "black", linetype=2)+
      geom_point(aes(color = Size))+ 
      scale_x_continuous(breaks = c(seq(0,300,15))) +
      scale_y_continuous(breaks = seq(0.00,0.25,0.05))+
      scale_color_brewer(palette = "Spectral") +  
      ggtitle("element sequence divergence by cluster [clustalO, 100bp]") +
      labs(x="Elements per cluster", y="Divergence")+
      theme_linedraw(base_size = 18)+
      theme(plot.title = element_text(hjust = 0.5), 
            axis.title.x = element_text(margin = margin(15,15,15,15,"pt")), 
            axis.title.y = element_text(margin = margin(15,15,15,15,"pt")))
    
    p2a = ggplot(data = binnedX, aes(x = elements, y =  divergence, group = Size))+
      geom_hline(yintercept = mean(binnedX$divergence), color = "Red", linetype=2)+
      geom_boxplot(aes(fill = Size)) +
      scale_x_continuous(breaks = c(seq(0,300,15)))+
      scale_y_continuous(breaks = seq(0.00,0.25,0.05))+
      scale_fill_brewer(palette = "Spectral") +
      labs(x="Elements per cluster", y="Divergence")+
      theme_linedraw(base_size = 18)+
      theme(plot.title = element_text(hjust = 0.5), 
            axis.title.x = element_text(margin = margin(15,15,15,15,"pt")), 
            axis.title.y = element_text(margin = margin(15,15,15,15,"pt")),
            axis.text.x = element_text(angle=0))
    
    multiplot(p1a,p2a)
    
    1 回复  |  直到 6 年前
        1
  •  2
  •   Z.Lin    6 年前

    当我们按组创建方框图时,每个方框的宽度硬编码为组数据范围的90%。我们可以从 compute_group() 在功能 StatBoxplot :

    # reproducing lines 87-88 of stat-boxplot.r
    if (length(unique(data$x)) > 1)
      width <- diff(range(data$x)) * 0.9
    

    我们可以通过定义 计算组() 函数基于 StatBoxplot$compute_group :

    modified.function <- function(data, scales, width = NULL, na.rm = FALSE, coef = 1.5) {
      qs <- c(0, 0.25, 0.5, 0.75, 1)
      if (!is.null(data$weight)) {
        mod <- quantreg::rq(y ~ 1, weights = weight, data = data, tau = qs)
        stats <- as.numeric(stats::coef(mod))
      }
      else {
        stats <- as.numeric(stats::quantile(data$y, qs))
      }
      names(stats) <- c("ymin", "lower", "middle", "upper", "ymax")
      iqr <- diff(stats[c(2, 4)])
      outliers <- data$y < (stats[2] - coef * iqr) | data$y > (stats[4] + coef * iqr)
      if (any(outliers)) {
        stats[c(1, 5)] <- range(c(stats[2:4], data$y[!outliers]), na.rm = TRUE)
      }
      if (length(unique(data$x)) > 1)
        width <- diff(range(data$x)) * 1 # instead of 0.9
      df <- as.data.frame(as.list(stats))
      df$outliers <- list(data$y[outliers])
      if (is.null(data$weight)) {
        n <- sum(!is.na(data$y))
      }
      else {
        n <- sum(data$weight[!is.na(data$y) & !is.na(data$weight)])
      }
      df$notchupper <- df$middle + 1.58 * iqr/sqrt(n)
      df$notchlower <- df$middle - 1.58 * iqr/sqrt(n)
      df$x <- if (is.factor(data$x))
        data$x[1]
      else mean(range(data$x))
      df$width <- width
      df$relvarwidth <- sqrt(n)
      df}
    

    创建一个基于 StatBoxplot ,以及基于 stat_boxplot :

    StatBoxplot2 <- ggproto(`_class` = "StatBoxplot2",          # class name
                            `_inherit` = StatBoxplot,           # inherit from StatBoxplot
                            compute_group = modified.function)  # override its compute_group function
    
    stat_boxplot2 <- function(mapping = NULL, data = NULL, geom = "boxplot", position = "dodge2", ...,
                               coef = 1.5, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE){
      layer(data = data, mapping = mapping, 
            stat = StatBoxplot2,                           # use StatBoxplot2 rather than StatBoxplot
            geom = geom, position = position, 
            show.legend = show.legend, inherit.aes = inherit.aes, 
            params = list(na.rm = na.rm, coef = coef, ...))
    }
    

    比较使用默认 stat = "boxplot" ,其中一个使用 stat = "boxplot2" :

    # Base plot with vertical dashed lines to indicate each point's position, & theme_classic for a
    # less cluttered background. I also changed the palette as Spectral's yellow was really hard to see.
    p <- ggplot(data = binnedX, 
                aes(x = elements, y =  divergence, fill = Size))+
      geom_point(aes(color = Size), size = 3) +
      geom_vline(aes(xintercept = elements), linetype = "dashed") +
      scale_fill_brewer(palette = "Set1") +
      scale_color_brewer(palette = "Set1", guide = FALSE) +
      theme_classic()
    
    gridExtra::grid.arrange(p + ggtitle("original") + geom_boxplot(alpha = 0.5),
                            p + ggtitle("modified") + geom_boxplot(alpha = 0.5, stat = "boxplot2"))
    

    plot