代码之家  ›  专栏  ›  技术社区  ›  John Gagnon

如何将比较条添加到绘图中,以表示p值对应的比较

  •  1
  • John Gagnon  · 技术社区  · 7 年前

    我使用以下数据框:

    # Example data in dput() form: a 40-row tibble with two factor columns
    # (Genotype: 2 levels, Tissue: 4 levels) and one integer measurement column.
    # Rows are laid out in runs of ten per tissue (five rows of each genotype),
    # and the measurement column contains a single NA.
    df1 <- structure(list(Genotype = structure(c(1L, 1L, 1L, 1L, 1L,
    2L,2L,2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
    1L,1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L),
    .Label= c("miR-15/16 FL", "miR-15/16 cKO"), class = "factor"), 
    Tissue = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L), .Label = c("iLN", "Spleen", "Skin", "Colon"), class = "factor"), 
    `Cells/SC/Live/CD8—,, CD4+/Foxp3+,Median,<BV421-A>,CD127` = c(518L, 
    715L, 572L, 599L, 614L, 881L, 743L, 722L, 779L, 843L, 494L, 
    610L, 613L, 624L, 631L, 925L, 880L, 932L, 876L, 926L, 1786L, 
    2079L, 2199L, 2345L, 2360L, 2408L, 2509L, 3129L, 3263L, 3714L, 
    917L, NA, 1066L, 1059L, 939L, 1269L, 1047L, 974L, 1048L, 
    1084L)),
    # The names attribute set here replaces the names used in list() above, so
    # the third column ends up with this spelling (no space after ",,").
    .Names = c("Genotype", "Tissue", "Cells/SC/Live/CD8—,,CD4+/Foxp3+,Median,<BV421-A>,CD127"),
    row.names = c(NA, -40L), class = c("tbl_df", "tbl", "data.frame"))
    

    # Question's original attempt: dodged box plots of the third column of df1
    # per Tissue, split by Genotype, with a t-test p-value label above each
    # tissue. It draws the p-values but no comparison brackets.
    library(ggplot2)
    library(ggpubr)
    # Manual colour and shape palettes, named by genotype so the manual scales
    # below match values to genotypes by name.
    color.groups <- c("black","red")
    names(color.groups) <- unique(df1$Genotype)
    shape.groups <- c(16, 1)
    names(shape.groups) <- unique(df1$Genotype)
    
    # NOTE(review): y = df1[3] passes a one-column tibble taken from the global
    # df1 object rather than naming a column of the plot data.
    ggplot(df1, aes(x = Tissue, y = df1[3], color = Genotype, shape = Genotype)) +
      # Outliers are hidden on the boxes because every observation is drawn
      # by geom_point() anyway.
      geom_boxplot(position = position_dodge(), outlier.shape = NA) +
      geom_point(position=position_dodge(width=0.75)) +
      # Leave 20% headroom above the largest value for the p-value labels.
      ylim(0,1.2*max(df1[3], na.rm = TRUE)) +
      ylab('MFI CD127 (of CD4+ Foxp3+ T cells') +
      scale_color_manual(values=color.groups) +
      scale_shape_manual(values=shape.groups) +
      theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
                         panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
                         axis.title.x=element_blank(), aspect.ratio = 1,
                         text = element_text(size = 9)) +
      # One label height per tissue: the maximum of that tissue's ten rows
      # (rows 1-10, 11-20, 21-30, 31-40) plus 10% of the overall maximum.
      # The row ranges are hard-coded, so they only hold for this row order.
      stat_compare_means(show.legend = FALSE, label = 'p.format', method = 't.test',
                         label.y = c(0.1*max(df1[3], na.rm = TRUE) + max(df1[3][c(1:10),], na.rm = TRUE),
                                     0.1*max(df1[3], na.rm = TRUE) + max(df1[3][c(11:20),], na.rm = TRUE),
                                     0.1*max(df1[3], na.rm = TRUE) + max(df1[3][c(21:30),], na.rm = TRUE),
                                     0.1*max(df1[3], na.rm = TRUE) + max(df1[3][c(31:40),], na.rm = TRUE)))
    

    enter image description here

    谢谢你的帮助!

    1 回复  |  直到 7 年前
        1
  •  3
  •   eipi10    7 年前

    我通过三次调用 geom_segment 创建了括号。这些调用使用新建的 dmax 数据框作为参考 y 位置,而 e 和 r 这两个值用于微调这些位置。

    我对你的代码做了一些其他更改。

    1. 将第三列的名称更改为 temp,然后在绘图中使用 y=temp 而不是 y=df1[3];后者实际上是越过绘图环境、直接引用外部的 df1 对象,这可能会导致问题。此外,有一个简短的列名可供引用,也让生成 dmax 更容易。

    2. 使用 dmax 数据框为 stat_compare_means 提供 label.y 位置;label.y 是作为参数直接传入的,而不是通过 aes 映射得到的。

    3. 将 p 值标签放在每对箱线图上方的固定绝对距离处(使用值 e),而不是按比例(乘法)计算的距离。这使得 p 值标签、括号和箱线图之间的间距更容易保持一致。


    # Box plots per Tissue and Genotype with a t-test p-value label and a
    # comparison bracket above each tissue.
    # Expects df1, color.groups and shape.groups to exist already, and ggplot2
    # and ggpubr to be attached.

    # `%>%`, group_by() and summarise() below come from dplyr, so attach it
    # explicitly instead of relying on it having been loaded elsewhere.
    library(dplyr)

    # Use a short column name for the third column so it can be referenced by
    # name inside aes() and summarise() instead of by position.
    names(df1)[3] <- "temp"

    # Reference y-values for the p-value labels and brackets: the largest
    # observed value in each tissue, one row per Tissue level in level order.
    # Genotype = NA is kept from the original answer; presumably a placeholder
    # column for the aesthetics of the main plot -- the bracket layers below do
    # not need it because they set inherit.aes = FALSE.
    dmax <- df1 %>%
      group_by(Tissue) %>%
      summarise(temp = max(temp, na.rm = TRUE),
                Genotype = NA)

    # For tweaking position of brackets
    e <- 350          # height of the p-value label above each tissue's maximum
    r <- 0.6          # bracket bar sits at this fraction of e above the maximum
    w <- 0.19         # half-width of a bracket, in x-axis units
    tick <- 60        # length of the downward ticks at both bracket ends
    bcol <- "grey30"  # bracket colour

    ggplot(df1, aes(x = Tissue, y = temp, color = Genotype, shape = Genotype)) +
      # Outliers are hidden on the boxes because every observation is drawn
      # by geom_point() anyway.
      geom_boxplot(position = position_dodge(), outlier.shape = NA) +
      geom_point(position = position_dodge(width = 0.75)) +
      # Leave 20% headroom above the largest value for labels and brackets.
      ylim(0, 1.2 * max(df1$temp, na.rm = TRUE)) +
      ylab('MFI CD127 (of CD4+ Foxp3+ T cells') +
      scale_color_manual(values = color.groups) +
      scale_shape_manual(values = shape.groups) +
      theme_bw() +
      theme(panel.border = element_blank(),
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            axis.line = element_line(colour = "black"),
            axis.title.x = element_blank(),
            aspect.ratio = 1,
            text = element_text(size = 9)) +
      # label.y is given as a plain vector (one value per tissue), not via aes.
      stat_compare_means(show.legend = FALSE, label = 'p.format', method = 't.test',
                         label.y = e + dmax$temp) +
      # Brackets are built from three segments per tissue. The discrete Tissue
      # axis is addressed numerically so the ends can be offset by +/- w.
      # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
      # thickness; `size` is kept here for compatibility with older versions.
      # 1) horizontal bar
      geom_segment(data = dmax,
                   aes(x = as.numeric(Tissue) - w, xend = as.numeric(Tissue) + w,
                       y = temp + r * e, yend = temp + r * e),
                   size = 0.3, color = bcol, inherit.aes = FALSE) +
      # 2) right-hand tick
      geom_segment(data = dmax,
                   aes(x = as.numeric(Tissue) + w, xend = as.numeric(Tissue) + w,
                       y = temp + r * e, yend = temp + r * e - tick),
                   size = 0.3, color = bcol, inherit.aes = FALSE) +
      # 3) left-hand tick
      geom_segment(data = dmax,
                   aes(x = as.numeric(Tissue) - w, xend = as.numeric(Tissue) - w,
                       y = temp + r * e, yend = temp + r * e - tick),
                   size = 0.3, color = bcol, inherit.aes = FALSE)
    

    enter image description here

    首先,我们添加两个新的组织类别:

    library(forcats)

    # Register the two extra levels first: assigning a value that is not an
    # existing factor level would otherwise produce NA.
    df1$Tissue <- fct_expand(df1$Tissue, "Tissue 5", "Tissue 6")
    # Move every fourth row of the first twenty rows (1, 5, 9, 13, 17) to
    # "Tissue 5" and of the last twenty rows (21, 25, 29, 33, 37) to "Tissue 6".
    df1$Tissue[seq(from = 1, to = 20, by = 4)] <- "Tissue 5"
    df1$Tissue[seq(from = 21, to = 40, by = 4)] <- "Tissue 6"

    # Recompute the per-tissue maxima so there is one reference row for each
    # tissue, including the two new ones.
    dmax <- summarise(group_by(df1, Tissue),
                      temp = max(temp, na.rm = TRUE),
                      Genotype = NA)
    

    enter image description here