我有一个数据框:
# Example data (dput-style literal): a 20-row data.frame with the columns
# Sample, Genotype, Tissue, variable and value.
# - Genotype, Tissue and variable are factors stored as integer codes plus a
#   label table; unused labels are kept in the label tables.
# - Every row carries Tissue code 2 ("iLN"); rows 1-10 use variable code 1 and
#   rows 11-20 use variable code 2.
# NOTE(review): the "â" characters inside the variable labels look like a
# mis-encoded symbol from the original export -- confirm against the source
# data before relying on these labels.
df <- structure(list(Sample = c("1: FL_643", "2: FL_645", "3: FL_647","4: FL_656", "5: FL_658", "6: cKO_644", "7: cKO_646", "8: cKO_654","9: cKO_655", "10: cKO_657", "1: FL_643", "2: FL_645", "3: FL_647", "4: FL_656", "5: FL_658", "6: cKO_644", "7: cKO_646", "8: cKO_654", "9: cKO_655", "10: cKO_657"), Genotype = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("miR-15/16 FL", "miR-15/16 cKO"), class = "factor"),
Tissue = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Thymus",
"iLN", "Spleen", "Skin", "Colon"), class = "factor"), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("Cells/SC/Live/CD8â,, CD4+,Freq. of Parent",
"Cells/SC/Live/CD8â,, CD4+/Foxp3+,Freq. of Parent", "Cells/SC/Live/CD8â,, CD4+/Foxp3+,Median,<BV421-A>,CD127",
"Cells/SC/Live/CD8â,, CD4+/Foxp3+/CD25+,Freq. of Parent",
"Cells/SC/Live/CD8â,, CD4+/Foxp3+/CD25-,Freq. of Parent",
"Cells/SC/Live/CD8â,, CD4+/Foxp3-,Freq. of Parent", "Cells/SC/Live/CD8â,, CD4+/Foxp3-,Median,<BV421-A>,CD127",
"Cells/SC/Live/CD8â,, CD4+/Foxp3-/CD62Lâ,, CD44â,Freq. of Parent",
"Cells/SC/Live/CD8â,, CD4+/Foxp3-/CD62Lâ,, CD44+,Freq. of Parent",
"Cells/SC/Live/CD8â,, CD4+/Foxp3-/CD62L+,, CD44â,Freq. of Parent",
"Cells/SC/Live/CD8â,, CD4+/Foxp3-/CD62L+,, CD44+,Freq. of Parent",
"Cells/SC/Live/CD8â,, CD4+/Foxp3-/CD44+,Freq. of Parent",
"Cells/SC/Live/CD8+,, CD4â,Freq. of Parent", "Cells/SC/Live/CD8+,, CD4â,Median,<BV421-A>,CD127",
"Cells/SC/Live/CD8+,, CD4â/CD62Lâ,, CD44â,Freq. of Parent",
"Cells/SC/Live/CD8+,, CD4â/CD62Lâ,, CD44+,Freq. of Parent",
"Cells/SC/Live/CD8+,, CD4â/CD62L+,, CD44â,Freq. of Parent",
"Cells/SC/Live/CD8+,, CD4â/CD62L+,, CD44+,Freq. of Parent",
"Cells/SC/Live/CD8+,, CD4â/CD62L+,, CD44+,Freq. of Parent_1",
"Cells/SC/Live,Count", "Cells/SC/Live/CD8â,, CD4+,Count",
"Cells/SC/Live/CD8â,, CD4+/Foxp3+,Count", "Cells/SC/Live/CD8â,, CD4+/Foxp3+/CD25+,Count",
"Cells/SC/Live/CD8â,, CD4+/Foxp3+/CD25-,Count", "Cells/SC/Live/CD8â,, CD4+/Foxp3-,Count",
"Cells/SC/Live/CD8â,, CD4+/Foxp3-/CD62Lâ,, CD44â,Count",
"Cells/SC/Live/CD8â,, CD4+/Foxp3-/CD62Lâ,, CD44+,Count",
"Cells/SC/Live/CD8â,, CD4+/Foxp3-/CD62L+,, CD44â,Count",
"Cells/SC/Live/CD8â,, CD4+/Foxp3-/CD62L+,, CD44+,Count",
"Cells/SC/Live/CD8+,, CD4â,Count", "Cells/SC/Live/CD8+,, CD4â/CD62Lâ,, CD44â,Count",
"Cells/SC/Live/CD8+,, CD4â/CD62Lâ,, CD44+,Count", "Cells/SC/Live/CD8+,, CD4â/CD62L+,, CD44â,Count",
"Cells/SC/Live/CD8+,, CD4â/CD62L+,, CD44+,Count"), class = "factor"),
value = c(41.2, 35.5, 39.5, 33.2, 39.1, 35.5, 35.7, 33.9,
39.7, 42.4, 10.9, 12.1, 10.9, 12.5, 12.3, 12.8, 14.1, 15.8,
14.6, 12.5)), .Names = c("Sample", "Genotype", "Tissue", "variable", "value"), row.names = c(NA, -20L), class = "data.frame")
我使用以下函数来绘制这些数据的各种组合:
library(ggplot2)
library(ggpubr)
#' Plot grouped box plots with per-variable significance annotations
#'
#' Draws one dodged box plot per level of `variable`, split by `Genotype`,
#' overlays the individual observations, and adds the label produced by
#' `ggpubr::stat_compare_means()` together with a horizontal bracket above
#' each variable.
#'
#' The data are NOT passed as an argument: the function reads a data frame
#' named `temp`, resolved through normal lexical scoping. The body uses its
#' columns `Genotype`, `Tissue`, `variable` and `value`.
#'
#' @param Tissue Not used by the function body; kept so that existing calls
#'   that supply it keep working.
#' @param row_add Row selection applied to `temp` (after its row names have
#'   been reset). Defaults to every row.
#' @param y.lab Y-axis label; wrapped at 40 characters.
#' @param font_choice Font family used for the theme text and the p-value
#'   labels.
#' @param font_size Text size passed to the theme.
#' @param stat_test Passed as `method` to `ggpubr::stat_compare_means()`.
#' @param p_display Passed as `label` to `ggpubr::stat_compare_means()`.
#' @param legend_position Passed to `theme(legend.position = ...)`.
#' @return A ggplot object.
plot_it <- function(Tissue,
                    row_add = seq_len(nrow(temp)),
                    y.lab = "Did you forget to add a label?",
                    font_choice = "Helvetica",
                    font_size = 12,
                    stat_test = "t.test",
                    p_display = "p.signif",
                    legend_position = c("right")) {
  # Work on a local copy of `temp` with default row names, then keep only the
  # requested rows and drop factor levels that no longer occur.
  src <- temp
  rownames(src) <- NULL
  df <- droplevels(src[row_add, , drop = FALSE])
  rownames(df) <- NULL
  if (nrow(df) == 0L) {
    stop("`row_add` selected no rows from `temp`.", call. = FALSE)
  }

  # Map genotypes (in order of first appearance) to colours and point shapes.
  # The vectors are cut to the number of genotypes present so that a
  # single-genotype subset does not end up with an NA name.
  genotypes <- as.character(unique(df$Genotype))
  if (length(genotypes) > 2L) {
    stop("Only two genotypes are supported, found ", length(genotypes), ".",
         call. = FALSE)
  }
  color.groups <- stats::setNames(
    c("black", "red")[seq_along(genotypes)], genotypes
  )
  shape.groups <- stats::setNames(c(16, 1)[seq_along(genotypes)], genotypes)

  # Per-variable maximum: reference height for the p-value label and bracket.
  dmax <- dplyr::summarise(
    dplyr::group_by(df, variable),
    value = max(value, na.rm = TRUE),
    Genotype = NA
  )

  # Bracket geometry, all expressed relative to the tallest value.
  y_pad <- max(dmax$value) * 0.1   # vertical gap between data and label
  bracket_frac <- 0.6              # bracket sits at this fraction of y_pad
  bracket_half_width <- 0.19       # half-width of the bracket in x units
  bracket_colour <- "black"

  # A single string for the x-axis title (the column itself has one entry
  # per row).
  tissue_label <- paste(unique(as.character(df$Tissue)), collapse = ", ")

  ggplot(df, aes(x = variable, y = value, color = Genotype, shape = Genotype)) +
    # geom_violin(position = position_dodge(width = 0.75)) +
    # outlier.shape = NA hides the box-plot outliers; every observation is
    # already drawn by geom_point() below.
    geom_boxplot(position = position_dodge(width = 0.75), outlier.shape = NA) +
    geom_point(position = position_dodge(width = 0.75), size = 2) +
    ylim(0, 1.2 * max(df$value, na.rm = TRUE)) +
    ylab(stringr::str_wrap(y.lab, width = 40)) +
    xlab(tissue_label) +
    scale_color_manual(values = color.groups) +
    scale_shape_manual(values = shape.groups) +
    scale_x_discrete(labels = function(x) stringr::str_wrap(x, width = 20)) +
    theme_bw() +
    theme(
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.line = element_line(colour = "black"),
      aspect.ratio = 1,
      text = element_text(family = font_choice, size = font_size),
      legend.position = legend_position
    ) +
    stat_compare_means(
      show.legend = FALSE, label = p_display, method = stat_test,
      label.y = y_pad + dmax$value, family = font_choice
    ) +
    geom_segment(
      data = dmax,
      aes(
        x = as.numeric(variable) - bracket_half_width,
        xend = as.numeric(variable) + bracket_half_width,
        y = value + bracket_frac * y_pad,
        yend = value + bracket_frac * y_pad
      ),
      size = 0.3, color = bracket_colour, inherit.aes = FALSE
    )
}
通过使用以下函数调用进行绘图:
# Example call: plot rows 1-30 and 141-150 of `temp`.
plot_it(Tissue = "Thymus", row_add = c(1:30, 141:150))
我想让这个函数创建一个分面(facet wrap),从而有效地把 y 轴拆成两段,使每一段都有各自合适的刻度,更好地展示数据。事实上,当我把数值相差悬殊的变量画在同一张图上时,同一个刻度无法同时适合所有这些变量。
如果这做不到,那么有没有一种简单的方法,可以针对每张图手动引入分面(facet wrap)来拆分 y 轴?