代码之家  ›  专栏  ›  技术社区  ›  user977828

读取多个文件并打印为一个打印

r
  •  0
  • user977828  · 技术社区  · 5 年前

    我有下面的代码,它只读一个 BAM file 把结果画出来。如何扩展代码以读取多个BAM文件和重叠绘图,以便只获取一个绘图?

    #install if necessary
    source("http://bioconductor.org/biocLite.R")
    biocLite("Rsamtools")
    
    #load library
    library(Rsamtools)
    
    #read in entire BAM file
    bam <- scanBam("~/sample5.sam-uniq.sorted.bam")
    
    #names of the BAM fields
    names(bam[[1]])
    # [1] "qname"  "flag"   "rname"  "strand" "pos"    "qwidth" "mapq"   "cigar"
    # [9] "mrnm"   "mpos"   "isize"  "seq"    "qual"
    
    #distribution of BAM flags
    table(bam[[1]]$flag)
    
    #      0       4      16 
    #1472261  775200 1652949
    
    #function for collapsing the list of lists into a single list
    #as per the Rsamtools vignette
    .unlist <- function (x){
      ## do.call(c, ...) coerces factor to integer, which is undesired
      x1 <- x[[1L]]
      if (is.factor(x1)){
        structure(unlist(x), class = "factor", levels = levels(x1))
      } else {
        do.call(c, x)
      }
    }
    
    #store names of BAM fields
    bam_field <- names(bam[[1]])
    
    #go through each BAM field and unlist
    list <- lapply(bam_field, function(y) .unlist(lapply(bam, "[[", y)))
    
    #store as data frame
    bam_df <- do.call("DataFrame", list)
    names(bam_df) <- bam_field
    
    dim(bam_df)
    #[1] 3900410      13
    
    #---------
    
    #use chr22 as an example
    #how many entries on the negative strand of chr22?
    ###table(bam_df$rname == 'chr22' & bam_df$flag == 16)
    # FALSE    TRUE 
    #3875997   24413
    
    #function for checking negative strand
    check_neg <- function(x){
      if (intToBits(x)[5] == 1){
        return(T)
      } else {
        return(F)
      }
    }
    
    #test neg function with subset of chr22
    test <- subset(bam_df)#, rname == 'chr22')
    dim(test)
    #[1] 56426    13
    table(apply(as.data.frame(test$flag), 1, check_neg))
    #number same as above
    #FALSE  TRUE 
    #32013 24413
    
    #function for checking positive strand
    check_pos <- function(x){
      if (intToBits(x)[3] == 1){
        return(F)
      } else if (intToBits(x)[5] != 1){
        return(T)
      } else {
        return(F)
      }
    }
    
    #check pos function
    table(apply(as.data.frame(test$flag), 1, check_pos))
    #looks OK
    #FALSE  TRUE 
    #24413 32013
    
    #store the mapped positions on the plus and minus strands
    neg <- bam_df[apply(as.data.frame(bam_df$flag), 1, check_neg),
                        'pos'
                        ]
    length(neg)
    #[1] 24413
    pos <- bam_df[apply(as.data.frame(bam_df$flag), 1, check_pos),
                        'pos'
                        ]
    length(pos)
    #[1] 32013
    
    #calculate the densities
    neg_density <- density(neg)
    pos_density <- density(pos)
    
    #display the negative strand with negative values
    neg_density$y <- neg_density$y * -1
    
    plot(pos_density,
         ylim = range(c(neg_density$y, pos_density$y)),
         main = "Coverage plot of Sample 5",
         xlab = "lenght 21",
         col = 'blue',
         type='h')
    lines(neg_density, type='h', col = 'red')
    

    提前谢谢你

    0 回复  |  直到 5 年前