代码之家  ›  专栏  ›  技术社区  ›  user977828

密度错误。默认值(x=neg):找不到对象“neg”

  •  1
  • user977828  · 技术社区  · 6 年前

    我试图改写以下内容 plot_densities 用途 ggplot2 .

    plot_densities <- function(density) {
      neg_density <- density[[1]]
      pos_density <- density[[2]]
    
      plot(
        pos_density,
        ylim = range(c(neg_density$y, pos_density$y)),
        main = "Coverage plot of Sample 5",
        xlab = "lenght 21",
        col = 'blue',
        type = 'h'
      )
      lines(neg_density, type = 'h', col = 'red')
    }
    

    毫无疑问,下面的新功能 Error in density.default(x = neg) : object 'neg' not found

    plot_densities2 <- function(density) {
      neg_density <- density[[1]]
      pos_density <- density[[2]]
    
      densities = append(neg_density, pos_density)
    
      ggplot(as.data.frame(densities), aes(x=x, y=y)) + 
        theme_bw() +
        geom_density(alpha=0.5)
    }
    

    完整的代码可以在下面找到,数据可以从下载 here

    #apt update && apt install zlib1g-dev
    
    #install if necessary
    source("http://bioconductor.org/biocLite.R")
    biocLite("Rsamtools")
    
    #load library
    library(Rsamtools)
    
    extracting_pos_neg_reads <- function(bam_fn) {
    
      #read in entire BAM file
      bam <- scanBam(bam_fn)
    
      #names of the BAM fields
      names(bam[[1]])
      # [1] "qname"  "flag"   "rname"  "strand" "pos"    "qwidth" "mapq"   "cigar"
      # [9] "mrnm"   "mpos"   "isize"  "seq"    "qual"
    
      #distribution of BAM flags
      table(bam[[1]]$flag)
    
      #      0       4      16
      #1472261  775200 1652949
    
      #function for collapsing the list of lists into a single list
      #as per the Rsamtools vignette
      .unlist <- function (x) {
        ## do.call(c, ...) coerces factor to integer, which is undesired
        x1 <- x[[1L]]
        if (is.factor(x1)) {
          structure(unlist(x), class = "factor", levels = levels(x1))
        } else {
          do.call(c, x)
        }
      }
    
      #store names of BAM fields
      bam_field <- names(bam[[1]])
    
      #go through each BAM field and unlist
      list <- lapply(bam_field, function(y)
        .unlist(lapply(bam, "[[", y)))
    
      #store as data frame
      bam_df <- do.call("DataFrame", list)
      names(bam_df) <- bam_field
    
      dim(bam_df)
      #[1] 3900410      13
    
      #---------
    
      #use chr22 as an example
      #how many entries on the negative strand of chr22?
      ###table(bam_df$rname == 'chr22' & bam_df$flag == 16)
      # FALSE    TRUE
      #3875997   24413
    
      #function for checking negative strand
      check_neg <- function(x) {
        if (intToBits(x)[5] == 1) {
          return(T)
        } else {
          return(F)
        }
      }
    
      #test neg function with subset of chr22
      test <- subset(bam_df)#, rname == 'chr22')
      dim(test)
      #[1] 56426    13
      table(apply(as.data.frame(test$flag), 1, check_neg))
      #number same as above
      #FALSE  TRUE
      #32013 24413
    
      #function for checking positive strand
      check_pos <- function(x) {
        if (intToBits(x)[3] == 1) {
          return(F)
        } else if (intToBits(x)[5] != 1) {
          return(T)
        } else {
          return(F)
        }
      }
    
      #check pos function
      table(apply(as.data.frame(test$flag), 1, check_pos))
      #looks OK
      #FALSE  TRUE
      #24413 32013
    
      #store the mapped positions on the plus and minus strands
      neg <- bam_df[apply(as.data.frame(bam_df$flag), 1, check_neg),
                    'pos']
      length(neg)
      #[1] 24413
      pos <- bam_df[apply(as.data.frame(bam_df$flag), 1, check_pos),
                    'pos']
      length(pos)
      #[1] 32013
    
      #calculate the densities
      neg_density <- density(neg)
      pos_density <- density(pos)
    
      #display the negative strand with negative values
      neg_density$y <- neg_density$y * -1
    
      return (list(neg_density, pos_density))
    
    }
    
    plot_densities <- function(density) {
      neg_density <- density[[1]]
      pos_density <- density[[2]]
    
      plot(
        pos_density,
        ylim = range(c(neg_density$y, pos_density$y)),
        main = "Coverage plot of Sample 5",
        xlab = "lenght 21",
        col = 'blue',
        type = 'h'
      )
      lines(neg_density, type = 'h', col = 'red')
    }
    
    
    plot_densities2 <- function(density) {
      neg_density <- density[[1]]
      pos_density <- density[[2]]
    
      densities = append(neg_density, pos_density)
      densities
    
    
      ggplot(as.data.frame(densities), aes(x=x, y=y)) + 
        theme_bw() +
        geom_density(alpha=0.5)
    }
    
    filenames <- c("~/sample5-21.sam-uniq.sorted.bam", "~/sample5-24.sam-uniq.sorted.bam")
    
    for ( i in filenames){ 
      print(i)
      density <- extracting_pos_neg_reads(i)
      plot_densities2(density)
    }
    
    1 回复  |  直到 6 年前
        1
  •  2
  •   Julius Vainora    6 年前

    密度物体似乎不是最好的 append as.data.frame . 特别是,它们包含一些导致问题的元素,但同时又是不必要的。我们只能选择 x y 构成相关数据框架的要素:

    plot_densities2 <- function(density) {
      densities <- cbind(rbind(data.frame(density[[1]][1:2]), data.frame(density[[2]][1:2])), 
                         id = rep(c("neg", "pos"), each = length(density[[1]]$x)))
      print(ggplot(data = densities, aes(x = x, y = y, fill = id)) + 
              theme_bw() + geom_area(alpha = 0.5))
    }
    

    enter image description here