代码之家  ›  专栏  ›  技术社区  ›  andemexoax

在 R 中用前一行的数据进行插补(类似于滚动连接)

  •  1
  • andemexoax  · 技术社区  · 6 年前

    我遇到了一个比较复杂的问题。我的数据中有缺失值需要插补,但插补必须按数据框中的分组分别进行(因为各组数据是在不同时间采集的),而且我不想用平均值来插补。如果某次观测没有记录到数值,我需要用此前最近一次记录的值来填充。

    以下是一些示例数据:

    # Simulated example data: 10 classes (a-j) with 100 rows each. Within every
    # class the first 50 rows are "inside" and the next 50 are "outside", and
    # Sample_number runs 1..50 inside each of those halves (the shorter columns
    # are recycled by data.frame() up to the 1000 rows of x1).
    sample_data <- data.frame(
      Class = rep(letters[1:10], each = 100),
      group = rep(c("inside", "outside"), each = 50),
      Sample_number = seq(from = 1, to = 50, by = 1),
      x1 = rnorm(n = 1000, mean = 0, sd = 0.5),
      x2 = 0
    )

    # One factor level per Class/group combination, e.g. "a_inside".
    sample_data[["Class_group"]] <- factor(
      paste0(sample_data[["Class"]], "_", sample_data[["group"]])
    )

    # Negative measurements are treated as missing.
    sample_data[["x1"]] <- replace(
      sample_data[["x1"]],
      sample_data[["x1"]] < 0,
      NA
    )
    

    library(data.table)
    
    varieties <- levels(sample_data$Class_group)

    # Fill x2 from x1 separately inside every Class_group: a recorded x1 is
    # copied as-is, a missing x1 takes the most recent recorded value of the
    # same group (last observation carried forward).
    #
    # The earlier nested loop could not do this: it called is.number(), which
    # does not exist in base R or data.table, used data.table's `:=` on a plain
    # data.frame, and never wrote its result back into sample_data.

    # Carry the last recorded value forward over NAs in one vector.
    #   values: numeric vector in observation order.
    #   returns: vector of the same length with NAs replaced; values that come
    #            before the first recording take that first recorded value, and
    #            a vector with no recorded value at all is returned unchanged.
    carry_forward <- function(values) {
      recorded <- !is.na(values)
      if (!any(recorded)) {
        return(values)
      }
      # cumsum() counts how many values have been recorded up to each row,
      # which is the position of the latest recorded value among the
      # non-missing ones.
      latest <- cumsum(recorded)
      # Rows ahead of the first recording have nothing to carry forward.
      latest[latest == 0L] <- 1L
      values[recorded][latest]
    }

    # Work in Sample_number order inside each group so "previous" means the
    # previous sample, regardless of how the rows happen to be sorted.
    row_order <- order(sample_data$Class_group, sample_data$Sample_number)
    sample_data$x2[row_order] <- ave(
      sample_data$x1[row_order],
      sample_data$Class_group[row_order],
      FUN = carry_forward
    )
    

    我觉得可以用滚动连接(rolling join)或者 dplyr 的 mutate 来解决这个问题。

    在 Excel 中,我会使用下面这样的公式:

    =IF(ISNUMBER(A2), A2, B1)

    Class,group,Sample_number,x1,x2,Class_group
    a   inside  1   NA     0    a_inside
    a   inside  2   NA     0    a_inside
    a   inside  3   NA     0    a_inside
    a   inside  4   NA     0    a_inside
    a   inside  5   0.57   0    a_inside
    a   inside  6   NA     0    a_inside
    a   inside  7   NA     0    a_inside
    a   inside  8   NA     0    a_inside
    a   inside  9   0.43    0   a_inside
    a   inside  10  0.19    0   a_inside
    a   inside  11  0.09    0   a_inside
    a   inside  12  0.13    0   a_inside
    a   inside  13  0.68    0   a_inside
    a   inside  14  0.50    0   a_inside
    a   inside  15  0.57    0   a_inside
    

    Class,group,Sample_number,x1,x2,Class_group
    a   inside  1   NA      0.57    a_inside
    a   inside  2   NA      0.57    a_inside
    a   inside  3   NA      0.57    a_inside
    a   inside  4   NA      0.57    a_inside
    a   inside  5   0.57    0.57    a_inside
    a   inside  6   NA      0.57    a_inside
    a   inside  7   NA      0.57    a_inside
    a   inside  8   NA      0.43    a_inside
    a   inside  9   0.43    0.43    a_inside
    a   inside  10  0.19    0.19    a_inside
    a   inside  11  0.09    0.09    a_inside
    a   inside  12  0.13    0.13    a_inside
    a   inside  13  0.68    0.68    a_inside
    a   inside  14  0.50    0.50    a_inside
    a   inside  15  0.57    0.57    a_inside
    
    2 回复  |  直到 6 年前
        1
  •  3
  •   Ameya    6 年前

    你可能需要 zoo::na.locf(参见 ?zoo::na.locf)。

    library(zoo)
    library(data.table)
    # Fix the RNG state so the simulated data is identical on every run.
    set.seed(1)

    # 10 classes x 100 rows; each class holds 50 "inside" rows followed by 50
    # "outside" rows, with Sample_number 1..50 in each half.
    sample_data <- data.frame(
      Class = rep(letters[1:10], each = 100),
      group = rep(c("inside", "outside"), each = 50),
      Sample_number = seq(from = 1, to = 50, by = 1),
      x1 = rnorm(n = 1000, mean = 0, sd = 0.5),
      x2 = 0
    )
    sample_data[["Class_group"]] <- factor(
      paste0(sample_data[["Class"]], "_", sample_data[["group"]])
    )
    # Negative measurements are treated as missing.
    sample_data[["x1"]] <- replace(
      sample_data[["x1"]],
      sample_data[["x1"]] < 0,
      NA
    )
    varieties <- levels(sample_data[["Class_group"]])

    # Convert to a data.table by reference so `:=` can be used below.
    setDT(sample_data)

    # Per Class_group: take the value carried forward from the previous
    # recording; where that is still NA (nothing recorded yet in the group),
    # fall back to the value carried backward from the next recording.
    sample_data[
      ,
      x2 := {
        carried_forward <- na.locf(x1, na.rm = FALSE)
        carried_backward <- na.locf(x1, na.rm = FALSE, fromLast = TRUE)
        ifelse(is.na(carried_forward), carried_backward, carried_forward)
      },
      by = "Class_group"
    ]
    > sample_data[Class_group == 'a_inside'][1:10]
        Class  group Sample_number         x1         x2 Class_group
     1:     a inside             1         NA 0.09182166    a_inside
     2:     a inside             2 0.09182166 0.09182166    a_inside
     3:     a inside             3         NA 0.09182166    a_inside
     4:     a inside             4 0.79764040 0.79764040    a_inside
     5:     a inside             5 0.16475389 0.16475389    a_inside
     6:     a inside             6         NA 0.16475389    a_inside
     7:     a inside             7 0.24371453 0.24371453    a_inside
     8:     a inside             8 0.36916235 0.36916235    a_inside
     9:     a inside             9 0.28789068 0.28789068    a_inside
    10:     a inside            10         NA 0.28789068    a_inside
    

    顺便说一下,其实不需要创建中间变量 Class_group,直接用 by = c('Class', 'group') 就能达到同样的效果。

        2
  •  2
  •   chinsoon12    6 年前

    使用 roll = "nearest" 的滚动连接,这样在出现连续 NA 的情况下,结果会更接近提问者(OP)想要的输出:

    # Look every row of sample_data up in the subset of rows that have a
    # recorded x1, joining on Class_group and Sample_number with
    # roll = "nearest", and store the x1 of the matched row in x2.
    # The trailing [] prints the updated table.
    sample_data[
      ,
      x2 := sample_data[!is.na(x1)][
        sample_data,
        x1,
        on = .(Class_group, Sample_number),
        roll = "nearest"
      ]
    ][]
    

    输出:

        Class  group Sample_number   x1   x2 Class_group
     1:     a inside             1   NA 0.57    a_inside
     2:     a inside             2   NA 0.57    a_inside
     3:     a inside             3   NA 0.57    a_inside
     4:     a inside             4   NA 0.57    a_inside
     5:     a inside             5 0.57 0.57    a_inside
     6:     a inside             6   NA 0.57    a_inside
     7:     a inside             7   NA 0.57    a_inside
     8:     a inside             8   NA 0.43    a_inside
     9:     a inside             9 0.43 0.43    a_inside
    10:     a inside            10 0.19 0.19    a_inside
    11:     a inside            11 0.09 0.09    a_inside
    12:     a inside            12 0.13 0.13    a_inside
    13:     a inside            13 0.68 0.68    a_inside
    14:     a inside            14 0.50 0.50    a_inside
    15:     a inside            15 0.57 0.57    a_inside
    

    样本数据:

    library(data.table)
    # Build the 15-row "a_inside" example as a data.table by letting fread()
    # parse the inline text below (header line followed by one line per sample).
    sample_data <- fread("Class    group    Sample_number    x1    x2    Class_group
    a   inside  1   NA     0    a_inside
    a   inside  2   NA     0    a_inside
    a   inside  3   NA     0    a_inside
    a   inside  4   NA     0    a_inside
    a   inside  5   0.57   0    a_inside
    a   inside  6   NA     0    a_inside
    a   inside  7   NA     0    a_inside
    a   inside  8   NA     0    a_inside
    a   inside  9   0.43    0   a_inside
    a   inside  10  0.19    0   a_inside
    a   inside  11  0.09    0   a_inside
    a   inside  12  0.13    0   a_inside
    a   inside  13  0.68    0   a_inside
    a   inside  14  0.50    0   a_inside
    a   inside  15  0.57    0   a_inside")
    # x2 contains only 0 in the text above; convert it to numeric explicitly.
    # NOTE(review): presumably fread() reads it as integer, which would not
    # accept the decimal x1 values assigned to it later -- confirm.
    sample_data[, x2 := as.numeric(x2)]
    
    推荐文章