代码之家  ›  专栏  ›  技术社区  ›  jakes

如何返回不同组中最后n行的平均值(由变量表示)

  •  0
  • jakes  · 技术社区  · 6 年前

    # Example input: a 100-row tibble (row.names = c(NA, -100L)) with columns
    #   seq     - integer group id
    #   new_seq - NA, or the id of another `seq` group to look up
    #   value   - integer measurements to be averaged
    data <- structure(list(seq = c(1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 
    4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 
    7L, 7L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L), new_seq = c(2, 2, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    2, 2, 2, 2, NA, NA, NA, NA, NA, 4, 4, 4, 4, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, 6, 6, 6, 6, 6, NA, NA, 8, 8, 8, NA, NA, NA), value = c(2L, 
    0L, 0L, 1L, 0L, 5L, 5L, 3L, 0L, 3L, 2L, 3L, 2L, 3L, 4L, 1L, 0L, 
    0L, 0L, 1L, 1L, 0L, 2L, 5L, 3L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 3L, 
    5L, 3L, 1L, 1L, 1L, 0L, 1L, 0L, 4L, 3L, 0L, 3L, 1L, 3L, 0L, 0L, 
    1L, 0L, 0L, 3L, 4L, 5L, 3L, 5L, 3L, 5L, 0L, 1L, 1L, 3L, 2L, 1L, 
    0L, 0L, 0L, 0L, 5L, 1L, 1L, 0L, 4L, 1L, 5L, 0L, 3L, 1L, 2L, 1L, 
    0L, 3L, 0L, 1L, 1L, 3L, 0L, 1L, 1L, 2L, 2L, 1L, 0L, 4L, 0L, 0L, 
    3L, 0L, 0L)), row.names = c(NA, -100L), class = c("tbl_df", "tbl", 
    "data.frame"))
    

    `new_seq` 指的是 `seq`。对于 `new_seq` 中每个不是 `NA` 的值,我想在新列中返回对应 `seq` 组里 `value` 最后 2 行的平均值。例如,第 1:2 行的新列值应为 0.5(第 49:50 行的平均值),第 51:54 行也是 0.5(同样是第 49:50 行的平均值),而第 60:63 行应为 4(第 58:59 行的平均值)。如何用 `tidyverse` 实现?

    2 回复  |  直到 6 年前
        1
  •  2
  •   Z.Lin    6 年前

    像这样的?

    # Build a per-seq lookup table: the mean of `value` over the last two rows
    # of each seq group. tail() returns the whole vector when a group has fewer
    # than two rows, so a single-row group yields its only value.
    # summarise() on a single grouping variable already returns an ungrouped
    # tibble, so no explicit ungroup() is needed.
    lookup <- data %>%
      group_by(seq) %>%
      summarise(new_column = mean(tail(value, 2)))
    
    # Match back to the original dataset, using new_seq as the key into
    # lookup$seq. Rows whose new_seq is NA have no match and get NA.
    left_join(data, lookup, by = c("new_seq" = "seq"))
    

    # A tibble: 100 x 4
         seq new_seq value new_column
       <int>   <dbl> <int>      <dbl>
     1     1       2     2        0.5
     2     1       2     0        0.5
     3     2      NA     0       NA  
     4     2      NA     1       NA  
    ...
    
        2
  •  0
  •   divibisan    6 年前

    嗯,这只能算半个 `tidyverse` 解法,我相信有人可以做得更好,但这里先给出一个尝试。

    `group_by` 加 `mutate` 可以很容易地计算每组最后两行的平均值,但我不知道如何在 `seq` 与 `new_seq` 之间建立组与组的对应关系。

    # Mean of the last two `value` rows of each seq group, repeated on every
    # row of that group. tail() copes with groups shorter than two rows.
    dat2 <- data %>%
        group_by(seq) %>%
        mutate(end_val = mean(tail(value, 2)))
    
    # For each row, fetch the end_val of the group whose seq equals this row's
    # new_seq. match() gives the index of the first matching row, or NA when
    # new_seq is NA or names no existing seq, so those rows get NA.
    # dat3 must be created before a column can be assigned into it.
    dat3 <- dat2
    dat3$result <- dat2$end_val[match(dat2$new_seq, dat2$seq)]
    

    下面加上 `rowid` 列,并只显示部分行:

    # Prepend a rowid column, then keep rows 1-3, 50-55 and 59-64 by position.
    dat3 %>% tibble::rowid_to_column() %>% .[c(1:3,50:55,59:64),] 
    
    # A tibble: 15 x 6
    # Groups:   seq [6]
       rowid   seq new_seq value end_val result
       <int> <int>   <dbl> <int>   <dbl>  <dbl>
     1     1     1       2     2     1      0.5
     2     2     1       2     0     1      0.5
     3     3     2      NA     0     0.5   NA  
     4    50     2      NA     1     0.5   NA  
     5    51     3       2     0     3.5    0.5
     6    52     3       2     0     3.5    0.5
     7    53     3       2     3     3.5    0.5
     8    54     3       2     4     3.5    0.5
     9    55     4      NA     5     4     NA  
    10    59     4      NA     5     4     NA  
    11    60     5       4     0     2      4  
    12    61     5       4     1     2      4  
    13    62     5       4     1     2      4  
    14    63     5       4     3     2      4  
    15    64     6      NA     2     2     NA