代码之家  ›  专栏  ›  技术社区  ›  Alexander

有条件提取连续递减数

  •  0
  • Alexander  · 技术社区  · 6 年前

    我有一个连续数字减少的数据帧,有些是先减少后增加再减少(分组)。

    我需要提取这个连续的减少部分,并删除非系统的增加和减少部分!

    这是我的意思的测试数据

    # Example data: three sets (factor levels 1-3) with nine values each.
    test <- data.frame(
      set = gl(3, 9),
      vals = c(
        10, 10, 10, 9.9, 8.1, 1, 1, 1, 1,
        10, 10, 10, 9.9, 6.1, 1, 2, 1, 1,
        10, 10, 10, 7, 6, 1, 2, 0, 1
      )
    )
    
    > test
       set vals
    1    1 10.0
    2    1 10.0
    3    1 10.0
    4    1  9.9
    5    1  8.1
    6    1  1.0
    7    1  1.0
    8    1  1.0
    9    1  1.0
    10   2 10.0
    11   2 10.0
    12   2 10.0
    13   2  9.9
    14   2  6.1
    15   2  1.0
    16   2  2.0
    17   2  1.0
    18   2  1.0
    19   3 10.0
    20   3 10.0
    21   3 10.0
    22   3  7.0
    23   3  6.0
    24   3  1.0
    25   3  2.0
    26   3  0.0
    27   3  1.0
    

    我写了一个简单的函数 slice_it,用来找出数据中连续减少的部分

    # First differences of `x`, padded with a leading 0.
    #
    # x: a numeric vector.
    # Returns c(0, diff(x)): for a non-empty `x` this has the same length as
    # `x`, and element i (i > 1) is x[i] - x[i - 1], so negative entries mark
    # positions where the value dropped relative to its predecessor.
    slice_it <- function(x) {
      # Return the value directly rather than through an assignment to an
      # unused local, so the result is also printed when called interactively.
      c(0, diff(x))
    }
    
    library(dplyr)
    # Within each set, add diff_x: the change of vals relative to the
    # previous row (0 for the first row of the set).
    test %>%
      group_by(set) %>%
      mutate(diff_x = slice_it(vals))
    

    结果如下

      set vals diff_x
    1    1 10.0    0.0  #remove
    2    1 10.0    0.0  #remove
    3    1 10.0    0.0  #remove
    4    1  9.9   -0.1  #keep 
    5    1  8.1   -1.8  #keep 
    6    1  1.0   -7.1  #keep
    7    1  1.0    0.0  #remove 
    8    1  1.0    0.0  #remove
    9    1  1.0    0.0  #remove
    10   2 10.0    0.0  #remove
    11   2 10.0    0.0  #remove
    12   2 10.0    0.0  #remove
    13   2  9.9   -0.1  #keep
    14   2  6.1   -3.8  #keep
    15   2  1.0   -5.1  #keep
    16   2  2.0    1.0  #remove
    17   2  1.0   -1.0  #remove 
    18   2  1.0    0.0  #remove
    19   3 10.0    0.0  #remove
    20   3 10.0    0.0  #remove
    21   3 10.0    0.0  #remove
    22   3  7.0   -3.0  #keep
    23   3  6.0   -1.0  #keep
    24   3  1.0   -5.0  #keep
    25   3  2.0    1.0  #remove
    26   3  0.0   -2.0  #remove
    27   3  1.0    1.0  #remove
    

    如果我把下面的过滤器添加到 dplyr 链中,结果如下

    # Appended to the chain above: keep only rows where the value dropped.
    filter(diff_x < 0)
    
    # A tibble: 11 x 3
    # Groups:   set [3]
      set    vals  diff_x
     1 1       9.9 -0.1000 #keep
     2 1       8.1 -1.8    #keep
     3 1       1   -7.1    #keep 
     4 2       9.9 -0.1000 #keep 
     5 2       6.1 -3.8    #keep
     6 2       1   -5.1    #keep 
     7 2       1   -1      #remove
     8 3       7   -3      #keep
     9 3       6   -1      #keep
    10 3       1   -5      #keep
    11 3       0   -2      #remove
    

    我标记为 #remove 的行仍然满足 diff < 0,但它们出现在前一个数字增加之后,所以应该被删除!

    预期应该是

      set vals diff_x
    
    4    1  9.9   -0.1  #keep 
    5    1  8.1   -1.8  #keep 
    6    1  1.0   -7.1  #keep
    13   2  9.9   -0.1  #keep
    14   2  6.1   -3.8  #keep
    15   2  1.0   -5.1  #keep
    22   3  7.0   -3.0  #keep
    23   3  6.0   -1.0  #keep
    24   3  1.0   -5.0  #keep
    

    我怎样才能做到这一点。 谢谢!

    最后一部分的切片没有帮助,因为底部有多少行是不确定的。

    1 回复  |  直到 6 年前
        1
  •  1
  •   Maurits Evers    6 年前

    如果我理解正确,我们可以对 lag 之后的值再做一次 diff(即第二个 diff)

    # Keep the rows where the value dropped (diff < 0) and the previous row
    # did not rise (diff2 <= 0); `row` records the original row number.
    test %>%
        tibble::rowid_to_column("row") %>%
        group_by(set) %>%
        mutate(
            diff = c(0, diff(vals)),
            # diff2 is the previous row's diff. Padding lag() with the first
            # value makes it 0 (not NA) on the second row of each set;
            # filter() drops rows whose condition is NA, so a decrease that
            # starts on the second row would otherwise be lost.
            diff2 = c(0, diff(lag(vals, default = first(vals))))) %>%
        filter(diff < 0 & diff2 <= 0) %>%
        select(-diff2)
    ## A tibble: 9 x 4
    ## Groups:   set [3]
    #    row set    vals    diff
    #  <int> <fct> <dbl>   <dbl>
    #1     4 1      9.90 -0.1000
    #2     5 1      8.10 -1.80
    #3     6 1      1.00 -7.10
    #4    13 2      9.90 -0.1000
    #5    14 2      6.10 -3.80
    #6    15 2      1.00 -5.10
    #7    22 3      7.00 -3.00
    #8    23 3      6.00 -1.00
    #9    24 3      1.00 -5.00
    

    或者使用你的 slice_it 函数:

    # Change of each element of `x` relative to the one before it, with a
    # leading 0 standing in for the first element.
    slice_it <- function(x) {
      c(0, diff(x))
    }
    # Keep the rows where the value dropped (diff_x < 0) and the previous
    # row did not rise.
    test %>%
        group_by(set) %>%
        mutate(diff_x = slice_it(vals)) %>%
        # Pad lag() with the first value so the lagged difference is 0 (not
        # NA) on the second row of each set; filter() drops rows whose
        # condition is NA, which would lose a decrease starting there.
        filter(diff_x < 0 & slice_it(lag(vals, default = first(vals))) <= 0)