代码之家  ›  专栏  ›  技术社区  ›  SteveS

如何计算两列之间的 lead/lag 差值?

  •  0
  • SteveS  · 技术社区  · 6 年前

    这是我的数据框(data frame):

    structure(list(replicate = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
    3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 
    7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 
    10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 
    14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L), press_id = c(1L, 2L, 
    3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 
    3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 
    3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 
    3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), start_time = c(164429106370979, 
    164429411618825, 164429837271940, 164430399454285, 164429106370980, 
    164429411618826, 164429837271941, 164430399454286, 164429106370981, 
    164429411618827, 164429837271942, 164430399454287, 164429106370982, 
    164429411618828, 164429837271943, 164430399454288, 164429106370983, 
    164429411618829, 164429837271944, 164430399454289, 164429106370984, 
    164429411618830, 164429837271945, 164430399454290, 164429106370985, 
    164429411618831, 164429837271946, 164430399454291, 164429106370986, 
    164429411618832, 164429837271947, 164430399454292, 164429106370987, 
    164429411618833, 164429837271948, 164430399454293, 164429106370988, 
    164429411618834, 164429837271949, 164430399454294, 164429106370989, 
    164429411618835, 164429837271950, 164430399454295, 164429106370990, 
    164429411618836, 164429837271951, 164430399454296, 164429106370991, 
    164429411618837, 164429837271952, 164430399454297, 164429106370992, 
    164429411618838, 164429837271953, 164430399454298, 164429106370993, 
    164429411618839, 164429837271954, 164430399454299), end_time = c(164429182443825, 
    164429512525748, 164429903243170, 164430465927555, 164429182443826, 
    164429512525749, 164429903243171, 164430465927556, 164429182443827, 
    164429512525750, 164429903243172, 164430465927557, 164429182443828, 
    164429512525751, 164429903243173, 164430465927558, 164429182443829, 
    164429512525752, 164429903243174, 164430465927559, 164429182443830, 
    164429512525753, 164429903243175, 164430465927560, 164429182443831, 
    164429512525754, 164429903243176, 164430465927561, 164429182443832, 
    164429512525755, 164429903243177, 164430465927562, 164429182443833, 
    164429512525756, 164429903243178, 164430465927563, 164429182443834, 
    164429512525757, 164429903243179, 164430465927564, 164429182443835, 
    164429512525758, 164429903243180, 164430465927565, 164429182443836, 
    164429512525759, 164429903243181, 164430465927566, 164429182443837, 
    164429512525760, 164429903243182, 164430465927567, 164429182443838, 
    164429512525761, 164429903243183, 164430465927568, 164429182443839, 
    164429512525762, 164429903243184, 164430465927569)), class = c("grouped_df", 
    "tbl_df", "tbl", "data.frame"), row.names = c(NA, -60L), vars = c("replicate", 
    "press_id"), drop = TRUE, indices = list(0L, 1L, 2L, 3L, 4L, 
        5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 
        18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 
        30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 
        42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 
        54L, 55L, 56L, 57L, 58L, 59L), group_sizes = c(1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
        replicate = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
        3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 
        7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 
        11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 
        14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L), press_id = c(1L, 
        2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 
        1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 
        4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 
        3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L)), class = "data.frame", row.names = c(NA, 
    -60L), vars = c("replicate", "press_id"), drop = TRUE, indices = list(
        0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
        14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 
        26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 
        38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 
        50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L), group_sizes = c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
        replicate = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
        3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 
        7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 
        11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 
        14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L), press_id = c(1L, 
        2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 
        1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 
        4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 
        3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L)), class = "data.frame", row.names = c(NA, 
    -60L), vars = c("replicate", "press_id"), drop = TRUE, .Names = c("replicate", 
    "press_id")), .Names = c("replicate", "press_id")), .Names = c("replicate", 
    "press_id", "start_time", "end_time"))
    

    我想计算相邻 press_id 之间的时间差(即当前行的 start_time 减去上一行的 end_time),例如:

    replicate press_id  start_time      end_time      time_diff
            1        1  1.644291e+14    1.644292e+14  0 (it's a first row)  
            1        2  1.644294e+14    1.644295e+14  1.644294e+14 - 1.644292e+14   
            1        3  1.644298e+14    1.644299e+14  1.644298e+14 - 1.644295e+14
            1        4  1.644304e+14    1.644305e+14  .....
            2        1  1.644291e+14    1.644292e+14    
            2        2  1.644294e+14    1.644295e+14    
            2        3  1.644298e+14    1.644299e+14    
            2        4  1.644304e+14    1.644305e+14    
    

    我尝试过使用 mutate、lag、lead 和 diff,但都没有成功。我也试过对数据集进行分组和取消分组,仍然没有效果。

    # Asker's attempt (does not produce the desired column).
    # Within each replicate, lead(start_time) is the NEXT row's start_time, so
    # lead(start_time) - end_time is the gap that follows the current row, not
    # the gap that precedes it. The first row of each replicate is forced to 0
    # by the ifelse(), and the last row gets NA because lead() has no next row.
    df %>% 
    group_by(replicate) %>% 
    mutate(d = ifelse(row_number() == 1, 0, lead(start_time) - end_time))
    
    1 回复  |  直到 6 年前
        1
  •  2
  •   Lennyy    6 年前
    # Gap between consecutive presses within each replicate: the current row's
    # start_time minus the previous row's end_time. The first row of every
    # replicate has no previous row, so its d is NA.
    df %>%
      group_by(replicate) %>%
      mutate(d = start_time - dplyr::lag(end_time, n = 1L))
    

    如果您希望 replicate 列中每个唯一值的第一行是 0 而不是 NA,则可以执行以下操作:

    # Same per-replicate gap (start_time minus the previous row's end_time),
    # with every NA in the result replaced by 0. lag() produces NA on the
    # first row of each replicate, so those rows end up with d = 0.
    df %>%
      group_by(replicate) %>%
      mutate(d = coalesce(start_time - lag(end_time), 0))
    

    或者只是:

    # One-step variant: the first row of each replicate is set to 0 directly;
    # every other row gets its start_time minus the previous row's end_time.
    df %>%
      group_by(replicate) %>%
      mutate(
        d = if_else(row_number() == 1L, 0, start_time - lag(end_time))
      )