代码之家  ›  专栏  ›  技术社区  ›  MYaseen208

tidyverse:在摘要输出中附加总计行

  •  0
  • MYaseen208  · 技术社区  · 3 年前

    我想在输出中附加总计行 summarise 与一起使用 group_by .

    Data <-
      structure(list(CT = c("1", "1", "1", "1", "1", "1", "1", "1", 
    "1", "1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
    "2"), SCT = c("1", "1", "1", "1", "1", "1", "2", "2", "2", "2", 
    "2", "2", "1", "1", "1", "1", "2", "2", "2", "2", "2", "2"), 
        SSCT = c("1", "2", "3", "1", "2", "3", "1", "2", "1", "2", 
        "1", "2", "1", "2", "1", "2", "1", "2", "3", "1", "2", "3"
        ), Category = c(111L, 112L, 113L, 111L, 112L, 113L, 121L, 
        122L, 121L, 122L, 121L, 122L, 211L, 212L, 211L, 212L, 221L, 
        222L, 223L, 221L, 222L, 223L), Y = c(10L, 12L, 15L, 11L, 
        10L, 13L, 21L, 22L, 20L, 25L, 23L, 24L, 31L, 33L, 36L, 39L, 
        32L, 31L, 36L, 41L, 44L, 45L)), row.names = c(NA, -22L), class = "data.frame")
    head(Data)
      CT SCT SSCT Category  Y
    1  1   1    1      111 10
    2  1   1    2      112 12
    3  1   1    3      113 15
    4  1   1    1      111 11
    5  1   1    2      112 10
    6  1   1    3      113 13
    

    在这里

    CT:类别,SCT:子类别,SSCT:子类别

    Data %>%
      group_by(CT) %>% 
      summarise(Total = sum(Y))
    
    # A tibble: 2 x 2
      CT    Total
      <chr> <int>
    1 1       206
    2 2       368
    
    
    Data %>%
      group_by(CT, SCT) %>% 
      summarise(Total = sum(Y))
    
    # A tibble: 4 x 3
    # Groups:   CT [2]
      CT    SCT   Total
      <chr> <chr> <int>
    1 1     1        71
    2 1     2       135
    3 2     1       139
    4 2     2       229
    
    Data %>%
      group_by(CT, SCT, SSCT) %>% 
      summarise(Total = sum(Y))
    # A tibble: 10 x 4
    # Groups:   CT, SCT [4]
       CT    SCT   SSCT  Total
       <chr> <chr> <chr> <int>
     1 1     1     1        21
     2 1     1     2        22
     3 1     1     3        28
     4 1     2     1        64
     5 1     2     2        71
     6 2     1     1        67
     7 2     1     2        72
     8 2     2     1        73
     9 2     2     2        75
    10 2     2     3        81
    

    所需输出

    需要输出以下内容:

    CT    SCT   SSCT  Total
    Total               206
    Total                71
    1     1     1        21
    1     1     2        22
    1     1     3        28
    Total               135
    1     2     1        64
    1     2     2        71
    Total               368
    Total               139
    2     1     1        67
    2     1     2        72
    Total               229
    2     2     1        73
    2     2     2        75
    2     2     3        81
    
    1 回复  |  直到 3 年前
        1
  •  1
  •   zimia    3 年前

    如果希望最后的列是数字而不是字符,则必须接受NA而不是“”

    bind_rows(
      Data %>% 
        group_by(CT) %>% 
        summarise(Total = sum(Y)) %>% 
        mutate(SCT = "Total", SSCT = "Total"),
      
      Data %>%
        group_by(CT, SCT) %>% 
        summarise(Total = sum(Y)) %>% 
        mutate(SSCT = "Total"),
      
      
      Data %>%
        group_by(CT, SCT, SSCT) %>% 
        summarise(Total = sum(Y))
    ) %>% 
      mutate(across(c("SCT","SSCT"), ~ fct_relevel(., "Total"))) %>% 
      arrange(CT, SCT, SSCT) %>% 
      mutate(CT = ifelse(SSCT == "Total", "Total", CT),
             SCT = ifelse(SSCT == "Total", "", as.numeric(SCT)-1),
             SSCT = gsub("Total", "", SSCT)) %>%  
      select(CT, SCT, SSCT, Total) 
    
    #  A tibble: 16 x 4
        CT    SCT   SSCT  Total
       <chr> <chr> <chr> <int>
     1 Total ""    ""      206
     2 Total ""    ""       71
     3 1     "1"   "1"      21
     4 1     "1"   "2"      22
     5 1     "1"   "3"      28
     6 Total ""    ""      135
     7 1     "2"   "1"      64
     8 1     "2"   "2"      71
     9 Total ""    ""      368
    10 Total ""    ""      139
    11 2     "1"   "1"      67
    12 2     "1"   "2"      72
    13 Total ""    ""      229
    14 2     "2"   "1"      73
    15 2     "2"   "2"      75
    16 2     "2"   "3"      81
    
        2
  •  1
  •   G. Grothendieck    3 年前

    1. 我们可以使用 adorn_totals 从看门人的包裹里。在看门人中,总计通常在总计的组之后,但我们可以使用名称“0”代替总计并进行排序,使总计先排序,然后在末尾用单词“总计”替换“0”。过滤器会删除包含多个带单词“总计”的字段的行。

    library(dplyr)
    library(janitor)
    
    Data %>% 
      group_by(CT, SCT, SSCT) %>%
      summarize(Y = sum(Y), .groups = "drop") %>%
      group_by(CT) %>%
      group_modify(~ adorn_totals(., name = "0")) %>%
      group_by(CT, SCT) %>%
      group_modify(~ adorn_totals(., name = "0")) %>%
      ungroup %>%
      filter(rowSums(across() == "0") <= 1) %>%
      arrange(CT, SCT, SSCT) %>%
      mutate(across(where(is.character), ~ replace(., . == "0", "Total")))
    

    给:

    # A tibble: 16 x 4
       CT    SCT   SSCT      Y
       <chr> <chr> <chr> <int>
     1 1     Total -       206
     2 1     1     Total    71
     3 1     1     1        21
     4 1     1     2        22
     5 1     1     3        28
     6 1     2     Total   135
     7 1     2     1        64
     8 1     2     2        71
     9 2     Total -       368
    10 2     1     Total   139
    11 2     1     1        67
    12 2     1     2        72
    13 2     2     Total   229
    14 2     2     1        73
    15 2     2     2        75
    16 2     2     3        81
    

    2. 如果可以将总数放在组后而不是组前,那么就可以简化。

    Data %>% 
      group_by(CT, SCT, SSCT) %>%
      summarize(Y = sum(Y), .groups = "drop") %>%
      group_by(CT) %>%
      group_modify(~ adorn_totals(.)) %>%
      group_by(CT, SCT) %>%
      group_modify(~ adorn_totals(.)) %>%
      ungroup %>%
      filter(rowSums(across() == "Total") <= 1)
    

    给:

    # A tibble: 16 x 4
       CT    SCT   SSCT      Y
       <chr> <chr> <chr> <int>
     1 1     1     1        21
     2 1     1     2        22
     3 1     1     3        28
     4 1     1     Total    71
     5 1     2     1        64
     6 1     2     2        71
     7 1     2     Total   135
     8 1     Total -       206
     9 2     1     1        67
    10 2     1     2        72
    11 2     1     Total   139
    12 2     2     1        73
    13 2     2     2        75
    14 2     2     3        81
    15 2     2     Total   229
    16 2     Total -       368
    
        3
  •  1
  •   IceCreamToucan    3 年前

    您可以使用 rollup 在data.table包中

    library(data.table)
    setDT(Data)
    
    group_vars <-  c('CT', 'SCT', 'SSCT')
    piv <- 
      rollup(Data, j = .(Total = sum(Y)), by = group_vars)
                   
    setorderv(piv, group_vars)[-1]
    #>     CT  SCT SSCT Total
    #>  1:  1 <NA> <NA>   206
    #>  2:  1    1 <NA>    71
    #>  3:  1    1    1    21
    #>  4:  1    1    2    22
    #>  5:  1    1    3    28
    #>  6:  1    2 <NA>   135
    #>  7:  1    2    1    64
    #>  8:  1    2    2    71
    #>  9:  2 <NA> <NA>   368
    #> 10:  2    1 <NA>   139
    #> 11:  2    1    1    67
    #> 12:  2    1    2    72
    #> 13:  2    2 <NA>   229
    #> 14:  2    2    1    73
    #> 15:  2    2    2    75
    #> 16:  2    2    3    81
    

    创建于2021-06-05 reprex package (v2.0.0)