我认为,当你要求R查找的
lead
值 a)超出了表的最后一行,或 b)超出了所在分组时,就会出现这个错误。您可以给它传入默认值0(这个值永远不会真正被用到),这样可以消除错误;但由于函数会试图把每组所有行的
start:stop
和
unstop:lead(stop)
的值一并连接起来,所以会产生下面的警告:
# Reproducible example of the grouped approach that triggers the warnings
# shown in the captured output that follows.
library(tidyverse)
# Example data: group 1 has a single row, group 2 has three rows.
df <- data.frame(group = c(1, 2, 2, 2),
start = c(2, 7, 7, 7),
stop = c(8, 7, 8, 9),
unstop = c(10, 7, 9, 10))
df |>
group_by(group) |>
mutate(
# Number of rows in the current group.
n_rows = n(),
# Inside a grouped mutate() the columns are whole-group vectors, so for the
# three-row group `:` receives length-3 operands, uses only the first
# element of each and emits a warning.
split_weeks = case_when(
n_rows == 1 ~ str_c(start:stop, collapse = ","),
n_rows > 1 &
# NOTE(review): the captured warnings quote `unstop:lead(stop, 0)` (0 passed
# positionally), not `default = 0` as written here - confirm which call
# produced the output shown after this snippet.
row_number() == 1 ~ str_c(c(start:stop, unstop:lead(stop, default = 0)), collapse = ","),
TRUE ~ "fail"
)
)
#> Warning in start:stop: numerical expression has 3 elements: only the first used
#> Warning in start:stop: numerical expression has 3 elements: only the first used
#> Warning in start:stop: numerical expression has 3 elements: only the first used
#> Warning in start:stop: numerical expression has 3 elements: only the first used
#> Warning in unstop:lead(stop, 0): numerical expression has 3 elements: only the
#> first used
#> Warning in unstop:lead(stop, 0): numerical expression has 3 elements: only the
#> first used
#> # A tibble: 4 × 6
#> # Groups: group [2]
#> group start stop unstop n_rows split_weeks
#> <dbl> <dbl> <dbl> <dbl> <int> <chr>
#> 1 1 2 8 10 1 2,3,4,5,6,7,8
#> 2 2 7 7 7 3 7,7
#> 3 2 7 8 9 3 fail
#> 4 2 7 9 10 3 fail
一种解决办法是:
- 在分组之前(即不按组)先求出lead值(设置默认值以避免最后一行出错)
- 求出每组的行数以及每行在组内的行号
- 然后取消分组!
- 改为按行计算,使R只关注当前行中的值
- 执行连接
结果如下——虽然我不确定该单元格里是否本来就想要
7,7,8
(不过这个值说得通,因为它把7到7和7到8连接了起来):
# Build `split_weeks` one row at a time so that `:` only ever sees scalars.
df |>
  # Next row's `stop`, taken over the whole table rather than per group; the
  # final row has no successor and receives the placeholder 0.
  mutate(lead_stop = lead(stop, default = 0)) |>
  # Size of each group and each row's position inside its group.
  group_by(group) |>
  mutate(
    n_rows = n(),
    rownum = row_number()
  ) |>
  ungroup() |>
  # From here on every expression is evaluated separately for each row.
  rowwise() |>
  mutate(
    split_weeks = case_when(
      # Only row of a single-row group: start..stop.
      rownum == 1 & n_rows == 1 ~
        str_c(start:stop, collapse = ","),
      # First row of a multi-row group: start..stop, then unstop..next stop.
      rownum == 1 ~
        str_c(c(start:stop, unstop:lead_stop), collapse = ","),
      # Every later row within a group.
      TRUE ~ "fail"
    )
  )
#> # A tibble: 4 × 8
#> # Rowwise:
#> group start stop unstop lead_stop n_rows rownum split_weeks
#> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int> <chr>
#> 1 1 2 8 10 7 1 1 2,3,4,5,6,7,8
#> 2 2 7 7 7 8 3 1 7,7,8
#> 3 2 7 8 9 9 3 2 fail
#> 4 2 7 9 10 0 3 3 fail
于2022年5月7日由
reprex package
(v2.0.1)创建