真正的挑战是创建
data.frame
完成缺少的行后。一种解决方案是使用
zoo
图书馆这个
rollapply
函数将提供分配
NA
初始行的值。
按原样使用OP的数据,解决方案可以是:
library(zoo)
library(dplyr)
# Data from OP
df <- structure(list(id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L),
day = c(15L,16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 16L, 17L, 18L, 20L,
16L, 18L, 19L, 21L, 22L, 24L, 25L, 27L),
outcome = c(100L, 110L,190L, 130L, 140L, 150L, 160L, 100L, 180L, 120L, 90L, 110L, 120L,
130L, 110L, 200L, 180L, 170L, 180L, 210L, 160L, 200L)),
.Names = c("id", "day", "outcome"), row.names = c(NA, -22L), class = "data.frame")
# Make a list without missing day for each id
df_complete <- merge(
expand.grid(id=unique(df$id), day=min(df$day):max(df$day)),
df, all=TRUE)
# Valid range of day for each ID group
df_id_wise_range <- df %>% group_by(id) %>%
summarise(min_day = min(day), max_day = max(day)) %>% as.data.frame()
# id min_day max_day
# 1 1 15 24
# 2 2 16 20
# 3 3 16 27
# Join original df and df_complete and then use df_id_wise_range to
# filter it for valid range of day for each group
df_final <- df_complete %>%
left_join(df, by=c("id","day")) %>%
select(-outcome.y) %>%
inner_join(df_id_wise_range, by="id") %>%
filter(day >= min_day & day <= max_day) %>%
mutate(outcome = outcome.x) %>%
select( id, day, outcome) %>%
as.data.frame()
# Now apply mean to get average
df_average <- df_final %>% group_by(id) %>%
mutate(average= rollapply(outcome, 7, mean, na.rm = TRUE, by = 1,
fill = NA, align = "right", partial = 7)) %>% as.data.frame()
df_average
# The result
# id day outcome average
#1 1 15 100 NA
#2 1 16 110 NA
#3 1 17 190 NA
#4 1 18 130 NA
#5 1 19 140 NA
#6 1 20 150 NA
#7 1 21 160 140.0
#8 1 22 100 140.0
#9 1 23 180 150.0
#10 1 24 120 140.0
#11 2 16 90 NA
#12 2 17 110 NA
#13 2 18 120 NA
#....
#....
#19 3 19 180 NA
#20 3 20 NA NA
#21 3 21 170 NA
#22 3 22 180 168.0
#23 3 23 NA 182.5
#24 3 24 210 188.0
#25 3 25 160 180.0
#26 3 26 NA 180.0
#27 3 27 200 184.0
计算步骤
moving slope
是:
首先创建一个函数以返回坡度
将函数用作的一部分
rollapplyr
#Function to calculate slope
slop_e <- function(z) coef(lm(b ~ a, as.data.frame(z)))[[2]]
#Apply function
z2$slope <- rollapplyr(zoo(z2), 7, slop_e , by.column = FALSE, fill = NA, align = "right")
z2
a b mean_a slope
1 1 21 NA NA
2 2 22 NA NA
3 3 23 NA NA
4 4 24 NA NA
5 5 25 NA NA
6 6 26 NA NA
7 7 27 4 1
8 8 28 5 1
9 9 29 6 1
10 10 30 7 1
11 11 31 8 1
12 12 32 9 1
13 13 33 10 1
14 14 34 11 1
15 15 35 12 1
16 16 36 13 1
17 17 37 14 1
18 18 38 15 1
19 19 39 16 1
20 20 40 17 1