library(dplyr)
library(lubridate)
df = structure(list(Timestamp = c("7/16/2017 18:04", "7/16/2017 18:18",
"7/16/2017 18:32", "7/16/2017 18:46", "7/16/2017 19:00", "7/16/2017 19:14",
"7/16/2017 19:28", "7/16/2017 19:42", "7/16/2017 19:56", "7/16/2017 20:10",
"7/16/2017 20:42", "7/16/2017 20:56", "7/16/2017 21:10", "7/16/2017 21:24",
"7/16/2017 21:38", "7/16/2017 21:52", "7/16/2017 22:06", "7/16/2017 22:20",
"7/16/2017 22:34", "7/16/2017 22:48"), Category = c("x", "y",
"x", "x", "y", "y", "x", "x", "z", "z", "x", "x", "z", "x", "z",
"z", "y", "x", "z", "z"), data = c(4.9, 4.7, 8.2, 2.2, 2.7, 3.8,
8, 7.3, 10.1, 5.4, 17.5, 6.3, 5.8, 0.6, 2.2, 2.9, 0.5, 5.1, 8,
3.6)), .Names = c("Timestamp", "Category", "data"), class = "data.frame", row.names = c(NA, -20L))
df %>%
mutate(Timestamp = mdy_hm(Timestamp), # update to a datetime variable (if needed)
TimeDiff = difftime(Timestamp, min(Timestamp), units = "hours"), # get the distance from the first timestamp of the dataset (in hours)
TimeGroup = as.numeric(TimeDiff) %/% 2) %>% # create a grouping variable based on the distance
group_by(TimeGroup, Category) %>% # for each group and category
summarise(Category_MinTime = min(Timestamp), # get the first time stamp for this category in this group
data_avg = mean(data), # get average
data_sd = sd(data), # get sd
NumObs = n()) %>% # get number of observations (might be useful)
mutate(TimeGroup_MinTime = min(Category_MinTime)) %>% # get first time stamp of that time group
ungroup() %>% # forget the grouping
select(TimeGroup, TimeGroup_MinTime, everything()) # re arrange columns
# # A tibble: 8 x 7
# TimeGroup TimeGroup_MinTime Category Category_MinTime data_avg data_sd NumObs
# <dbl> <dttm> <chr> <dttm> <dbl> <dbl> <int>
# 1 0 2017-07-16 18:04:00 x 2017-07-16 18:04:00 6.120000 2.554799 5
# 2 0 2017-07-16 18:04:00 y 2017-07-16 18:18:00 3.733333 1.001665 3
# 3 0 2017-07-16 18:04:00 z 2017-07-16 19:56:00 10.100000 NaN 1
# 4 1 2017-07-16 20:10:00 x 2017-07-16 20:42:00 8.133333 8.597868 3
# 5 1 2017-07-16 20:10:00 z 2017-07-16 20:10:00 4.075000 1.791415 4
# 6 2 2017-07-16 22:06:00 x 2017-07-16 22:20:00 5.100000 NaN 1
# 7 2 2017-07-16 22:06:00 y 2017-07-16 22:06:00 0.500000 NaN 1
# 8 2 2017-07-16 22:06:00 z 2017-07-16 22:34:00 5.800000 3.111270 2