如果您愿意仅根据日期(而不考虑具体时刻)来确定一年中的第几周,那么可以得到一个速度快约 20 倍的解决方案:
library(data.table)
# Number of rows to benchmark; swap in the commented value for a quick run.
NN <- 1e7
# NN <- 1e4

# Fixed seed so the sampled timestamps are reproducible.
set.seed(32040)

# `col` is a running row id; `timestamp` is a base epoch value plus a random
# offset drawn with replacement from 1..7000001.
DT <- data.table(
  col = seq_len(NN),
  timestamp = 1521872652 + sample(7000001, NN, replace = TRUE)
)

# One independent copy per benchmarked implementation.
DT1 <- copy(DT)
DT2 <- copy(DT)

# Time zone used by both implementations (read as a global).
tz <- "Africa/Addis_Ababa"
# Baseline implementation: converts every timestamp to a local time and builds
# a "<year><week>" label row by row. Reads the time zone from the global `tz`.
# Returns a table with the columns col, localtime and weekuni.
old <- function(DT) {
  # The conversion itself is the fast part of this function.
  local_ts <- anytime::anytime(DT$timestamp, tz = tz)
  # NOTE(review): `$<-` on a data.table is expected to assign into a copy, so
  # the `:=` further down should not reach the caller's table -- confirm.
  DT$localtime <- local_ts
  # year() and week() are evaluated over the whole vector, one label per row.
  DT$weekuni <- paste(
    lubridate::year(local_ts),
    lubridate::week(local_ts),
    sep = ""
  )
  DT[, timestamp := NULL]
  DT[, .(col, localtime, weekuni)]
}
# Faster implementation: builds the "<year><week>" label once per distinct
# date instead of once per row. Reads the time zone from the global `tz`.
# Every `:=` below updates the table passed in by reference.
# Returns a table with the columns col, localtime and weekuni, ordered by col.
new <- function(DT) {
  DT[, c("localtime", "Date") := {
    local_ts <- anytime::anytime(timestamp, tz = tz)
    # NOTE(review): as.Date() on a POSIXct defaults to UTC unless `tz` is
    # passed, so Date may differ from the local calendar day close to
    # midnight -- confirm this is the intended "by date only" definition.
    list(local_ts, as.Date(local_ts))
  }]
  # One paste0() call per group; `.BY[[1L]]` is the group's Date value.
  DT[, weekuni := {
    day <- .BY[[1L]]
    paste0(lubridate::year(day), lubridate::week(day))
  }, keyby = "Date"]
  DT[, Date := NULL]
  # `timestamp` is left in place (presumably so the function can be called
  # again on the same table):
  # DT[, timestamp := NULL]
  # Re-order by col, since the keyed grouping above is expected to have
  # re-sorted the rows by Date.
  DT[order(col), list(col, localtime, weekuni)]
}
# Time both implementations, each on its own copy of the data.
# `check = FALSE` skips the comparison of the two results;
# `filter_gc = FALSE` keeps iterations during which garbage collection ran.
bench::mark(
  old(DT1),
  new(DT2),
  check = FALSE,
  filter_gc = FALSE
)
#> # A tibble: 2 x 10
#> expression min mean median max `itr/sec` mem_alloc n_gc n_itr
#> <chr> <bch:t> <bch:t> <bch:> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 old(DT1) 22.39s 22.39s 22.39s 22.39s 0.0447 2.28GB 5 1
#> 2 new(DT2) 1.13s 1.13s 1.13s 1.13s 0.888 878.12MB 1 1
#> # ... with 1 more variable: total_time <bch:tm>
由 reprex package(v0.2.0)于 2018-06-23 创建。
即使您不愿意这样定义,仍然可以通过对每个日期只调用一次 `paste` 来获得性能提升:
library(data.table)
# Number of rows to benchmark; swap in the commented value for a quick run.
NN <- 1e7
# NN <- 1e4

# Fixed seed so the sampled timestamps are reproducible.
set.seed(32040)

# `col` is a running row id; `timestamp` is a base epoch value plus a random
# offset drawn with replacement from 1..7000001.
DT <- data.table(
  col = seq_len(NN),
  timestamp = 1521872652 + sample(7000001, NN, replace = TRUE)
)

# Independent copies so each benchmarked implementation gets its own table.
DT1 <- copy(DT)
DT2 <- copy(DT)
DT3 <- copy(DT)

# Time zone used by both implementations (read as a global).
tz <- "Africa/Addis_Ababa"
# Baseline implementation: converts every timestamp to a local time and builds
# a "<year><week>" label row by row. Reads the time zone from the global `tz`.
# Returns a table with the columns col and weekuni.
old <- function(DT) {
  # The conversion itself is the fast part of this function.
  local_ts <- anytime::anytime(DT$timestamp, tz = tz)
  # NOTE(review): `$<-` on a data.table is expected to assign into a copy, so
  # the `:=` further down should not reach the caller's table -- confirm.
  DT$localtime <- local_ts
  # year() and week() are evaluated over the whole vector, one label per row.
  DT$weekuni <- paste(
    lubridate::year(local_ts),
    lubridate::week(local_ts),
    sep = ""
  )
  DT[, timestamp := NULL]
  DT[, .(col, weekuni)]
}
# Faster implementation: derives the date of each timestamp in the time zone
# given by the global `tz`, then builds the "<year><week>" label once per
# distinct date. Every `:=` below updates the table passed in by reference.
# Returns a table with the columns col and weekuni, ordered by col.
new <- function(DT) {
  DT[, Date := anytime::anydate(timestamp, tz = tz)]
  # One paste0() call per group; `.BY[[1L]]` is the group's Date value.
  DT[, weekuni := {
    day <- .BY[[1L]]
    paste0(lubridate::year(day), lubridate::week(day))
  }, keyby = "Date"]
  DT[, Date := NULL]
  # `timestamp` is left in place (presumably so the function can be called
  # again on the same table):
  # DT[, timestamp := NULL]
  weekly <- DT[, .(col, weekuni)]
  # Sort the extracted columns by col; the setorderv() call stays last so the
  # function's return value is exactly what setorderv() returns.
  setorderv(weekly, "col")
}
# Time both implementations, each on its own copy of the data.
# `check = TRUE` makes bench verify that both expressions return equivalent
# results; `filter_gc = FALSE` keeps iterations during which garbage
# collection ran.
bench::mark(
  old(DT1),
  new(DT2),
  check = TRUE,
  filter_gc = FALSE
)
#> # A tibble: 2 x 10
#> expression min mean median max `itr/sec` mem_alloc n_gc n_itr
#> <chr> <bch:t> <bch:t> <bch:> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 old(DT1) 22.2s 22.2s 22.2s 22.2s 0.0450 2.21GB 4 1
#> 2 new(DT2) 2.8s 2.8s 2.8s 2.8s 0.357 1.42GB 3 1
#> # ... with 1 more variable: total_time <bch:tm>