下面是一种使用 data.table 的方法:对 ref* 列应用 zoo::na.approx 进行线性插值(请注意,这里还使用了更大的数据集):
library(data.table)
library(zoo)
# Benchmark variant 0: merge a small reference table into a large random
# sample and interpolate every column of the merged result with
# zoo::na.approx.
#
# Takes no arguments. The RNG seed is fixed, so every call works on the
# same data. Returns a new data.table assembled from the interpolated
# columns.
dt_function_test_0 <- function() {
  set.seed(123)
  # 1e7 uniform draws between 0 and 500 serve as the lookup positions.
  dt_random <- data.table(vals = runif(1e7, 0, 500))
  # Three anchor rows that carry the known ref1/ref2 values.
  dt_na <- data.table(
    vals = c(0, 250, 500),
    ref1 = c(0.33, 0.45, 0.78),
    ref2 = c(0.12, 0.79, 1)
  )
  # Version 0: the full outer merge leaves ref1/ref2 as NA on rows that come
  # only from dt_random. lapply() over .SD then runs na.approx on every
  # column (vals included) and builds a fresh table from the results, so no
  # column-name vector is needed here.
  merge(dt_random, dt_na, all = TRUE)[, lapply(.SD, na.approx)]
}
# Benchmark variant 1: same input data as the other variants, but the
# interpolated values are assigned back into the merged table with `:=`
# instead of building a new table.
#
# Takes no arguments. The value of the `:=` call (the updated merged table)
# is the function's result.
dt_function_test_1 <- function() {
  # Columns that hold NA after the merge and need interpolating.
  ref_cols <- c("ref1", "ref2")
  # Anchor rows with the known reference values.
  anchors <- data.table(
    vals = c(0, 250, 500),
    ref1 = c(0.33, 0.45, 0.78),
    ref2 = c(0.12, 0.79, 1)
  )
  # Seed immediately before the only random draw so the sample is fixed.
  set.seed(123)
  samples <- data.table(vals = runif(1e7, 0, 500))
  merged <- merge(samples, anchors, all = TRUE)
  # Version 1: update by reference, restricted to the reference columns.
  merged[, (ref_cols) := lapply(.SD, na.approx), .SDcols = ref_cols]
}
# Benchmark variant 2: same input data as the other variants, but each
# reference column is overwritten one at a time with data.table::set().
#
# Takes no arguments. Returns the merged table after both reference columns
# have been replaced by their interpolated values.
dt_function_test_2 <- function() {
  set.seed(123)
  # Full outer merge of the random sample with the three anchor rows.
  merged <- merge(
    data.table(vals = runif(1e7, 0, 500)),
    data.table(
      vals = c(0, 250, 500),
      ref1 = c(0.33, 0.45, 0.78),
      ref2 = c(0.12, 0.79, 1)
    ),
    all = TRUE
  )
  # Version 2: using set, one column per iteration.
  for (col_name in c("ref1", "ref2")) {
    set(merged, j = col_name, value = na.approx(merged[[col_name]]))
  }
  merged
}
计时结果:
> system.time(dt_function_test_0())
user system elapsed
5.44 1.90 6.96
> system.time(dt_function_test_1())
user system elapsed
3.55 1.30 4.41
> system.time(dt_function_test_2())
user system elapsed
3.78 1.19 4.52