与missuse的方法相同,但由于您特别询问了效率,下面是
data.table
版本
list1 <- list(edgelist1, edgelist2, edgelist3, edgelist4) %>% lapply(as.data.frame, stringsAsFactors = F)
dt <- rbindlist(list1)
dt[dt[, pct := .N/length(list1), by = .(Node1, Node2)]$pct >= 0.75
, .(Weight = mean(as.numeric(Weight)))
, by = .(Node1, Node2)]
# Node1 Node2 Weight
# 1: ABCD1 DFEC 0.7444667
基准
f1 <- function(){
list1 <- list(edgelist1, edgelist2, edgelist3, edgelist4) %>% lapply(as.data.frame, stringsAsFactors = F)
dt <- rbindlist(list1)
dt[dt[, pct := .N/length(list1), by = .(Node1, Node2)]$pct >= 0.75
, .(Weight = mean(as.numeric(Weight)))
, by = .(Node1, Node2)]
}
f2 <- function(){
do.call(rbind, list1) %>% #bind all metrics together
as.data.frame %>% #convert to data frame
group_by(Node1, Node2) %>% #group by nodes
mutate(n1 = n()) %>% #count members of each group
filter(n1 >= (0.75 * length(list1))) %>% #filter those that are present in less than 75% of list elements
summarise(weight = mean(as.numeric(as.character(Weight)))) #get mean weight for those that are left
}
library(microbenchmark)
microbenchmark(f1(), f2())
# Unit: milliseconds
# expr min lq mean median uq max neval
# f1() 1.817024 2.207588 3.715193 2.718768 3.631382 33.88879 100
# f2() 7.789532 9.990557 16.287901 12.058657 15.876705 347.46884 100