代码之家 › 专栏 › 技术社区 › hahakwok

在r中使用lappy函数和list

lapply apply r

hahakwok · 技术社区 · 7 年前

d1 <- data.frame(col_one = c(1,2,3),col_two = c(4, 5, 6))
d2 <- data.frame(col_one = c(1, 1, 1), col_two = c(6, 5, 4))
d3 <- data.frame(col_one = c(7, 1, 1), col_two = c(8, 5, 4))
my.list <- list(d1, d2,d3)

for (i in 1:3) {
  table<- lapply(my.list, function(data, count) {
    sql <-
      #sqldf(
        paste0(
          "select *,count(col_one) from data where col_one = ",
          count," group by col_one"
        )
      #)
    print(sql)
  },
  count = i)
}

输出:

[1] "select *,count(col_one) from data where col_one = 1 group by col_one"
[1] "select *,count(col_one) from data where col_one = 1 group by col_one"
[1] "select *,count(col_one) from data where col_one = 1 group by col_one"
[1] "select *,count(col_one) from data where col_one = 2 group by col_one"
[1] "select *,count(col_one) from data where col_one = 2 group by col_one"
[1] "select *,count(col_one) from data where col_one = 2 group by col_one"
[1] "select *,count(col_one) from data where col_one = 3 group by col_one"
[1] "select *,count(col_one) from data where col_one = 3 group by col_one"
[1] "select *,count(col_one) from data where col_one = 3 group by col_one"

期望值:

[1] "select *,count(col_one) from data where col_one = 1 group by col_one"
[1] "select *,count(col_one) from data where col_one = 2 group by col_one"
[1] "select *,count(col_one) from data where col_one = 3 group by col_one"

我该如何改进?我希望运行SQL来创建一个我想要的新数据集,但没有成功,我可以指定知道与SQL语句相关的列表索引。还有其他简单的方法吗?

我试过其中一种方法。

d1 <- data.frame(col_one = c(1,2,3),col_two = c(4, 5, 6))
d2 <- data.frame(col_one = c(3, 2, 1), col_two = c(6, 5, 4))
d3 <- data.frame(col_one = c(7, 2, 1), col_two = c(8, 5, 4))
my.list <- list(d1, d2,d3)
seq_along(x)
#for (i in 1:3) {
  table<- lapply(seq_along(my.list), function(index) {
    sql <-
      sqldf(
        paste0(
          "select *,count(col_one) from my.list where col_one = ",
          index," group by col_one"
        )
      )
    print(sql)
  })
#}

输出:

[1] "select *,count(col_one) from my.list where col_one = 1 group by col_one"
[1] "select *,count(col_one) from my.list where col_one = 2 group by col_one"
[1] "select *,count(col_one) from my.list where col_one = 3 group by col_one"

但是,它将找不到要运行SQL的数据集。

d1 <- data.frame(col_one = c(1,2,3),col_two = c(4, 5, 6))
d2 <- data.frame(col_one = c(1, 1, 1), col_two = c(6, 5, 4))
d3 <- data.frame(col_one = c(7, 1, 1), col_two = c(8, 5, 4))
my.list <- list(d1, d2,d3)
table<- mapply(function(data, count) {
  sql <-
    sqldf(
    paste0(
      "select *,count(col_one) from data where col_one = ",
      count," group by col_one"
    )
  )
  print(sql)
}, my.list, 1
)

2 回复 | 直到 7 年前

Uwe 7 年前

如果我理解正确,OP希望为 col_one 对于每个数据。中的帧 my.list ,即,他想知道值1、2或3在 第1列 在每个数据中。框架

如中所述 my answer 到 another question of the OP 根据建议 G. Grothendieck ,合并数据几乎总是更好。相框 相同的结构 在大数据中。而不是将它们单独列在一个列表中。顺便说一句,还有第三个 question ("how to loop the dataframe using sqldf?") 由OP请求帮助提供数据列表。框架。

合并数据。大数据中的帧。表 rbindlist() 使用函数。请注意,添加的id列 df 标识原始数据。每行的框架。

library(data.table)
rbindlist(my.list, idcol = "df")

   df col_one col_two
1:  1       1       4
2:  1       2       5
3:  1       3       6
4:  2       1       6
5:  2       1       5
6:  2       1       4
7:  3       7       8
8:  3       1       5
9:  3       1       4

现在,我们可以轻松计算聚合:

rbindlist(my.list, idcol = "df")[, count_col_one := .N, by = .(df, col_one)][]

   df col_one col_two count_col_one
1:  1       1       4             1
2:  1       2       5             1
3:  1       3       6             1
4:  2       1       6             3
5:  2       1       5             3
6:  2       1       4             3
7:  3       7       8             1
8:  3       1       5             2
9:  3       1       4             2

这 data.table 语句统计中每个单独值的出现次数 第1列 对于每个 df公司 使用特殊符号 .N 并按分组 df公司 和 第1列 .

在这个问题中,OP只要求计算 第1列 . 如果确实要这样做,则需要删除7的值。这可以通过过滤结果来实现:

rbindlist(my.list, idcol = "df")[, count_col_one := .N, by = .(df, col_one)][
  col_one %in% 1:3]

dmi3kno 7 年前

您需要迭代 data 和 counts 同时在里面 tidyverse 我建议使用purr::map2(),但在base R中,您可以简单地执行以下操作:'

table<- mapply(function(data, count) {
    sql <-
      #sqldf(
      paste0(
        "select *,count(col_one) from data where col_one = ",
        count," group by col_one"
      )
    #)
    print(sql)
  }, my.list, 1:3
  )
[1] "select *,count(col_one) from data where col_one = 1 group by col_one"
[1] "select *,count(col_one) from data where col_one = 2 group by col_one"
[1] "select *,count(col_one) from data where col_one = 3 group by col_one"