代码之家  ›  专栏  ›  技术社区  ›  Laura

使用 dplyr 将两个数据帧相乘,并用结果创建另一个数据帧

  •  1
  • Laura  · 技术社区  · 6 年前

    我有这个数据帧:

    # d1: one row per group (3 rows). Despite its name, `Date` is a factor whose
    # labels are the species names "setosa", "versicolor" and "virginica"; the
    # three NS_Forecast_beta* columns are numeric.
    d1 <- structure(list(Date = structure(1:3, .Label = c("setosa", "versicolor", 
    "virginica"), class = "factor"), NS_Forecast_beta1 = c(15.5594030844477, 
    15.7022727658641, 15.8449124021937), NS_Forecast_beta2 = c(-1.24810275875976, 
    -1.24810275875976, -1.24810275875976), NS_Forecast_beta5 = c(3.57197787769625, 
    3.57197787769625, 3.57197787769625)), row.names = c(NA, 3L), class = "data.frame")
    

    此数据帧将与另一个数据帧相乘:

    # d2: 150 rows. `Species` is a factor with labels "setosa", "versicolor" and
    # "virginica"; Sepal.Length, Sepal.Width and Petal.Length are numeric.
    d2 <- structure(list(Species = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L
    ), .Label = c("setosa", "versicolor", "virginica"), class = "factor"), 
        Sepal.Length = c(5.1, 4.9, 4.7, 4.6, 5, 5.4, 4.6, 5, 4.4, 
        4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 
        5.1, 4.6, 5.1, 4.8, 5, 5, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 
        4.9, 5, 5.5, 4.9, 4.4, 5.1, 5, 4.5, 4.4, 5, 5.1, 4.8, 5.1, 
        4.6, 5.3, 5, 7, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 
        5, 5.9, 6, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 
        6.1, 6.4, 6.6, 6.8, 6.7, 6, 5.7, 5.5, 5.5, 5.8, 6, 5.4, 6, 
        6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5, 5.6, 5.7, 5.7, 6.2, 
        5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 
        6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6, 6.9, 5.6, 
        7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 
        6.1, 7.7, 6.3, 6.4, 6, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 
        6.3, 6.5, 6.2, 5.9), Sepal.Width = c(3.5, 3, 3.2, 3.1, 3.6, 
        3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3, 3, 4, 4.4, 3.9, 3.5, 
        3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3, 3.4, 3.5, 3.4, 3.2, 
        3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3, 3.4, 3.5, 2.3, 
        3.2, 3.5, 3.8, 3, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 
        2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2, 3, 2.2, 2.9, 2.9, 3.1, 3, 
        2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3, 2.8, 3, 2.9, 2.6, 
        2.4, 2.4, 2.7, 2.7, 3, 3.4, 3.1, 2.3, 3, 2.5, 2.6, 3, 2.6, 
        2.3, 2.7, 3, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3, 2.9, 3, 3, 
        2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3, 2.5, 2.8, 3.2, 3, 3.8, 2.6, 
        2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3, 2.8, 3, 2.8, 3.8, 
        2.8, 2.8, 2.6, 3, 3.4, 3.1, 3, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 
        3, 2.5, 3, 3.4, 3), Petal.Length = c(1.4, 1.4, 1.3, 1.5, 
        1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 
        1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1, 1.7, 1.9, 1.6, 1.6, 1.5, 
        1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 
        1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 
        4.9, 4, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4, 4.7, 3.6, 
        4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4, 4.9, 4.7, 4.3, 4.4, 4.8, 
        5, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 
        4, 4.4, 4.6, 4, 3.3, 4.2, 4.2, 4.2, 4.3, 3, 4.1, 6, 5.1, 
        5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5, 
        5.1, 5.3, 5.5, 6.7, 6.9, 5, 5.7, 4.9, 6.7, 4.9, 5.7, 6, 4.8, 
        4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 
        5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5, 5.2, 5.4, 5.1)), row.names = c(NA, 
    150L), class = "data.frame")
    

    思路是:按组(setosa、versicolor 和 virginica)将第一个数据帧中该组对应的那一行系数,与第二个数据帧中属于该组的每一行相乘并求和,然后创建一个新的数据帧来存储结果。

    换句话说,这就是我想要的结果:

    Setosa Group:
        15.55940*5.1 +  (-1.248103)*3.5 + 3.571978*1.4 = 79.98535
        15.55940*4.9 +  (-1.248103)*3.0 + 3.571978*1.4 = 77.49752
    ....
    

    然后对 versicolor 组做同样的计算,最后是 virginica 组。

    这些结果应与其余的乘积一起存储在一个新的数据帧中(150 行,即第二个数据帧各列的长度;3 列,对应 3 个组)。

    如你所见,这是一个简单的矩阵积。

    如何使用dplyr包实现这一点?

    2 回复  |  直到 6 年前
        1
  •  5
  •   thc    6 年前

    有很多方法,这里有一种可能的方法:

    # Attach each species' coefficients (d1) to its observations (d2), then
    # take the row-wise linear combination of the three measurements.
    results <- d1 %>%
      full_join(d2, by = c("Date" = "Species")) %>%
      mutate(
        Result = NS_Forecast_beta1 * Sepal.Length +
          NS_Forecast_beta2 * Sepal.Width +
          NS_Forecast_beta5 * Petal.Length
      )
    
    head(results$Result)
    [1] 79.98537 77.49754 73.77884 73.06210 78.30461 85.22554
    
        2
  •  3
  •   MrFlick    6 年前

    dplyr 的做法是把计算显式地写成各列名的函数。我们只需用一次连接(join)把对应的系数带到每一行。

    例如

    # Join the per-species coefficients onto every observation, then compute
    # the weighted sum explicitly from the column names. The result is not
    # assigned, so it is printed when run at top level.
    left_join(d1, d2, by = c("Date" = "Species")) %>%
      mutate(
        value = NS_Forecast_beta1 * Sepal.Length +
          NS_Forecast_beta2 * Sepal.Width +
          NS_Forecast_beta5 * Petal.Length
      )
    

    library(dplyr)
    library(tidyr)
    library(purrr)
    
    # Matrix-product variant: nest the coefficients and the observations per
    # species, join the two nested tables, and multiply within each species.
    # The named form `nest(new_col = <selection>)` is the tidyr >= 1.0
    # interface; the older `nest(-col, .key = "name")` spelling is the legacy
    # one and warns about unnamed `...` on current tidyr.
    dd <- d1 %>%
      nest(beta = -Date) %>%
      left_join(d2 %>% nest(obs = -Species), by = c("Date" = "Species")) %>%
      # obs (n x 3) %*% t(beta) (3 x 1) gives an n x 1 matrix per species.
      # Columns are matched by position, so the column order of d2 must line
      # up with the order of the beta columns in d1.
      mutate(value = map2(obs, beta, ~ as.matrix(.x) %*% t(as.matrix(.y))))
    # Expand the per-species results into one row per observation.
    dd %>% unnest(value)