代码之家  ›  专栏  ›  技术社区  ›  SidC

如何在多个模型中使用broom::tidy?

  •  -2
  • SidC  · 技术社区  · 6 年前

    我试图用broom总结19个多项式回归模型的结果。我跟着这个走了 SO Question 我想用它 broom::tidy . 我的剧本如下:

    ALTER PROCEDURE [dbo].[spRegressionPeak]
    @StudyID int
    AS
    BEGIN
    Declare @sStudyID VARCHAR(50)
    Set @sStudyID = CONVERT(VARCHAR(50),@StudyID)
    
    --We are selecting the distinct StudyID, Productnumber, ResponseID and mean 
    values 1 thorugh 6 from the CodeMeans table.  
    --Note that spCodeMeans must be run before running this stored procedure to 
    ensure response data exists in the CodeMeans table.
    --We use IsNull values to pass zeroes where an average wasn't calculated os that 
    the polynomial regression can be calculated.
    DECLARE @inquery  AS NVARCHAR(MAX) = '
        Select
    c.StudyID, c.RespID, c.LikingOrder, avg(isnull(C1,0)) as C1, avg(isnull(C2,0)) as C2, avg(isnull(C3,0)) as C3, avg(isnull(C4,0)) as C4, 
    avg(isnull(C5,0)) as C5, avg(isnull(C6,0)) as C6, avg(isnull(C7,0)) as C7, avg(isnull(C8,0)) as C8, avg(isnull(C9,0)) as C9, 
    avg(isnull(C10,0)) as C10, avg(isnull(C11,0)) as C11, avg(isnull(C12,0)) as C12, avg(isnull(C13,0)) as C13, avg(isnull(C14,0)) as C14, 
    avg(isnull(C15,0)) as C15, avg(isnull(C16,0)) as C16, avg(isnull(C17,0)) as C17, avg(isnull(C18,0)) as C18, avg(isnull(C19,0)) as C19
    from ClosedStudyResponses c
    where c.StudyID = @StudyID
    group by StudyID, RespID, LikingOrder
    order by RespID
            '
    
    
    --We are setting @inquery aka InputDataSet to be our initial dataset.  
    --R Services requires that a data.frame be passed to any calculations being 
    generated.  As such, df is simply data framing the @inquery data.
    --The res object holds the polynomial regression results by RespondentID and 
    LikingOrder for each of the averages in the @inquery resultset.
    
    EXEC sp_execute_external_script @language = N'R'
        , @script = N'
        library(tidyr, broom)
    
        studymeans <- InputDataSet
    
        df <- data.frame(studymeans) 
    
        lin.mod.1 <- lm(df$LikingOrder ~ poly(df$C1,3, raw=TRUE))
        lin.mod.2 <- lm(df$LikingOrder ~ poly(df$C2,3, raw=TRUE))
        lin.mod.3 <- lm(df$LikingOrder ~ poly(df$C3,3, raw=TRUE))
        lin.mod.4 <- lm(df$LikingOrder ~ poly(df$C4,3, raw=TRUE))
        lin.mod.5 <- lm(df$LikingOrder ~ poly(df$C5,3, raw=TRUE))
        lin.mod.6 <- lm(df$LikingOrder ~ poly(df$C6,3, raw=TRUE))
        lin.mod.7 <- lm(df$LikingOrder ~ poly(df$C7,3, raw=TRUE))
        lin.mod.8 <- lm(df$LikingOrder ~ poly(df$C8,3, raw=TRUE))
        lin.mod.9 <- lm(df$LikingOrder ~ poly(df$C9,3, raw=TRUE))
        lin.mod.10 <- lm(df$LikingOrder ~ poly(df$C10,3, raw=TRUE))
        lin.mod.11 <- lm(df$LikingOrder ~ poly(df$C11,3, raw=TRUE))
        lin.mod.12 <- lm(df$LikingOrder ~ poly(df$C12,3, raw=TRUE))
        lin.mod.13 <- lm(df$LikingOrder ~ poly(df$C13,3, raw=TRUE))
        lin.mod.14 <- lm(df$LikingOrder ~ poly(df$C14,3, raw=TRUE))
        lin.mod.15 <- lm(df$LikingOrder ~ poly(df$C15,3, raw=TRUE))
        lin.mod.16 <- lm(df$LikingOrder ~ poly(df$C16,3, raw=TRUE))
        lin.mod.17 <- lm(df$LikingOrder ~ poly(df$C17,3, raw=TRUE))
        lin.mod.18 <- lm(df$LikingOrder ~ poly(df$C18,3, raw=TRUE))
        lin.mod.19 <- lm(df$LikingOrder ~ poly(df$C19,3, raw=TRUE))
    
        lst <- lapply(ls(pattern="lin.mod"), get)
        allmodels <- lapply(lst, summary)
    
        res <- broom::tidy(allmodels)
    '
    , @input_data_1 = @inquery
    , @output_data_1_name = N'res'
    , @params = N'@StudyID int'
    ,@StudyID = @StudyID 
    --- Edit this line to handle the output data frame.
    --WITH RESULT SETS ((StudyID int, RespID int, LikingOrder int, NewColumn int, 
    res varchar(max)));
    END;
    

    当向上面的脚本传递有效studyID输入参数时,上面的脚本将引发以下错误:

    Error in setNames(data.frame(data), value.name) : 
    'names' attribute [1] must be the same length as the vector [0]
    Calls: source ... <Anonymous> -> <Anonymous> -> melt.default -> setNames
    In addition: There were 50 or more warnings (use warnings() to see the first 
    50)
    

    我的输入数据如下: enter image description here 期望的结果是在一个data.frame中获得所有19个模型的摘要。如何解决错误并修改代码以完成最终结果?

    2 回复  |  直到 6 年前
        1
  •  4
  •   Calum You    6 年前

    如果没有您的工作环境,我不确定您的数据是如何设置的,但似乎您正在尝试将一个具有相同因变量的模型应用于多个预测器列。我想丢失的那件是打电话给 rowwise 根据 broom and dplyr 小插曲,但不完全确定。不过,这里有一个使用 mtcars 数据集。请注意,该结构将使用 tidy 在具有包含模型的列表列的rowwise数据帧上,而不是直接在列表上。您还可以通过在包含预测值列的数据帧上进行映射来直接创建模型,而不是在环境中混乱地存储模型并需要使用 get ls . 任何时候你发现自己在使用 LS ,考虑是否可以将元素放在列表中!

    编辑:在再次查看了这个问题提示的小插曲之后,我意识到您实际上可以像现在所示快速进行管道操作(有关使用 enframe . 通过 gather 将数据转换成适合分组模型拟合的格式,可以更整齐地获得所需的结果!

    library(tidyverse)
    library(broom)
    
    mtcars %>%
      gather(predictor, measure, -mpg) %>%
      group_by(predictor) %>%
      do(tidy(lm(mpg ~ measure, .)))
    #> # A tibble: 20 x 6
    #> # Groups:   predictor [10]
    #>    predictor term        estimate std.error statistic  p.value
    #>    <chr>     <chr>          <dbl>     <dbl>     <dbl>    <dbl>
    #>  1 am        (Intercept)  17.1      1.12       15.2   1.13e-15
    #>  2 am        measure       7.24     1.76        4.11  2.85e- 4
    #>  3 carb      (Intercept)  25.9      1.84       14.1   9.22e-15
    #>  4 carb      measure      -2.06     0.569      -3.62  1.08e- 3
    #>  5 cyl       (Intercept)  37.9      2.07       18.3   8.37e-18
    #>  6 cyl       measure      -2.88     0.322      -8.92  6.11e-10
    #>  7 disp      (Intercept)  29.6      1.23       24.1   3.58e-21
    #>  8 disp      measure      -0.0412   0.00471    -8.75  9.38e-10
    #>  9 drat      (Intercept)  -7.52     5.48       -1.37  1.80e- 1
    #> 10 drat      measure       7.68     1.51        5.10  1.78e- 5
    #> 11 gear      (Intercept)   5.62     4.92        1.14  2.62e- 1
    #> 12 gear      measure       3.92     1.31        3.00  5.40e- 3
    #> 13 hp        (Intercept)  30.1      1.63       18.4   6.64e-18
    #> 14 hp        measure      -0.0682   0.0101     -6.74  1.79e- 7
    #> 15 qsec      (Intercept)  -5.11    10.0        -0.510 6.14e- 1
    #> 16 qsec      measure       1.41     0.559       2.53  1.71e- 2
    #> 17 vs        (Intercept)  16.6      1.08       15.4   8.85e-16
    #> 18 vs        measure       7.94     1.63        4.86  3.42e- 5
    #> 19 wt        (Intercept)  37.3      1.88       19.9   8.24e-19
    #> 20 wt        measure      -5.34     0.559      -9.56  1.29e-10
    

    创建于2018-07-10 reprex package (v0.2.0版)。

        2
  •  1
  •   Ben Bolker    6 年前

    你还没有给我们一个可重复的例子;这里有一些似乎有效的例子。一些潜在的问题:你需要跑步 tidy 在模型上,而不是总结;最好避免 $ -模型公式中的索引。

    library(purrr)
    df <- mtcars
    predvars <- colnames(mtcars)[-1]
    

    …这将是 paste0("C",1:19) 就你而言…

    respvar <- "mpg"  ## would be "LikingOrder"
    predpolys <- sprintf("poly(%s,3,raw=TRUE)",predvars)
    forms <- map(predpolys, reformulate,
                 response=respvar)       ## construct formulas
    names(forms) <- predvars             ## names will be inherited by model lists
    modList <- map(forms, lm, data= df)  ## fit all models
    res <- map(modList, broom::tidy)     ## tidy all models
    

    如果需要,你可以 dplyr::bind_rows(res,.id="predvar") 此时…