代码之家  ›  专栏  ›  技术社区  ›  Faith

创建包含四个列表中所有可能值组合的数据表

  •  1
  • Faith  · 技术社区  · 6 年前

    我有以下四个清单。

    varnames <- list("beefpork", "breakfast", "breakfast_yn", "diet_soda", "food_label", "fruit_and_veggie", "fruit_juice", "fruits", "milk",                      "min_foods","regular_soda", "ssb", "total_fruit", "vegetables",                      "asthma", "bmiclass3", "bmiclass4","bmiclass5", "dental_absence",                     "dental_appt", "diabetes", "food_allergies", "sore_teeth", "trying_weight",                     "count_pa60days", "count_vigpa20days", "gaming_bedroom", "other_organized_pa", "pa30outdoor","paguidelines", "pc_time", "school_transport", "sport_teams", "tv_bedroom", "tv_time_char", "video_games_char")
    grades <- list("2", "4", "8", "11")
    groups <- list("none", "ethnic", "bordercounty")
    regions <- list("state", "hsr")
    

    以及以下返回整数的函数:

    all_empty = function(outcome, groupvar, gradevar, regionvar){
      #How many observations?
    
      if (groupvar == "none") 
        fmla <- as.formula(paste0("~", outcome))
      else 
        fmla <- as.formula(paste0("~", outcome, "+", groupvar))
    
      if (regionvar == "hsr")
        mydata = span_phrwts
      else if (regionvar == "state" & groupvar %in% c("none", "ethnic"))
        mydata = span_statewts
      else if (regionvar == "state" & groupvar == "bordercounty")
        mydata = span_borderwts
      else mydata = span_statewts
    
      myrow = svytable(fmla, subset(mydata, grade==gradevar)) %>% nrow()
      return(myrow)
    }
    

    我希望最后一张桌子看起来像这样,但没有成功:

    Variable          Grade          Group           Region     Obs
    beefpork          2              none            state      5
    beefpork          4              none            state      5
    beefpork          8              none            state      3
    beefpork          11             none            state      0
    

    伪列 正确地。

    output_all <- matrix(ncol = 5, nrow = length(varnames)*length(grades)*length(groups)*length(regions))
    for(l in 1:length(regions)) {
      for (k in 1:length(grades)) {
        for(j in 1:length(groups)) {
          for(i in 1:length(varnames)){
            rownum = i + ((length(groups)*length(grades)*length(regions)) - 1)
            output_all[rownum, 1] = varnames[[i]]
            output_all[rownum, 2] = groups[[j]]
            output_all[rownum, 3] = grades[[k]]
            output_all[rownum, 4] = regions[[l]]
            output_all[rownum, 5] = all_empty(varnames[[i]], groups[[j]], grades [[k]], regions[[l]])
    
          }
        }
      } 
    }
    output_all %>% as_data_frame() %>% View()
    

    任何帮助/建议都将不胜感激!

    3 回复  |  直到 6 年前
        1
  •  4
  •   Jon Spring    6 年前

    如果可以使用向量而不是列表, tidyr::crossing 似乎是个直截了当的方法。

    varnames <- c("beefpork", "breakfast", "breakfast_yn", "diet_soda", "food_label", "fruit_and_veggie", "fruit_juice", "fruits", "milk",                      "min_foods","regular_soda", "ssb", "total_fruit", "vegetables",                      "asthma", "bmiclass3", "bmiclass4","bmiclass5", "dental_absence",                     "dental_appt", "diabetes", "food_allergies", "sore_teeth", "trying_weight",                     "count_pa60days", "count_vigpa20days", "gaming_bedroom", "other_organized_pa", "pa30outdoor","paguidelines", "pc_time", "school_transport", "sport_teams", "tv_bedroom", "tv_time_char", "video_games_char")
    grades <- c("2", "4", "8", "11")
    groups <- c("none", "ethnic", "bordercounty")
    regions <- c("state", "hsr")
    
    
    tidyr::crossing(varnames, grades, groups, regions)
    
    
    # A tibble: 864 x 4
       varnames grades groups       regions
       <chr>    <chr>  <chr>        <chr>  
     1 asthma   11     bordercounty hsr    
     2 asthma   11     bordercounty state  
     3 asthma   11     ethnic       hsr    
     4 asthma   11     ethnic       state  
     5 asthma   11     none         hsr    
     6 asthma   11     none         state  
     7 asthma   2      bordercounty hsr    
     8 asthma   2      bordercounty state  
     9 asthma   2      ethnic       hsr    
    10 asthma   2      ethnic       state  
    
        2
  •  3
  •   Billy34    6 年前

    data.table 你有这个功能 CJ 创建交叉连接。然后我们添加一个row num(Idx)来执行函数的按行调用。我们最终删除Idx列

    library(data.table)
    dt <- CJ(varnames=varnames,grades=grades,groups=groups,regions=regions)
    dt[,Idx:=.I]
    dt[,by=Idx, Obs:=all_empty(outcome, groupvar, gradevar, regionvar)]
    dt[,Idx:=NULL]
    
        3
  •  1
  •   Parfait    6 年前

    考虑 expand.grid mapply 将列值elementwise传递给用户定义的方法。

    varnames <- c("beefpork", "breakfast", "breakfast_yn", "diet_soda", 
                  "food_label", "fruit_and_veggie", "fruit_juice", 
                  "fruits", "milk", "min_foods", "regular_soda", 
                  "ssb", "total_fruit", "vegetables", "asthma", 
                  "bmiclass3", "bmiclass4","bmiclass5", "dental_absence",
                  "dental_appt", "diabetes", "food_allergies", 
                  "sore_teeth", "trying_weight", "count_pa60days", 
                  "count_vigpa20days", "gaming_bedroom", "other_organized_pa", 
                  "pa30outdoor","paguidelines", "pc_time", "school_transport", 
                  "sport_teams", "tv_bedroom", "tv_time_char", "video_games_char")
    grades <- c("2", "4", "8", "11")
    groups <- c("none", "ethnic", "bordercounty")
    regions <- c("state", "hsr")
    
    df <- expand.grid(varnames=varnames, grades=grades, groups=groups, regions=regions,
                      stringsAsFactors = FALSE)
    str(df)
    # 'data.frame': 864 obs. of  4 variables:
    # $ varnames: chr  "beefpork" "breakfast" "breakfast_yn" "diet_soda" ...
    # $ grades  : chr  "2" "2" "2" "2" ...
    # $ groups  : chr  "none" "none" "none" "none" ...
    # $ regions : chr  "state" "state" "state" "state" ...
    # ...
    
    df$fmla <- ifelse(df$groups == "none", paste0("~", outcome), paste0("~", outcome, "+", groupvar))
    
    df$mydata <- ifelse(df$regions == "hsr", "span_phrwts",
                        ifelse(df$regions == "state" & df$groups %in% c("none", "ethnic"), "span_statewts",
                               ifelse(df$regions == "state" & df$groups == "bordercounty", "span_borderwts", 
                                      "span_statewts")))
    

    all_empty <- function(outcome, groupvar, gradevar, regionvar, fmla, mydata){
      # How many observations?
      myrow <- svytable(as.formula(fmla), subset(get(mydata), grade==gradevar))
      return(nrow(myrow))
    }
    
    df$Obs <- mapply(all_empty, df$varnames, df$groups, df$grades, 
                     df$regions, df$fmla, df$mydata)