代码之家  ›  专栏  ›  技术社区  ›  Wilhelm Fantastisch

R中for循环的组内秩

  •  0
  • Wilhelm Fantastisch  · 技术社区  · 7 年前

    (另见相关问题:Specific group rankings in R)。

    Category    ID          Score.08.2007   Score.09.2007    Rank.08.2007    Rank.09.2007   ...
    Orange      FSGBR070N3  0.16            ...              5               ...
    Orange      FSGBR070N3  0.05            ...              7               ...
    Orange      FSGBR070N3  0.11                             6
    Orange      FS00008L4G  0.28                             1
    Orange      FS00008VLD  0.27                             2
    Orange      FS00008VLD  0.27                             3
    Orange      FS00008VLD  0.27                             4
    Orange      FS00009SQX  -2.03                            8
    Orange      FS00009SQX  NA                          
    Orange      FSUSA0A1KW  NA          
    Orange      FSUSA0A1KW  NA  
    Orange      FSUSA0A1KX  NA  
    Orange      FSUSA0A1KY  NA  
    Orange      FS0000B389  NA  
    Banana      FS000092GP  96.25                            1
    Banana      FS000092GP  96.25                            2
    Banana      FS000092GP  96.25                            3
    Banana      FS000092GP  52.33                            4
    Banana      FS0000ATLN  31.73                            5
    Banana      FSUSA0AVMF  1.38                             7
    Banana      FSGBR058O8  1.37                             8
    Banana      FSGBR05845  2.24                             6
    

    # Attempt to rank every column from the 4th onwards within each
    # Morningstar category, leaving NA scores as NA ranks (na.last = "keep").
    for (i in 4:ncol(MRAR)){
      # NOTE(review): lapply() returns a list holding one rank vector per
      # category, and each vector is only as long as that category's row count.
      # Assigning that list to eq_ranks[i] is presumably what raises the
      # "replacement element 1 has 3159 rows, need 3530" error -- confirm that
      # eq_ranks is a data.frame with the same number of rows as MRAR.
      eq_ranks[i] <- lapply(unique(MRAR$Morningstar.Category),function(x)
        {
         # Ranks of column i restricted to the rows of category x.
         a <- rank(MRAR[MRAR$Morningstar.Category == x, i], na.last="keep")
         return(a)
      })
    }
    

    运行上面的循环时出现如下错误:

    Error in `[<-.data.frame`(`*tmp*`, i, value = list(c(NA, 1047, NA, NA,  : 
    replacement element 1 has 3159 rows, need 3530 
    

    我也见过 ave 方法,但 ave 的语法似乎不支持 na.last = "keep" 这一参数。我还尝试了 dplyr 方法:

    # Column names of ER, flattened to a plain character vector.
    # NOTE(review): ER is not defined in this snippet -- presumably the table of
    # scores; confirm against the full script.
    aux <- as.vector(cbind(names(ER)))
    
    # Attempted dplyr version of the per-category ranking.
    # NOTE(review): this does not look runnable as written:
    #   - the pipeline starts from the `group_by` argument, while `MRAR` is
    #     handed to mutate_at() in the position where the column selection goes;
    #   - `!!Morningstar.Category` unquotes an object that is not defined here;
    #   - mutate_at() takes (.tbl, .vars, .funs), so vars(aux) would have to come
    #     before the function, and the rank call carries no na.last = "keep".
    eq_ranks <- function(MRAR,group_by){
      group_by %>%
        group_by(!!Morningstar.Category) %>% 
    mutate_at(MRAR,quo(eq_rank=rank(MRAR)), vars(aux))
    }
    

    Wilhelm Fantastisch

    1 回复  |  直到 7 年前
        1
  •  0
  •   CPak    7 年前

    不确定这是否正是你想要的,但下面的方法在这个示例 data.frame 上对我有效。希望有帮助。

    # Toy data for the ranking example: two categories of 10 rows each and two
    # score columns containing runs of NA, so the NA handling of the ranking is
    # exercised. The scores come from runif() without a seed, so the actual
    # numbers differ from run to run.
    df <- data.frame(
      Category = c(rep("Orange", 10), rep("Banana", 10)),
      Score.08.2007 = c(
        runif(6), rep(NA, 4), runif(4), rep(NA, 2), runif(4)
      ),
      Score.09.2017 = c(
        runif(5), rep(NA, 3), runif(2), runif(4), rep(NA, 4), runif(2)
      ),
      # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
      stringsAsFactors = FALSE
    )
    

    dplyr解决方案

    library(dplyr)
    
    #' Append within-group rank columns to a data frame
    #'
    #' Ranks each selected column separately inside every group, leaving `NA`
    #' scores as `NA` ranks (`na.last = "keep"`), and binds the ranks onto `df`
    #' under new names. Requires dplyr >= 1.0.0 for `across()`.
    #'
    #' @param theseCols Columns of `df` to rank: a character vector of names or
    #'   any tidyselect expression.
    #' @param newCols Character vector of names for the rank columns, one per
    #'   ranked column, in the order selected by `theseCols`.
    #' @param df A data frame containing the selected columns and the grouping
    #'   column.
    #' @param groupCol Name of the grouping column, as a string. Defaults to
    #'   `"Category"`.
    #' @return `df` with one extra rank column appended per ranked column.
    eq_ranks <- function(theseCols, newCols, df, groupCol = "Category") {
      theseCols <- enquo(theseCols)
      ranks <- df %>%
        group_by(across(all_of(groupCol))) %>%
        # across() with a lambda replaces the deprecated mutate_at()/funs() pair.
        mutate(across(!!theseCols, ~ rank(.x, na.last = "keep"))) %>%
        ungroup() %>%
        # Keep only the ranked columns, in the order requested, so that extra
        # columns in `df` (e.g. an ID) can never be mislabelled by `newCols`.
        select(!!theseCols)
      if (length(newCols) != ncol(ranks)) {
        stop(
          "`newCols` must supply exactly one name per ranked column.",
          call. = FALSE
        )
      }
      cbind(df, setNames(ranks, newCols))
    }
    
    # Every column after the leading grouping column holds scores to rank.
    aux <- names(df)[-1]
    # Rank columns reuse the score column names with the prefix swapped.
    newCols <- sub(pattern = "Score", replacement = "Rank", x = aux)
    eq_ranks(theseCols = aux, newCols = newCols, df = df)
    

    输出

    structure(list(Category = c("Orange", "Orange", "Orange", "Orange", 
    "Orange", "Orange", "Orange", "Orange", "Orange", "Orange", "Banana", 
    "Banana", "Banana", "Banana", "Banana", "Banana", "Banana", "Banana", 
    "Banana", "Banana"), Score.08.2007 = c(0.757087148027495, 0.202692255144939, 
    
    0.711121222469956, 0.121691921027377, 0.245488513959572, 0.14330437942408, 
    NA, NA, NA, NA, 0.239629415096715, 0.0589343772735447, 0.642288258532062, 
    0.876269212691113, NA, NA, 0.778914677444845, 0.79730882588774, 
    0.455274453619495, 0.410084082046524), Score.09.2017 = c(0.810870242770761, 
    0.604933290276676, 0.654723928077146, 0.353197271935642, 0.270260145887733, 
    NA, NA, NA, 0.99268406117335, 0.633493264438584, 0.213208135217428, 
    0.129372348077595, 0.478118034312502, 0.924074469832703, NA, 
    NA, NA, NA, 0.59876096714288, 0.976170694921166), Rank.08.2007 = c(6, 
    3, 5, 1, 4, 2, NA, NA, NA, NA, 2, 1, 5, 8, NA, NA, 6, 7, 4, 3
    ), Rank.09.2017 = c(6, 3, 5, 2, 1, NA, NA, NA, 7, 4, 2, 1, 3, 
    5, NA, NA, NA, NA, 4, 6)), .Names = c("Category", "Score.08.2007", 
    "Score.09.2017", "Rank.08.2007", "Rank.09.2017"), row.names = c(NA, 
    -20L), class = "data.frame")