代码之家  ›  专栏  ›  技术社区  ›  João Machado

通过变量id获取两列的所有可能组合

  •  5
  • João Machado  · 技术社区  · 6 年前

    我需要针对每个 group id,得到该组取值中任取 2 个值的所有排列。

    我有这个:

    group id    value
       1         a
       1         b
       1         c
       2         b
       2         c
       2         d
    

    想要这个:

    group id    value1   value2
       1         a        b
       1         a        c
       1         b        a
       1         b        c  
       1         c        a
       1         c        b
       2         b        c
       2         b        d
       2         c        b
       2         c        d
       2         d        b
       2         d        c
    
    4 回复  |  直到 5 年前
        1
  •  4
  •   gented    5 年前

    下面是一个快速而简单的方法:

    library(gtools)
    library(data.table)
    
    # Example input: one group id per row plus the value observed in that row.
    indices <- c(1,1,1,2,2,2)
    variables <- c("a", "b", "c", "b", "c", "d")
    dt <- data.table(indices, variables)
    
    # Return every ordered pair (2-permutation) of the distinct entries in
    # df$variables as a two-column data.table (V1, V2).
    #
    # df: a data.frame / data.table with a `variables` column, e.g. the .SD
    #     subset of one group.
    #
    # Both the element count and the elements come from the same de-duplicated
    # vector, so the `n` and `v` passed to permutations() can never disagree.
    get_permutations <- function(df){
        vals <- unique(df$variables)
        # permutations() stops when fewer than 2 elements are available; a group
        # like that simply has no pairs, so hand back an empty, same-typed table
        # instead of aborting the whole grouped call.
        if (length(vals) < 2L) {
            return(data.table(V1 = vals[0L], V2 = vals[0L]))
        }
        as.data.table(permutations(length(vals), 2, vals))
    }
    
    # Apply the helper group by group; the grouping column is prepended to each
    # group's result.
    ds <- dt[, get_permutations(.SD), by = indices]
    
        indices V1 V2
     1:       1  a  b
     2:       1  a  c
     3:       1  b  a
     4:       1  b  c
     5:       1  c  a
     6:       1  c  b
     7:       2  b  c
     8:       2  b  d
     9:       2  c  b
    10:       2  c  d
    11:       2  d  b
    12:       2  d  c
    
        2
  •  4
  •   MKR    6 年前

    一种可能的解决方案是使用 data.table 的 split 配合 expand.grid。

    步骤如下:

      library(data.table)
    
      # Convert df to a data.table by reference so split() accepts `by =`.
      setDT(df)
    
      # One data frame per group holding the full cross product of that group's
      # values with themselves (this still includes value-with-itself pairs).
      ll <- lapply(split(df, by = "group_id"),
         function(x) cbind(group_id = unique(x$group_id),
         expand.grid(x$value, x$value, stringsAsFactors = FALSE)))
    
      # Stack the per-group frames, then drop the rows pairing a value with
      # itself. Plain subsetting is used because %>% and filter() need dplyr,
      # which this snippet never loads.
      all_pairs <- do.call("rbind", ll)
      # which() discards NA comparisons, matching what filter() would keep.
      keep <- which(all_pairs$Var1 != all_pairs$Var2)
      result <- all_pairs[keep, ]
      rownames(result) <- NULL  # renumber rows 1..n
      result
    
    #Result
       group_id Var1 Var2
    1         1    b    a
    2         1    c    a
    3         1    a    b
    4         1    c    b
    5         1    a    c
    6         1    b    c
    7         2    c    b
    8         2    d    b
    9         2    b    c
    10        2    d    c
    11        2    b    d
    12        2    c    d
    

    数据

    # Sample input: one row per (group_id, value) observation, parsed from an
    # inline whitespace-separated table. `value` is kept as character, not
    # converted to a factor.
    df <- read.table(
      header = TRUE,
      stringsAsFactors = FALSE,
      text = "group_id    value
    1         a
    1         b
    1         c
    2         b
    2         c
    2         d"
    )
    
        3
  •  2
  •   YOLO    6 年前

    您要找的是 gtools 包中的 permutations 函数。

    ## In general
    
    library(gtools)
    
    # Generic illustration: every ordered pair drawn from a small character set,
    # returned as a two-column data frame (V1, V2).
    char.var <- letters[1:3]
    df <- as.data.frame(
        permutations(n = length(char.var), r = 2, v = char.var)
    )
    df
    
       V1 V2
    1  a  b
    2  a  c
    3  b  a
    4  b  c
    5  c  a
    6  c  b
    
    ## answer for question
    
    library(data.table)
    library(gtools)
    
    df <- data.frame(groupid = c(1,1,1,2,2,2), value = c('a','b','c','b','c','d'))
    # data.frame() may have turned the strings into a factor (R < 4.0 default),
    # so force plain character values before permuting them.
    df$value <- as.character(df$value)
    
    setDT(df)
    
    # Build one table of ordered pairs per group and stack them in a single
    # rbindlist() call, rather than growing `output` with rbind() on every
    # loop iteration.
    output <- rbindlist(lapply(unique(df$groupid), function(gid) {
        # Distinct values of this group: permutations() stops with
        # "too few different elements" if n counts duplicated entries.
        group_values <- unique(df[groupid == gid, value])
        # A group with fewer than two distinct values has no pairs;
        # rbindlist() skips NULL items.
        if (length(group_values) < 2L) {
            return(NULL)
        }
        pairs <- as.data.table(
            permutations(length(group_values), r = 2, group_values)
        )
        setnames(pairs, c("value1", "value2"))  # rename V1/V2 by reference
        pairs[, groupid := gid]                 # tag each row with its group
        pairs
    }))
    
    print(output)
    
            value1 value2 groupid
     1:      a      b       1
     2:      a      c       1
     3:      b      a       1
     4:      b      c       1
     5:      c      a       1
     6:      c      b       1
     7:      b      c       2
     8:      b      d       2
     9:      c      b       2
    10:      c      d       2
    11:      d      b       2
    12:      d      c       2
    
        4
  •  1
  •   Mohammad Tanvir Ahamed    6 年前

    就像 João Machado 想要的那样:

    # Sample input: three values for group 1 and three for group 2.
    df <- data.frame(group_id = c(rep(1,3),rep(2,3)), value = c(letters[1:3],letters[2:4]))
    # One data frame per group id.
    by_group <- split(x = df, f = df$group_id)
    # For each group, list every ordered pair of its distinct values.
    # gtools is called through its namespace, so no library() call is needed
    # inside the closure.
    pairs_by_group <- lapply(by_group, function(grp) {
      # as.vector() turns a factor column into character; unique() keeps
      # permutations() from stopping when a value is repeated within a group.
      vals <- unique(as.vector(grp[, "value"]))
      perms <- data.frame(gtools::permutations(n = length(vals), r = 2, v = vals))
      colnames(perms) <- c("value1", "value2")
      perms$group_id <- unique(as.vector(grp[, "group_id"]))
      # Return the reordered frame visibly rather than as the value of an
      # assignment.
      perms[, c("group_id", "value1", "value2")]
    })
    # Stack the per-group results back into a single data frame named df.
    df <- do.call(rbind, pairs_by_group)