编辑
我重读了你的问题,意识到自己漏掉了关于冲突的那部分;处理冲突的一个可行方案是使用 powerjoin 包,例如:
library(tidyverse)
# Left table: ten ids, each with a text label and a y/n response.
df1 <- data.frame(
  id = c(
    "632592651", "633322173", "634703802", "634927873", "635812953",
    "636004739", "636101211", "636157799", "636263106", "636752420"
  ),
  text = c(
    "asdf", "cat", "dog", "mouse", "elephant",
    "goose", "rat", "mice", "kitty", "kitten"
  ),
  response = c("y", "y", "y", "n", "n", "y", "y", "n", "n", "y")
)

# Right table: the same ten ids plus ten more, with a numeric volume. The text
# of the first id ("asdf_xyz") differs from df1 ("asdf").
df2 <- data.frame(
  id = c(
    "632592651", "633322173", "634703802", "634927873", "635812953",
    "636004739", "636101211", "636157799", "636263106", "636752420",
    "636809222", "2004722036", "2004894388", "2005045755", "2005535472",
    "2005630542", "2005788781", "2005809679", "2005838317", "2005866692"
  ),
  text = c(
    "asdf_xyz", "cat", "dog", "mouse", "elephant",
    "goose", "rat", "mice", "kitty", "kitten",
    "tiger_xyz", "lion", "leopard", "ostrich", "kangaroo",
    "platypus", "fish", "reptile", "mammals", "amphibians_xyz"
  ),
  volume = c(
    1234, 432, 324, 333, 2223, 412346, 7456, 3456, 2345, 2345,
    6, 345, 23, 2, 4778, 234, 8675, 3459, 8, 9
  )
)

# Target result: the ten df1 ids with df2's volume, text set to NA where the
# two tables disagree, followed by ten all-NA rows.
expected_outcome <- data.frame(
  id = c(
    "632592651", "633322173", "634703802", "634927873", "635812953",
    "636004739", "636101211", "636157799", "636263106", "636752420",
    rep(NA, 10)
  ),
  text = c(
    NA, "cat", "dog", "mouse", "elephant",
    "goose", "rat", "mice", "kitty", "kitten",
    rep(NA, 10)
  ),
  volume = c(
    1234, 432, 324, 333, 2223, 412346, 7456, 3456, 2345, 2345,
    rep(NA, 10)
  )
)
library(powerjoin)
# Full join on `id`. `text` exists in both tables, so it is reconciled
# explicitly: keep the value when both sides agree, otherwise set it to
# missing. `text` is a character column, so the placeholder is NA_character_
# (NA_integer_ only worked through implicit coercion).
joined_df <- power_full_join(
  df1, df2,
  by = "id",
  conflict = rw ~ ifelse(.x != .y, NA_character_, .x)
)

# Rows that come only from df2 have no `response`. Blank out the reported
# columns on those rows, then drop `response`. The mask column itself is left
# out of across() so it is never rewritten while being read.
final_df <- joined_df %>%
  mutate(across(c(id, text, volume), ~ ifelse(is.na(response), NA, .x))) %>%
  select(id, text, volume)
final_df
#> id text volume
#> 1 632592651 <NA> 1234
#> 2 633322173 cat 432
#> 3 634703802 dog 324
#> 4 634927873 mouse 333
#> 5 635812953 elephant 2223
#> 6 636004739 goose 412346
#> 7 636101211 rat 7456
#> 8 636157799 mice 3456
#> 9 636263106 kitty 2345
#> 10 636752420 kitten 2345
#> 11 <NA> <NA> NA
#> 12 <NA> <NA> NA
#> 13 <NA> <NA> NA
#> 14 <NA> <NA> NA
#> 15 <NA> <NA> NA
#> 16 <NA> <NA> NA
#> 17 <NA> <NA> NA
#> 18 <NA> <NA> NA
#> 19 <NA> <NA> NA
#> 20 <NA> <NA> NA
# Check the result against the expected table. Base all.equal() is used
# because dplyr::all_equal() is deprecated as of dplyr 1.1.0. Unlike
# all_equal(), it also compares row and column order.
all.equal(final_df, expected_outcome)
#> [1] TRUE
于 2022-07-01 由 reprex 包 (v2.0.1) 创建
原始答案
这种方法能解决您的问题吗?
library(tidyverse)
# Left table: ten ids, each with a text label and a y/n response.
df1 <- data.frame(
  id = c(
    "632592651", "633322173", "634703802", "634927873", "635812953",
    "636004739", "636101211", "636157799", "636263106", "636752420"
  ),
  text = c(
    "asdf", "cat", "dog", "mouse", "elephant",
    "goose", "rat", "mice", "kitty", "kitten"
  ),
  response = c("y", "y", "y", "n", "n", "y", "y", "n", "n", "y")
)

# Right table: the same ten ids plus ten more. Here `volume` is stored as
# character strings, not numbers.
df2 <- data.frame(
  id = c(
    "632592651", "633322173", "634703802", "634927873", "635812953",
    "636004739", "636101211", "636157799", "636263106", "636752420",
    "636809222", "2004722036", "2004894388", "2005045755", "2005535472",
    "2005630542", "2005788781", "2005809679", "2005838317", "2005866692"
  ),
  text = c(
    "asdf_xyz", "cat", "dog", "mouse", "elephant",
    "goose", "rat", "mice", "kitty", "kitten",
    "tiger_xyz", "lion", "leopard", "ostrich", "kangaroo",
    "platypus", "fish", "reptile", "mammals", "amphibians_xyz"
  ),
  volume = c(
    "1234", "432", "324", "333", "2223",
    "412346", "7456", "3456", "2345", "2345",
    "6", "345", "23", "2", "4778",
    "234", "8675", "3459", "8", "9"
  )
)
# Expected result, typed in as a whitespace-separated table. The header names
# three columns while every data row has four fields, so read.table() uses the
# leading 1..20 field as row names. Nothing in the code shown after this block
# reads `final`; presumably it is kept only as a reference for the desired
# output.
final <- read.table(text = "id text volume
1 632592651 NA 1234
2 633322173 cat 432
3 634703802 dog 324
4 634927873 mouse 333
5 635812953 elephant 2223
6 636004739 goose 412346
7 636101211 rat 7456
8 636157799 mice 3456
9 636263106 kitty 2345
10 636752420 kitten 2345
11 NA NA NA
12 NA NA NA
13 NA NA NA
14 NA NA NA
15 NA NA NA
16 NA NA NA
17 NA NA NA
18 NA NA NA
19 NA NA NA
20 NA NA NA", header = TRUE)
# Natural full join: with no `by`, dplyr joins on every column the two tables
# share, here `id` and `text` (see the "Joining, by" message in the output).
# An id whose `text` differs between the tables therefore does not match and
# comes through as two separate rows.
# After the join, id, text and volume (columns 1, 2 and 4) are set to NA on
# every row that has no `response`, i.e. rows that came only from df2.
full_join(df1, df2) %>%
  mutate(
    across(
      c(id, text, volume),
      ~ ifelse(is.na(response), NA, .x)
    )
  )
#> Joining, by = c("id", "text")
#> id text response volume
#> 1 632592651 asdf y <NA>
#> 2 633322173 cat y 432
#> 3 634703802 dog y 324
#> 4 634927873 mouse n 333
#> 5 635812953 elephant n 2223
#> 6 636004739 goose y 412346
#> 7 636101211 rat y 7456
#> 8 636157799 mice n 3456
#> 9 636263106 kitty n 2345
#> 10 636752420 kitten y 2345
#> 11 <NA> <NA> <NA> <NA>
#> 12 <NA> <NA> <NA> <NA>
#> 13 <NA> <NA> <NA> <NA>
#> 14 <NA> <NA> <NA> <NA>
#> 15 <NA> <NA> <NA> <NA>
#> 16 <NA> <NA> <NA> <NA>
#> 17 <NA> <NA> <NA> <NA>
#> 18 <NA> <NA> <NA> <NA>
#> 19 <NA> <NA> <NA> <NA>
#> 20 <NA> <NA> <NA> <NA>
#> 21 <NA> <NA> <NA> <NA>
于 2022-07-01 由 reprex 包 (v2.0.1) 创建