R语言 - 尝试在RStudio中根据另一列字符串的第一个单词添加分类(case)列



小背景,我正在进行一个食物选择实验,包括不同类型的食物,如牛排,披萨,汉堡等。我想知道哪个试验使用了哪种食物,这样我就可以进行GLM来调查食物类型是否对食物选择有影响。

问题是,这些食物类型在我的数据中拼写不一致:有的首字母大写,有的没有(我知道这是我的失误)。此外,我想从"crossCheckExperiment"列中匹配每种食物的这两种可能写法,而该列的每个值里除了食物名称之外还包含很多其他内容。

这是我的数据集


# Example dataset (dput() output): 20 trials of one session of the food-choice
# experiment. It is assigned to `my_data_filt`, the name the later snippets
# operate on, so the example can be run as-is.
# The element names of `nreps` are numeric-looking ("53047"), so they must be
# backtick-quoted; a bare `53047 = 144L` inside c() is a parse error.
my_data_filt <- structure(list(session_id = c(53047, 53047, 53047, 53047, 53047,
53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047,
53047, 53047, 53047, 53047, 53047, 53047), project_id = c(495,
495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495,
495, 495, 495, 495, 495, 495), exp_name = c("Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice"), exp_id = c(569, 569, 569, 569,
569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569,
569, 569, 569), user_id = c(46946, 46946, 46946, 46946, 46946,
46946, 46946, 46946, 46946, 46946, 46946, 46946, 46946, 46946,
46946, 46946, 46946, 46946, 46946, 46946), user_sex = c("male",
"male", "male", "male", "male", "male", "male", "male", "male",
"male", "male", "male", "male", "male", "male", "male", "male",
"male", "male", "male"), user_status = c("guest", "guest", "guest",
"guest", "guest", "guest", "guest", "guest", "guest", "guest",
"guest", "guest", "guest", "guest", "guest", "guest", "guest",
"guest", "guest", "guest"), user_age = c(21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21),
trial_name = c("Steak_V_L_03_NV_NL_04", "Steak_V_L_01_NV_NL_02",
"Chicken_V_NL_02_NV_L_01", "Sausage_V_L_01_NV_NL_02", "Curry_V_NL_06_NV_L_05",
"Steak_NV_L_04_NV_NL_03", "Curry_V_NL_02_NV_L_01", "Pizza_V_NL_04_NV_L_03",
"Pizza_V_L_05_NV_NL_06", "Steak_NV_L_02_NV_NL_01", "Burger_V_NL_04_NV_L_03",
"Curry_NV_L_06_NV_NL_05", "Sausage_V_L_06_V_NL_05", "Steak_V_L_05_NV_NL_06",
"Curry_V_L_01_NV_NL_02", "Burger_V_L_02_V_NL_01", "Steak_V_NL_02_NV_L_01",
"Sausage_V_L_03_NV_NL_04", "Pizza_NV_L_06_NV_NL_05", "Pizza_NV_L_02_NV_NL_01"
), trial_n = c(29, 25, 50, 1, 46, 32, 38, 22, 69, 28, 14,
48, 67, 33, 37, 11, 26, 5, 72, 20), order = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
), dv = c(0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0,
0, 0, 1, 1), rt = c(2054, 2417, 2007, 3003, 2391, 3238, 3316,
2371, 4109, 3052, 3274, 3658, 2824, 1888, 4198, 5815, 2651,
6591, 5766, 3682), side = c(1, 2, 2, 1, 1, 1, 1, 2, 2, 1,
2, 1, 2, 1, 1, 2, 2, 1, 2, 2), dt = structure(c(1607441861,
1607441863, 1607441865, 1607441868, 1607441871, 1607441874,
1607441877, 1607441880, 1607441884, 1607441887, 1607441890,
1607441894, 1607441896, 1607441898, 1607441903, 1607441909,
1607441911, 1607441918, 1607441924, 1607441927), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), nreps = c(`53047` = 144L, `53047` = 144L,
`53047` = 144L, `53047` = 144L, `53047` = 144L, `53047` = 144L,
`53047` = 144L, `53047` = 144L, `53047` = 144L, `53047` = 144L,
`53047` = 144L, `53047` = 144L, `53047` = 144L, `53047` = 144L,
`53047` = 144L, `53047` = 144L, `53047` = 144L, `53047` = 144L,
`53047` = 144L, `53047` = 144L), subjBelief_left = c("vegan",
"vegan", "vegan", "vegan", "vegan", "no vegan", "vegan",
"vegan", "vegan", "no vegan", "vegan", "no vegan", "vegan",
"vegan", "vegan", "vegan", "vegan", "vegan", "no vegan",
"no vegan"), subjBelief_right = c("no vegan", "no vegan",
"no vegan", "vegan", "no vegan", "no vegan", "no vegan",
"vegan", "no vegan", "no vegan", "no vegan", "no vegan",
"vegan", "no vegan", "vegan", "vegan", "no vegan", "no vegan",
"no vegan", "no vegan"), X1 = c("Vegan_steak_nolabel_3",
"Vegan_steak_nolabel_1", "Vegan_chicken_nolabel_2", "Vegan_sausage_nolabel_1",
"Vegan_curry_nolabel_6", "Nonvegan_steak_nolabel_4", "Vegan_curry_nolabel_2",
"Vegan_pizza_nolabel_4", "Vegan_pizza_nolabel_5", "Nonvegan_steak_nolabel_2",
"Vegan_burger_nolabel_4", "Nonvegan_curry_nolabel_6", "Vegan_sausage_nolabel_6",
"Vegan_steak_nolabel_5", "Vegan_curry_nolabel_1", "Vegan_burger_nolabel_2",
"Vegan_steak_nolabel_2", "Vegan_sausage_nolabel_3", "Nonvegan_pizza_nolabel_6",
"Nonvegan_pizza_nolabel_2"), X2 = c("Nonvegan_steak_nolabel_4",
"Nonvegan_steak_nolabel_2", "Nonvegan_chicken_nolabel_1",
"Nonvegan_sausage_nolabel_2", "Nonvegan_curry_nolabel_5",
"Nonvegan_steak_nolabel_3", "Nonvegan_curry_nolabel_1", "Nonvegan_pizza_nolabel_3",
"Nonvegan_pizza_nolabel_6", "Nonvegan_steak_nolabel_1", "Nonvegan_burger_nolabel_3",
"Nonvegan_curry_nolabel_5", "Vegan_sausage_nolabel_5", "Nonvegan_steak_nolabel_6",
"Nonvegan_curry_nolabel_2", "Vegan_burger_nolabel_1", "Nonvegan_steak_nolabel_1",
"Nonvegan_sausage_nolabel_4", "Nonvegan_pizza_nolabel_5",
"Nonvegan_pizza_nolabel_1"), crossCheckExperiment = c("Steak_V_L_03_NV_NL_04",
"Steak_V_L_01_NV_NL_02", "Chicken_V_NL_02_NV_L_01", "Sausage_V_L_01_NV_NL_02",
"Curry_V_NL_06_NV_L_05", "Steak_NV_L_04_NV_NL_03", "Curry_V_NL_02_NV_L_01",
"Pizza_V_NL_04_NV_L_03", "Pizza_V_L_05_NV_NL_06", "Steak_NV_L_02_NV_NL_01",
"Burger_V_NL_04_NV_L_03", "Curry_NV_L_06_NV_NL_05", "Sausage_V_L_06_V_NL_05",
"Steak_V_L_05_NV_NL_06", "Curry_V_L_01_NV_NL_02", "Burger_V_L_02_V_NL_01",
"Steak_V_NL_02_NV_L_01", "Sausage_V_L_03_NV_NL_04", "Pizza_NV_L_06_NV_NL_05",
"Pizza_NV_L_02_NV_NL_01"), checkSubjId = c(53047, 53047,
53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047,
53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047
), subjectVeganQuantification = c(5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), left_food = c("V", "V",
"V", "V", "V", "NV", "V", "V", "V", "NV", "V", "NV", "V",
"V", "V", "V", "V", "V", "NV", "NV"), left_label = c("L",
"L", "NL", "L", "NL", "L", "NL", "NL", "L", "L", "NL", "L",
"L", "L", "L", "L", "NL", "L", "L", "L"), right_food = c("NV",
"NV", "NV", "NV", "NV", "NV", "NV", "NV", "NV", "NV", "NV",
"NV", "V", "NV", "NV", "V", "NV", "NV", "NV", "NV"), right_label = c("NL",
"NL", "L", "NL", "L", "NL", "L", "L", "NL", "NL", "L", "NL",
"NL", "NL", "NL", "NL", "L", "NL", "NL", "NL"), dv_inv = c(1,
1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0),
appear_selected = c("no vegan", "no vegan", "no vegan", "vegan",
"no vegan", "no vegan", "vegan", "vegan", "vegan", "no vegan",
"no vegan", "no vegan", "vegan", "no vegan", "vegan", "vegan",
"no vegan", "no vegan", "no vegan", "no vegan"), label_selected = c("NL",
"NL", "L", "L", "L", "NL", "NL", "NL", "L", "NL", "L", "NL",
"L", "NL", "NL", "NL", "L", "NL", "L", "L"), counter = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
dv_recoded = c(0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0,
1, 1, 0, 0, 0, 0), cong = c(1, 1, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 1, 0, 0, 0, 1, 0, 0), veg_cong = c(0, 0, 0, 1, 0,
0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0), incong = c(0,
0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0),
noveg_cong = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 1, 1), control_cong = c(1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), congVar = c("cong",
"cong", "incong", "veg_cong", "incong", "noveg_cong", "incong",
"veg_cong", "cong", "noveg_cong", "incong", "noveg_cong",
"veg_cong", "cong", "veg_cong", "veg_cong", "incong", "cong",
"noveg_cong", "noveg_cong"), cong2 = c(TRUE, TRUE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE), veg_cong2 = c(FALSE,
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,
FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE,
FALSE), incong2 = c(FALSE, FALSE, TRUE, FALSE, TRUE, FALSE,
TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, FALSE), noveg_cong2 = c(FALSE,
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE,
FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,
TRUE), fac = structure(c(1L, 1L, 3L, 2L, 3L, 4L, 3L, 2L,
1L, 4L, 3L, 4L, 2L, 1L, 2L, 2L, 3L, 1L, 4L, 4L), .Label = c("cong",
"veg_cong", "incong", "noveg_cong"), class = "factor"), trialType = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA)), row.names = c(NA, 20L), class = "data.frame")

这是我目前拥有的代码

# Asker's original attempt, kept verbatim: the error quoted after this snippet
# ("argument is not interpretable as logical") is produced by exactly this code.
# NOTE(review): mutate() is called without a data frame as its first argument,
# and every branch passes the name `my_data_filt` to if() as its condition
# instead of giving case_when() a plain `condition ~ value` formula.
my_data_filt <- mutate(Group =
case_when(if (my_data_filt) crossCheckExperiment == 'Steak', 'steak' ~ "steak", 
if (my_data_filt) crossCheckExperiment == 'Burger', 'burger' ~ "burger",
if (my_data_filt) crossCheckExperiment == 'Chicken', 'chicken' ~ "chicken",
if (my_data_filt) crossCheckExperiment == 'Pizza', 'pizza' ~ "pizza",
if (my_data_filt) crossCheckExperiment == 'Sausage', 'sausage' ~ "sausage",
# NOTE(review): the next line maps 'curry' to "sausage" -- looks like a
# copy-paste slip; presumably "curry" was intended. Confirm with the author.
if (my_data_filt) crossCheckExperiment == 'Curry', 'curry' ~ "sausage")
)

然而,我一直得到这个错误

Error in if (my_data_filt) crossCheckExperiment == "Steak" : 
argument is not interpretable as logical

您的代码中有几个问题。首先,您没有把数据框作为mutate()的第一个参数传入。其次,case_when()的用法不正确:每个分支都应写成"条件 ~ 结果"的形式,不需要if。第三,==只适用于与单个值比较;如果希望匹配多个值中的任意一个,请使用%in%运算符。

# Add a `Group` column by exact, whole-value matching of either spelling.
# The data frame is the first argument of mutate(); each case_when() branch is
# a `condition ~ value` formula. %in% tests membership in a set of accepted
# values (and never returns NA, unlike ==). Rows matching no branch get NA.
mutate(
  my_data_filt,
  Group = case_when(
    crossCheckExperiment %in% c("Steak", "steak") ~ "steak",
    crossCheckExperiment %in% c("Burger", "burger") ~ "burger",
    crossCheckExperiment %in% c("Chicken", "chicken") ~ "chicken",
    crossCheckExperiment %in% c("Pizza", "pizza") ~ "pizza",
    crossCheckExperiment %in% c("Sausage", "sausage") ~ "sausage",
    crossCheckExperiment %in% c("Curry", "curry") ~ "curry"
  )
)

如果变量中包含感兴趣的单词以及其他内容,则stringr中的str_detect()函数很有用:

# Add a `Group` column from the first word of `crossCheckExperiment`
# (values look like "Steak_V_L_03_NV_NL_04"). Each pattern is anchored with
# `^` so only the leading word is considered, and `(X|x)` accepts either
# capitalisation of the first letter. Rows matching no branch get NA.
mutate(
  my_data_filt,
  Group = case_when(
    str_detect(crossCheckExperiment, "^(S|s)teak") ~ "steak",
    str_detect(crossCheckExperiment, "^(B|b)urger") ~ "burger",
    str_detect(crossCheckExperiment, "^(C|c)hicken") ~ "chicken",
    str_detect(crossCheckExperiment, "^(P|p)izza") ~ "pizza",
    str_detect(crossCheckExperiment, "^(S|s)ausage") ~ "sausage",
    str_detect(crossCheckExperiment, "^(C|c)urry") ~ "curry"
  )
)

相关内容

  • 没有找到相关文章

最新更新