先介绍一下背景:我正在进行一个食物选择实验,其中包含不同类型的食物,如牛排,披萨,汉堡等。我想知道每个试次使用的是哪种食物,这样就可以用GLM来检验食物类型是否对食物选择有影响。
问题是,我在数据中对这些食物类型的拼写不一致,有的首字母大写,有的没有(我知道这是我的失误)。此外,我想从"crossCheckExperiment"列中匹配每种食物的2种可能写法(例如'Steak'和'steak'),而该列的值里还包含很多其他词。
这是我的数据集
structure(list(session_id = c(53047, 53047, 53047, 53047, 53047,
53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047,
53047, 53047, 53047, 53047, 53047, 53047), project_id = c(495,
495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495,
495, 495, 495, 495, 495, 495), exp_name = c("Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice"), exp_id = c(569, 569, 569, 569,
569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569,
569, 569, 569), user_id = c(46946, 46946, 46946, 46946, 46946,
46946, 46946, 46946, 46946, 46946, 46946, 46946, 46946, 46946,
46946, 46946, 46946, 46946, 46946, 46946), user_sex = c("male",
"male", "male", "male", "male", "male", "male", "male", "male",
"male", "male", "male", "male", "male", "male", "male", "male",
"male", "male", "male"), user_status = c("guest", "guest", "guest",
"guest", "guest", "guest", "guest", "guest", "guest", "guest",
"guest", "guest", "guest", "guest", "guest", "guest", "guest",
"guest", "guest", "guest"), user_age = c(21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21),
trial_name = c("Steak_V_L_03_NV_NL_04", "Steak_V_L_01_NV_NL_02",
"Chicken_V_NL_02_NV_L_01", "Sausage_V_L_01_NV_NL_02", "Curry_V_NL_06_NV_L_05",
"Steak_NV_L_04_NV_NL_03", "Curry_V_NL_02_NV_L_01", "Pizza_V_NL_04_NV_L_03",
"Pizza_V_L_05_NV_NL_06", "Steak_NV_L_02_NV_NL_01", "Burger_V_NL_04_NV_L_03",
"Curry_NV_L_06_NV_NL_05", "Sausage_V_L_06_V_NL_05", "Steak_V_L_05_NV_NL_06",
"Curry_V_L_01_NV_NL_02", "Burger_V_L_02_V_NL_01", "Steak_V_NL_02_NV_L_01",
"Sausage_V_L_03_NV_NL_04", "Pizza_NV_L_06_NV_NL_05", "Pizza_NV_L_02_NV_NL_01"
), trial_n = c(29, 25, 50, 1, 46, 32, 38, 22, 69, 28, 14,
48, 67, 33, 37, 11, 26, 5, 72, 20), order = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
), dv = c(0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0,
0, 0, 1, 1), rt = c(2054, 2417, 2007, 3003, 2391, 3238, 3316,
2371, 4109, 3052, 3274, 3658, 2824, 1888, 4198, 5815, 2651,
6591, 5766, 3682), side = c(1, 2, 2, 1, 1, 1, 1, 2, 2, 1,
2, 1, 2, 1, 1, 2, 2, 1, 2, 2), dt = structure(c(1607441861,
1607441863, 1607441865, 1607441868, 1607441871, 1607441874,
1607441877, 1607441880, 1607441884, 1607441887, 1607441890,
1607441894, 1607441896, 1607441898, 1607441903, 1607441909,
1607441911, 1607441918, 1607441924, 1607441927), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), nreps = c(53047 = 144L, 53047 = 144L,
53047 = 144L, 53047 = 144L, 53047 = 144L, 53047 = 144L,
53047 = 144L, 53047 = 144L, 53047 = 144L, 53047 = 144L,
53047 = 144L, 53047 = 144L, 53047 = 144L, 53047 = 144L,
53047 = 144L, 53047 = 144L, 53047 = 144L, 53047 = 144L,
53047 = 144L, 53047 = 144L), subjBelief_left = c("vegan",
"vegan", "vegan", "vegan", "vegan", "no vegan", "vegan",
"vegan", "vegan", "no vegan", "vegan", "no vegan", "vegan",
"vegan", "vegan", "vegan", "vegan", "vegan", "no vegan",
"no vegan"), subjBelief_right = c("no vegan", "no vegan",
"no vegan", "vegan", "no vegan", "no vegan", "no vegan",
"vegan", "no vegan", "no vegan", "no vegan", "no vegan",
"vegan", "no vegan", "vegan", "vegan", "no vegan", "no vegan",
"no vegan", "no vegan"), X1 = c("Vegan_steak_nolabel_3",
"Vegan_steak_nolabel_1", "Vegan_chicken_nolabel_2", "Vegan_sausage_nolabel_1",
"Vegan_curry_nolabel_6", "Nonvegan_steak_nolabel_4", "Vegan_curry_nolabel_2",
"Vegan_pizza_nolabel_4", "Vegan_pizza_nolabel_5", "Nonvegan_steak_nolabel_2",
"Vegan_burger_nolabel_4", "Nonvegan_curry_nolabel_6", "Vegan_sausage_nolabel_6",
"Vegan_steak_nolabel_5", "Vegan_curry_nolabel_1", "Vegan_burger_nolabel_2",
"Vegan_steak_nolabel_2", "Vegan_sausage_nolabel_3", "Nonvegan_pizza_nolabel_6",
"Nonvegan_pizza_nolabel_2"), X2 = c("Nonvegan_steak_nolabel_4",
"Nonvegan_steak_nolabel_2", "Nonvegan_chicken_nolabel_1",
"Nonvegan_sausage_nolabel_2", "Nonvegan_curry_nolabel_5",
"Nonvegan_steak_nolabel_3", "Nonvegan_curry_nolabel_1", "Nonvegan_pizza_nolabel_3",
"Nonvegan_pizza_nolabel_6", "Nonvegan_steak_nolabel_1", "Nonvegan_burger_nolabel_3",
"Nonvegan_curry_nolabel_5", "Vegan_sausage_nolabel_5", "Nonvegan_steak_nolabel_6",
"Nonvegan_curry_nolabel_2", "Vegan_burger_nolabel_1", "Nonvegan_steak_nolabel_1",
"Nonvegan_sausage_nolabel_4", "Nonvegan_pizza_nolabel_5",
"Nonvegan_pizza_nolabel_1"), crossCheckExperiment = c("Steak_V_L_03_NV_NL_04",
"Steak_V_L_01_NV_NL_02", "Chicken_V_NL_02_NV_L_01", "Sausage_V_L_01_NV_NL_02",
"Curry_V_NL_06_NV_L_05", "Steak_NV_L_04_NV_NL_03", "Curry_V_NL_02_NV_L_01",
"Pizza_V_NL_04_NV_L_03", "Pizza_V_L_05_NV_NL_06", "Steak_NV_L_02_NV_NL_01",
"Burger_V_NL_04_NV_L_03", "Curry_NV_L_06_NV_NL_05", "Sausage_V_L_06_V_NL_05",
"Steak_V_L_05_NV_NL_06", "Curry_V_L_01_NV_NL_02", "Burger_V_L_02_V_NL_01",
"Steak_V_NL_02_NV_L_01", "Sausage_V_L_03_NV_NL_04", "Pizza_NV_L_06_NV_NL_05",
"Pizza_NV_L_02_NV_NL_01"), checkSubjId = c(53047, 53047,
53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047,
53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047
), subjectVeganQuantification = c(5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), left_food = c("V", "V",
"V", "V", "V", "NV", "V", "V", "V", "NV", "V", "NV", "V",
"V", "V", "V", "V", "V", "NV", "NV"), left_label = c("L",
"L", "NL", "L", "NL", "L", "NL", "NL", "L", "L", "NL", "L",
"L", "L", "L", "L", "NL", "L", "L", "L"), right_food = c("NV",
"NV", "NV", "NV", "NV", "NV", "NV", "NV", "NV", "NV", "NV",
"NV", "V", "NV", "NV", "V", "NV", "NV", "NV", "NV"), right_label = c("NL",
"NL", "L", "NL", "L", "NL", "L", "L", "NL", "NL", "L", "NL",
"NL", "NL", "NL", "NL", "L", "NL", "NL", "NL"), dv_inv = c(1,
1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0),
appear_selected = c("no vegan", "no vegan", "no vegan", "vegan",
"no vegan", "no vegan", "vegan", "vegan", "vegan", "no vegan",
"no vegan", "no vegan", "vegan", "no vegan", "vegan", "vegan",
"no vegan", "no vegan", "no vegan", "no vegan"), label_selected = c("NL",
"NL", "L", "L", "L", "NL", "NL", "NL", "L", "NL", "L", "NL",
"L", "NL", "NL", "NL", "L", "NL", "L", "L"), counter = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
dv_recoded = c(0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0,
1, 1, 0, 0, 0, 0), cong = c(1, 1, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 1, 0, 0, 0, 1, 0, 0), veg_cong = c(0, 0, 0, 1, 0,
0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0), incong = c(0,
0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0),
noveg_cong = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 1, 1), control_cong = c(1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), congVar = c("cong",
"cong", "incong", "veg_cong", "incong", "noveg_cong", "incong",
"veg_cong", "cong", "noveg_cong", "incong", "noveg_cong",
"veg_cong", "cong", "veg_cong", "veg_cong", "incong", "cong",
"noveg_cong", "noveg_cong"), cong2 = c(TRUE, TRUE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE), veg_cong2 = c(FALSE,
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,
FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE,
FALSE), incong2 = c(FALSE, FALSE, TRUE, FALSE, TRUE, FALSE,
TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, FALSE), noveg_cong2 = c(FALSE,
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE,
FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,
TRUE), fac = structure(c(1L, 1L, 3L, 2L, 3L, 4L, 3L, 2L,
1L, 4L, 3L, 4L, 2L, 1L, 2L, 2L, 3L, 1L, 4L, 4L), .Label = c("cong",
"veg_cong", "incong", "noveg_cong"), class = "factor"), trialType = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA)), row.names = c(NA, 20L), class = "data.frame")
这是我目前拥有的代码
# Asker's original attempt, kept exactly as posted; running it produces the
# error quoted right after this snippet.
# Things to notice: mutate() is called without a data frame argument, and each
# case_when() argument begins with `if (my_data_filt)` instead of being a
# single `condition ~ value` formula.
my_data_filt <- mutate(Group =
case_when(if (my_data_filt) crossCheckExperiment == 'Steak', 'steak' ~ "steak",
if (my_data_filt) crossCheckExperiment == 'Burger', 'burger' ~ "burger",
if (my_data_filt) crossCheckExperiment == 'Chicken', 'chicken' ~ "chicken",
if (my_data_filt) crossCheckExperiment == 'Pizza', 'pizza' ~ "pizza",
if (my_data_filt) crossCheckExperiment == 'Sausage', 'sausage' ~ "sausage",
# NOTE(review): this 'Curry' branch yields "sausage" -- presumably "curry"
# was intended; confirm with the author.
if (my_data_filt) crossCheckExperiment == 'Curry', 'curry' ~ "sausage")
)
然而,我一直得到这个错误
Error in if (my_data_filt) crossCheckExperiment == "Steak" :
argument is not interpretable as logical
你的代码中有几个问题。首先,你没有把数据框作为mutate()的第一个参数传入。其次,case_when()的用法不正确:每个分支都应写成"条件 ~ 值"的形式,不需要if。第三,==只能用来判断是否等于单个值;如果想判断是否等于几个值中的任意一个,请使用%in%操作符。
# Add a `Group` column holding the lower-case food type when
# `crossCheckExperiment` is exactly the food word, in either capitalisation.
# `%in%` tests membership in a set of values, which `==` cannot do.
# case_when() evaluates the branches in order and returns NA for rows that
# match none of them. If the column values contain more than the bare food
# word, match with a pattern (e.g. stringr::str_detect()) instead.
mutate(
  my_data_filt,
  Group = case_when(
    crossCheckExperiment %in% c("Steak", "steak") ~ "steak",
    crossCheckExperiment %in% c("Burger", "burger") ~ "burger",
    crossCheckExperiment %in% c("Chicken", "chicken") ~ "chicken",
    crossCheckExperiment %in% c("Pizza", "pizza") ~ "pizza",
    crossCheckExperiment %in% c("Sausage", "sausage") ~ "sausage",
    crossCheckExperiment %in% c("Curry", "curry") ~ "curry"
  )
)
如果变量的值中除了感兴趣的单词之外还包含其他内容(本例中的crossCheckExperiment正是如此),那么stringr包中的str_detect()函数很有用:
# Add a `Group` column holding the lower-case food type found anywhere inside
# `crossCheckExperiment` (values such as "Steak_V_L_03_NV_NL_04").
# Each pattern accepts the food word with an upper- or lower-case first
# letter. case_when() evaluates the branches in order, so the first matching
# pattern wins; rows matching no pattern get NA.
mutate(
  my_data_filt,
  Group = case_when(
    str_detect(crossCheckExperiment, "(S|s)teak") ~ "steak",
    str_detect(crossCheckExperiment, "(B|b)urger") ~ "burger",
    str_detect(crossCheckExperiment, "(C|c)hicken") ~ "chicken",
    str_detect(crossCheckExperiment, "(P|p)izza") ~ "pizza",
    str_detect(crossCheckExperiment, "(S|s)ausage") ~ "sausage",
    str_detect(crossCheckExperiment, "(C|c)urry") ~ "curry"
  )
)