R - 对数据框的多个子集进行循环



我有一个数据框 fish.test0,我想根据 varlist 中的特定变量名去匹配 group 列并提取相应的行,从而创建一个用于统计检验的子数据框。检验结果保存在 tests.res.t 中。我想对 varlist 进行循环,使 varlist 中的每个元素都得到一个结果。

脚本:

# For each name in varlist: subset fish.test0 by its group column, run one
# Fisher exact test per unique Merge value of the subset, and collect the
# estimates, confidence bounds and p-values in tests.res.t.
varlist <- c("Abiotrophia","Alphatorquevirus")
for (i in varlist) {
# NOTE(review): the pattern below is the literal string "i", not the loop
# variable i, so the same rows are selected on every iteration.
fish.test <- fish.test0[grep("i",fish.test0$group),]


# NOTE(review): the sample data shown with this script has a column named
# ABCD but none named ACDC - confirm which column name is intended.
column <- c("ACDC")
# tests is not referenced again anywhere in this script.
tests <- list()

# For every column name in column and every unique Merge value, build a
# two-column table and pass it to fisher.test():
#   a = (Present    - colx, colx) taken from the 2nd row with that Merge value
#   b = (NotPresent - colx, colx) taken from the 1st row with that Merge value
dat_test <- sapply( column, function(colx) 
lapply( unique(fish.test$Merge), function(x) 
fisher.test( data.frame( 
a=c(( fish.test[ which(fish.test$Merge %in% x)[2],"Present"] - 
fish.test[ which(fish.test$Merge %in% x)[2], colx] ),fish.test[ which(fish.test$Merge %in% x)[2], colx]
), 
b=c(( fish.test[ which(fish.test$Merge %in% x)[1],"NotPresent"] - 
fish.test[ which(fish.test$Merge %in% x)[1], colx] ), fish.test[ which(fish.test$Merge %in% x)[1], colx]))) #,alternative = "greater"
) )

# dat_test is handled as a matrix from here on: one row per unique Merge
# value, one column per entry of column.
rownames(dat_test) <- unique(fish.test$Merge )
colnames(dat_test) <- column
# Pull the summary numbers out of each test result in the first column.
# NOTE(review): for a 2x2 table fisher.test() presumably reports a single
# estimate, in which case x$estimate[2] would be NA - confirm.
tests.res <- sapply(dat_test[1:dim(dat_test)[1],1], function(x) {
c(x$estimate[1],
x$estimate[2],
ci.lower = x$conf.int[1],
ci.upper = x$conf.int[2],
p.value = x$p.value)
})
# Transpose so that each test becomes one row.
# NOTE(review): tests.res.t is reassigned on every iteration, so only the
# result of the last iteration is left after the loop ends.
tests.res.t <- as.data.frame(t(tests.res))
}

测试数据:

# Sample data (dput() output): a data.frame with 66 rows and the columns
# Present, NotPresent, group, ABCD, Total and Merge. Every Merge value occurs
# on two consecutive rows; in group the first of the two is the
# "..._NotPresent_..." label and the second the "..._Present_..." label.
fish.test0 <- structure(list(Present = c(4L, 4L, 9L, 9L, 57L, 57L, 146L, 146L, 
91L, 91L, 26L, 26L, 6L, 6L, 12L, 12L, 33L, 33L, 10L, 10L, 66L, 
66L, 4L, 4L, 4L, 4L, 9L, 9L, 18L, 18L, 19L, 19L, 51L, 51L, 50L, 
50L, 12L, 12L, 7L, 7L, 14L, 14L, 27L, 27L, 9L, 9L, 5L, 5L, 6L, 
6L, 22L, 22L, 3L, 3L, 14L, 14L, 4L, 4L, 15L, 15L, 6L, 6L, 8L, 
8L, 4L, 4L), NotPresent = c(11L, 11L, 44L, 44L, 126L, 126L, 532L, 
532L, 382L, 382L, 97L, 97L, 14L, 14L, 43L, 43L, 85L, 85L, 41L, 
41L, 336L, 336L, 19L, 19L, 27L, 27L, 67L, 67L, 108L, 108L, 81L, 
81L, 240L, 240L, 258L, 258L, 47L, 47L, 31L, 31L, 82L, 82L, 110L, 
110L, 63L, 63L, 178L, 178L, 672L, 672L, 451L, 451L, 120L, 120L, 
104L, 104L, 47L, 47L, 387L, 387L, 94L, 94L, 300L, 300L, 133L, 
133L), group = c("G__Abiotrophia_NotPresent_Anus", "G__Abiotrophia_Present_Anus", 
"G__Abiotrophia_NotPresent_Bile duct", "G__Abiotrophia_Present_Bile duct", 
"G__Abiotrophia_NotPresent_Bone/Soft tissue", "G__Abiotrophia_Present_Bone/Soft tissue", 
"G__Abiotrophia_NotPresent_Breast", "G__Abiotrophia_Present_Breast", 
"G__Abiotrophia_NotPresent_Colorectum", "G__Abiotrophia_Present_Colorectum", 
"G__Abiotrophia_NotPresent_Esophagus", "G__Abiotrophia_Present_Esophagus", 
"G__Abiotrophia_NotPresent_Gallbladder", "G__Abiotrophia_Present_Gallbladder", 
"G__Abiotrophia_NotPresent_Head and neck", "G__Abiotrophia_Present_Head and neck", 
"G__Abiotrophia_NotPresent_Kidney", "G__Abiotrophia_Present_Kidney", 
"G__Abiotrophia_NotPresent_Liver", "G__Abiotrophia_Present_Liver", 
"G__Abiotrophia_NotPresent_Lung", "G__Abiotrophia_Present_Lung", 
"G__Abiotrophia_NotPresent_Lymphoid tissue", "G__Abiotrophia_Present_Lymphoid tissue", 
"G__Abiotrophia_NotPresent_Mesothelium", "G__Abiotrophia_Present_Mesothelium", 
"G__Abiotrophia_NotPresent_Nervous system", "G__Abiotrophia_Present_Nervous system", 
"G__Abiotrophia_NotPresent_Ovary", "G__Abiotrophia_Present_Ovary", 
"G__Abiotrophia_NotPresent_Pancreas", "G__Abiotrophia_Present_Pancreas", 
"G__Abiotrophia_NotPresent_Prostate", "G__Abiotrophia_Present_Prostate", 
"G__Abiotrophia_NotPresent_Skin", "G__Abiotrophia_Present_Skin", 
"G__Abiotrophia_NotPresent_Small intestine", "G__Abiotrophia_Present_Small intestine", 
"G__Abiotrophia_NotPresent_Stomach", "G__Abiotrophia_Present_Stomach", 
"G__Abiotrophia_NotPresent_Unknown", "G__Abiotrophia_Present_Unknown", 
"G__Abiotrophia_NotPresent_Urothelial tract", "G__Abiotrophia_Present_Urothelial tract", 
"G__Abiotrophia_NotPresent_Uterus", "G__Abiotrophia_Present_Uterus", 
"G__Alphatorquevirus_NotPresent_Bone/Soft tissue", "G__Alphatorquevirus_Present_Bone/Soft tissue", 
"G__Alphatorquevirus_NotPresent_Breast", "G__Alphatorquevirus_Present_Breast", 
"G__Alphatorquevirus_NotPresent_Colorectum", "G__Alphatorquevirus_Present_Colorectum", 
"G__Alphatorquevirus_NotPresent_Esophagus", "G__Alphatorquevirus_Present_Esophagus", 
"G__Alphatorquevirus_NotPresent_Kidney", "G__Alphatorquevirus_Present_Kidney", 
"G__Alphatorquevirus_NotPresent_Liver", "G__Alphatorquevirus_Present_Liver", 
"G__Alphatorquevirus_NotPresent_Lung", "G__Alphatorquevirus_Present_Lung", 
"G__Alphatorquevirus_NotPresent_Pancreas", "G__Alphatorquevirus_Present_Pancreas", 
"G__Alphatorquevirus_NotPresent_Skin", "G__Alphatorquevirus_Present_Skin", 
"G__Alphatorquevirus_NotPresent_Urothelial tract", "G__Alphatorquevirus_Present_Urothelial tract"
), ABCD = c(3L, 2L, 17L, 6L, 34L, 18L, 240L, 53L, 321L, 73L, 
87L, 25L, 6L, 3L, 20L, 8L, 15L, 7L, 19L, 4L, 265L, 42L, 6L, 1L, 
4L, 2L, 22L, 4L, 70L, 13L, 54L, 12L, 116L, 33L, 58L, 11L, 6L, 
2L, 26L, 6L, 42L, 8L, 74L, 18L, 19L, 3L, 52L, 0L, 288L, 5L, 377L, 
17L, 110L, 2L, 19L, 3L, 21L, 2L, 298L, 9L, 60L, 6L, 68L, 1L, 
89L, 3L), Total = c(15L, 15L, 53L, 53L, 183L, 183L, 678L, 678L, 
473L, 473L, 123L, 123L, 20L, 20L, 55L, 55L, 118L, 118L, 51L, 
51L, 402L, 402L, 23L, 23L, 31L, 31L, 76L, 76L, 126L, 126L, 100L, 
100L, 291L, 291L, 308L, 308L, 59L, 59L, 38L, 38L, 96L, 96L, 137L, 
137L, 72L, 72L, 183L, 183L, 678L, 678L, 473L, 473L, 123L, 123L, 
118L, 118L, 51L, 51L, 402L, 402L, 100L, 100L, 308L, 308L, 137L, 
137L), Merge = c("Abiotrophia_Anus", "Abiotrophia_Anus", "Abiotrophia_Bile duct", 
"Abiotrophia_Bile duct", "Abiotrophia_Bone/Soft tissue", "Abiotrophia_Bone/Soft tissue", 
"Abiotrophia_Breast", "Abiotrophia_Breast", "Abiotrophia_Colorectum", 
"Abiotrophia_Colorectum", "Abiotrophia_Esophagus", "Abiotrophia_Esophagus", 
"Abiotrophia_Gallbladder", "Abiotrophia_Gallbladder", "Abiotrophia_Head and neck", 
"Abiotrophia_Head and neck", "Abiotrophia_Kidney", "Abiotrophia_Kidney", 
"Abiotrophia_Liver", "Abiotrophia_Liver", "Abiotrophia_Lung", 
"Abiotrophia_Lung", "Abiotrophia_Lymphoid tissue", "Abiotrophia_Lymphoid tissue", 
"Abiotrophia_Mesothelium", "Abiotrophia_Mesothelium", "Abiotrophia_Nervous system", 
"Abiotrophia_Nervous system", "Abiotrophia_Ovary", "Abiotrophia_Ovary", 
"Abiotrophia_Pancreas", "Abiotrophia_Pancreas", "Abiotrophia_Prostate", 
"Abiotrophia_Prostate", "Abiotrophia_Skin", "Abiotrophia_Skin", 
"Abiotrophia_Small intestine", "Abiotrophia_Small intestine", 
"Abiotrophia_Stomach", "Abiotrophia_Stomach", "Abiotrophia_Unknown", 
"Abiotrophia_Unknown", "Abiotrophia_Urothelial tract", "Abiotrophia_Urothelial tract", 
"Abiotrophia_Uterus", "Abiotrophia_Uterus", "Alphatorquevirus_Bone/Soft tissue", 
"Alphatorquevirus_Bone/Soft tissue", "Alphatorquevirus_Breast", 
"Alphatorquevirus_Breast", "Alphatorquevirus_Colorectum", "Alphatorquevirus_Colorectum", 
"Alphatorquevirus_Esophagus", "Alphatorquevirus_Esophagus", "Alphatorquevirus_Kidney", 
"Alphatorquevirus_Kidney", "Alphatorquevirus_Liver", "Alphatorquevirus_Liver", 
"Alphatorquevirus_Lung", "Alphatorquevirus_Lung", "Alphatorquevirus_Pancreas", 
"Alphatorquevirus_Pancreas", "Alphatorquevirus_Skin", "Alphatorquevirus_Skin", 
"Alphatorquevirus_Urothelial tract", "Alphatorquevirus_Urothelial tract"
)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 10L, 9L, 12L, 
11L, 13L, 14L, 16L, 15L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 
25L, 26L, 28L, 27L, 29L, 30L, 31L, 32L, 34L, 33L, 35L, 36L, 38L, 
37L, 40L, 39L, 42L, 43L, 45L, 44L, 47L, 46L, 1011L, 1012L, 1014L, 
1013L, 1015L, 1016L, 1017L, 1018L, 1019L, 1020L, 1022L, 1021L, 
1023L, 1024L, 1026L, 1025L, 1027L, 1028L, 1029L, 1030L), class = "data.frame")

这可能算不上一个完整的答案,但应该有助于改进你的代码。如果我完全理解错了,我会马上删除这个回答。我略去了我不太理解的统计检验部分,因为你的问题似乎出在数据提取这一步。

首先,您需要去掉 grep 命令中 i 两侧的引号(否则匹配的是字面字符 "i",而不是循环变量 i),请尝试:

# Names to look for in the group column of fish.test0.
varlist <- c("Abiotrophia", "Alphatorquevirus")
for (i in varlist) {
  # Keep only the rows whose group label matches the current name.
  fish.test <- fish.test0[grepl(i, fish.test0$group), ]
  # Show the first rows of the subset to check the extraction.
  print(head(fish.test))
}

据我所知,您需要在循环之外定义 column 和 tests。下面的代码是否更接近您想要的结果:

# Build one subset of fish.test0 per entry of varlist and collect the subsets
# in the list tests. The list keeps the order of varlist, so tests[[1]]
# belongs to varlist[1]; each element is also addressable by its name.
varlist <- c("Abiotrophia", "Alphatorquevirus")
column <- "ACDC"
# Preallocate the result list instead of growing it inside the loop.
tests <- setNames(vector("list", length(varlist)), varlist)
# seq_along() gives zero iterations for an empty varlist, whereas
# 1:length(varlist) would iterate over c(1, 0).
for (i in seq_along(varlist)) {
  # Extract the rows whose group label matches the current name.
  fish.test <- fish.test0[grep(varlist[i], fish.test0$group), ]
  # Add a running row index under the chosen column name. seq_len() also
  # handles a subset with zero rows, and assigning by name avoids creating
  # a second column with the same name if it is already present.
  fish.test[[column]] <- seq_len(nrow(fish.test))
  # Store the subset at the position of the current name.
  tests[[i]] <- fish.test
}

最新更新