测试多个数据框(df)中同一列的各行是否对齐



我正在尝试确认所有加载的数据集对齐。我用这段代码加载了160+ dfs。

# Load every tab-delimited .txt file in the working directory into its own
# data frame, assigned to a variable named after the file.
# `pattern` is a regular expression (not a shell glob), so the dot is escaped
# and the match is anchored to the end of the file name.
temp <- list.files(pattern = "\\.txt$")
# seq_along() gives an empty sequence when no files match, whereas
# 1:length(temp) would iterate over c(1, 0).
for (i in seq_along(temp)) {
  assign(temp[i], read.table(temp[i],
                             header = TRUE,
                             skip = 50,
                             fill = TRUE,
                             na.strings = "N/A",
                             # Tab separator; a bare "t" would split fields
                             # on the letter t.
                             sep = "\t",
                             stringsAsFactors = FALSE))
}

df的名称对应于通过list.files加载的文件的名称。

每个df中有13列,第一列为字符(character)类型,其余12列为数值(numeric)类型。

第一个(主)数据框的 dput 输出:

# Sample data frame in dput() form: a factor column `Variable` followed by
# 13 numeric columns (X1 ... X100), 16 rows.
# Note: the factor's level vector ends with a second "J"; the surrounding
# text points at this duplicate on purpose, so it is left as is.
# Fix: row.names previously read `c(NA, c-16L)`, which fails when evaluated;
# the compact row-name form is `c(NA, -16L)`.
structure(list(Variable = structure(c(2L, 1L, 3L, 1L, 1L, 1L, 
4L, 5L, 6L, 7L, 1L, 8L, 9L, 10L, 11L, 12L), .Label = c("", "A", 
"B", "D", "E", "F", "G", "H", "I", "J", "K", "J"), class = "factor"), 
X1 = c(0.625443367, NA, 0.975828895, 0.272109249, NA, 0.60981303, 
0.740266164, 0.748793558, 0.393551957, 0.700720729, NA, NA, 
NA, 0.60631742, 0.878735054, 0.689660004), X5 = c(0.025719335, 
NA, 0.285526204, 0.765384454, NA, 0.309337488, 0.723688501, 
0.072082882, 0.302196899, 0.372761724, NA, NA, NA, 0.790174238, 
0.002225558, 0.223989171), X10 = c(0.26885441, NA, 0.39713263, 
0.913498581, NA, 0.314770795, 0.218497868, 0.3676068, 0.376874153, 
0.297697154, NA, NA, NA, 0.297835764, 0.208253053, 0.519716555
), X20 = c(0.410364897, NA, 0.053464372, 0.038994949, NA, 
0.507067813, 0.5960247, 0.314273854, 0.628876194, 0.198480138, 
NA, NA, NA, 0.524002845, 0.592291169, 0.821217561), X25 = c(0.344474094, 
NA, 0.219674393, 0.47614914, NA, 0.771914616, 0.967852724, 
0.32422822, 0.234324065, 0.298566916, NA, NA, NA, 0.477679941, 
0.693787752, 0.622578002), X30 = c(0.084594429, NA, 0.765283085, 
0.64814448, NA, 0.576817659, 0.133505819, 0.049836577, 0.991388257, 
0.925608219, NA, NA, NA, 0.951692117, 0.787417848, 0.111758489
), X40 = c(0.466207213, NA, 0.127350859, 0.152163267, NA, 
0.232783401, 0.720905124, 0.258173477, 0.190757375, 0.422502772, 
NA, NA, NA, 0.505852543, 0.649902446, 0.478459633), X60 = c(0.796558464, 
NA, 0.318127005, 0.926698695, NA, 0.71276073, 0.308905911, 
0.223667445, 0.973655845, 0.206662152, NA, NA, NA, 0.501497143, 
0.432936032, 0.9384412), X70 = c(0.440348619, NA, 0.062959618, 
0.349952146, NA, 0.494978772, 0.32985092, 0.261574102, 0.077362987, 
0.006246765, NA, NA, NA, 0.771499936, 0.195204747, 0.463747804
), X75 = c(0.514698758, NA, 0.126075423, 0.269197828, NA, 
0.55381458, 0.068921312, 0.890791926, 0.484704932, 0.068021153, 
NA, NA, NA, 0.054235262, 0.252056176, 0.824778366), X80 = c(0.199104477, 
NA, 0.659367187, 0.829178346, NA, 0.668647488, 0.966607154, 
0.288800878, 0.563024474, 0.795475571, NA, NA, NA, 0.306623595, 
0.5060437, 0.609508687), X90 = c(0.470471037, NA, 0.016390655, 
0.12824423, NA, 0.15501053, 0.49744774, 0.587190593, 0.03860296, 
0.909975706, NA, NA, NA, 0.478775103, 0.466123148, 0.954408974
), X100 = c(0.452628911, NA, 0.543662895, 0.897387563, NA, 
0.852554978, 0.008392849, 0.727833622, 0.692729898, 0.327026377, 
NA, NA, NA, 0.171860186, 0.171295653, 0.711265826)), .Names = c("Variable", 
"X1", "X5", "X10", "X20", "X25", "X30", "X40", "X60", "X70", 
"X75", "X80", "X90", "X100"), class = "data.frame", row.names = c(NA, 
-16L))

第二个及其余每个数据框的 dput 输出:

# Sample data frame in dput() form: a factor column `Variable` followed by
# 13 numeric columns (X1 ... X100), 16 rows (compact row names c(NA, -16L)).
# Note: the factor's level vector ends with a second "J"; the surrounding
# text points at this duplicate on purpose.
structure(list(Variable = structure(c(2L, 1L, 3L, 1L, 1L, 1L, 
4L, 5L, 6L, 7L, 1L, 8L, 9L, 10L, 11L, 12L), .Label = c("", "A", 
"B", "D", "E", "F", "G", "H", "I", "J", "K", "J"), class = "factor"), 
X1 = c(0.71772223, NA, 0.445279447, 0.328470358, NA, 0.214795692, 
0.470536908, 0.82732089, 0.464811155, 0.52400864, NA, NA, 
NA, 0.858809889, 0.055051877, 0.499075195), X5 = c(0.496093039, 
NA, 0.405563046, 0.458302396, NA, 0.742840768, 0.192381432, 
0.48193671, 0.284869279, 0.1783585, NA, NA, NA, 0.295773915, 
0.092254844, 0.930647971), X10 = c(0.048447731, NA, 0.974280138, 
0.153448343, NA, 0.826966581, 0.133392525, 0.934463759, 0.701416491, 
0.137353566, NA, NA, NA, 0.52370726, 0.371699579, 0.393444667
), X20 = c(0.232144042, NA, 0.069088242, 0.163514017, NA, 
0.901702062, 0.579277573, 0.557358979, 0.697870038, 0.177942824, 
NA, NA, NA, 0.36971146, 0.182820789, 0.454973585), X25 = c(0.88175934, 
NA, 0.553180662, 0.556749037, NA, 0.184943228, 0.723875496, 
0.833417008, 0.952976588, 0.311258815, NA, NA, NA, 0.064346785, 
0.881648923, 0.497046119), X30 = c(0.745848598, NA, 0.926385172, 
0.613611568, NA, 0.362829401, 0.410915192, 0.090760519, 0.795099045, 
0.100925491, NA, NA, NA, 0.183288884, 0.837146519, 0.294641301
), X40 = c(0.110017321, NA, 0.142722733, 0.919954172, NA, 
0.303358785, 0.177350673, 0.758699705, 0.024830843, 0.973013422, 
NA, NA, NA, 0.678465189, 0.202803024, 0.363498489), X60 = c(0.038027421, 
NA, 0.128757622, 0.290252079, NA, 0.347772735, 0.319189776, 
0.033640467, 0.168359937, 0.678375815, NA, NA, NA, 0.910821265, 
0.676928769, 0.020390321), X70 = c(0.164725423, NA, 0.076708887, 
0.426349446, NA, 0.111726941, 0.885202911, 0.208038512, 0.483489353, 
0.63842772, NA, NA, NA, 0.563524016, 0.592008359, 0.902034255
), X75 = c(0.752526205, NA, 0.098905773, 0.698064546, NA, 
0.475803147, 0.608763409, 0.734027279, 0.281559317, 0.04209597, 
NA, NA, NA, 0.402657881, 0.93614701, 0.391491961), X80 = c(0.305369825, 
NA, 0.921383845, 0.897702227, NA, 0.888330486, 0.548283328, 
0.33654243, 0.864674852, 0.162143087, NA, NA, NA, 0.178685183, 
0.614535137, 0.390157716), X90 = c(0.032753318, NA, 0.068951568, 
0.432669498, NA, 0.089337049, 0.906531493, 0.150399021, 0.984217523, 
0.286168266, NA, NA, NA, 0.582072291, 0.682639084, 0.695373804
), X100 = c(0.479303334, NA, 0.392449509, 0.126543887, NA, 
0.631454516, 0.195057206, 0.780936135, 0.557763263, 0.091113557, 
NA, NA, NA, 0.191267017, 0.818748811, 0.820413052)), .Names = c("Variable", 
"X1", "X5", "X10", "X20", "X25", "X30", "X40", "X60", "X70", 
"X75", "X80", "X90", "X100"), class = "data.frame", row.names = c(NA, 
-16L))

本质上,我想保证各数据框中 Variable 列的内容和顺序完全相同(注意底部的两个 J)。

您需要一个数据框列表。如果您是用 assign 直接把数据框放进环境中,并且 temp 保存了它们的名称,那么可以用下面这行代码把它们从环境中提取成一个列表:
# Gather the objects whose names are stored in `temp` into one named list.
dfs <- mget(x = unlist(temp))

现在您可以检查每个数据框的第一列是否都相同:
# Use the first column of the first data frame as the reference, compared as
# character so factor columns with different level sets do not raise an error.
master <- dfs[[1]][[1]]
# identical() requires the same length, the same values in the same order, and
# matching NA positions. Element-wise `==` would recycle on a length mismatch
# and yield NA wherever a value is missing.
# vapply() guarantees one logical per data frame; all() then reduces them to a
# single TRUE/FALSE.
all(vapply(
  dfs,
  function(df) identical(as.character(master), as.character(df[[1]])),
  logical(1)
))

这将生成一个列表,根据第一个数据帧检查每个帧,然后确保它们都为真。

不过,读取数据时请考虑直接把数据框加载到一个列表中,而不是用 assign 放进环境。之后如有需要,可以对该列表执行 as.environment,再执行 attach。

最新更新