在 R 中删除数据框里位于两个整数触发值之外的行



我有一个数据框,记录了某位参与者在一项分段(episode)任务中的行为。假设每个片段以触发值 90 开始,并在出现一个取值在 40 多(例如 41、42、44)范围内的特定触发值时结束。下面是一个示例数据框,其中一列是行号,另一列是实际的触发值。

# Example data: ex1 is a running row counter, ex2 holds the recorded trigger codes.
ex2 <- c(41, 1, 1, 90, 1, 1, 1, 44, 1, 90, 1, 2, 42, 1, 1, 1, 1, 90, 1, 41)
ex1 <- as.numeric(seq_along(ex2))
df <- data.frame(ex1 = ex1, ex2 = ex2)
> df
ex1 ex2
1    1  41
2    2   1
3    3   1
4    4  90
5    5   1
6    6   1
7    7   1
8    8  44
9    9   1
10  10  90
11  11   1
12  12   2
13  13  42
14  14   1
15  15   1
16  16   1
17  17   1
18  18  90
19  19   1
20  20  41

现在,我想删除所有位于片段开始和结束之外的行:这些行记录的是打字行为,由于不属于任何片段,所以没有分析价值。因此,我希望最终得到这样的数据框:

# Desired result: only the rows that fall inside an episode are kept.
ex1 <- c(1, 4:8, 10:13, 18:20)
ex2 <- c(41, 90, 1, 1, 1, 44, 90, 1, 2, 42, 90, 1, 41)
df <- data.frame(ex1 = ex1, ex2 = ex2)
> df
ex1 ex2
1    1  41
2    4  90
3    5   1
4    6   1
5    7   1
6    8  44
7   10  90
8   11   1
9   12   2
10  13  42
11  18  90
12  19   1
13  20  41

我一直在尝试使用 subset(),但无法让它同时按“一个数值范围”和“一个具体数值”来界定要保留的行。

提前感谢!

设置值:

# Input for the answer: row counter (ex1) and trigger codes (ex2).
ex1 <- as.numeric(1:20)
ex2 <- c(
  41, 1, 1, 90, 1, 1, 1, 44, 1, 90,
  1, 2, 42, 1, 1, 1, 1, 90, 1, 41
)
before <- data.frame(ex1 = ex1, ex2 = ex2)
before
ex1 ex2
1    1  41
2    2   1
3    3   1
4    4  90
5    5   1
6    6   1
7    7   1
8    8  44
9    9   1
10  10  90
11  11   1
12  12   2
13  13  42
14  14   1
15  15   1
16  16   1
17  17   1
18  18  90
19  19   1
20  20  41

我写了一个应该能完成这项工作的函数。该函数是基于我对您问题的理解构建的,因此可能无法完全符合您的实际场景。不过,我相信您只需对这个函数稍作调整,就能满足自己的需求。

library(dplyr)
#' Keep only the rows of a trigger log that belong to an episode
#'
#' An episode begins on a row whose `ex2` equals `start` and runs through the
#' first later row whose `ex2` lies in `[end, end + 10]`. End-trigger rows that
#' appear before the first start are kept as well (the closing row of an
#' episode whose start is not in the data).
#'
#' @param start Trigger value that opens an episode (default 90).
#' @param end Lower bound of the end-trigger range; the range is
#'   `end` to `end + 10`, inclusive (default 40).
#' @param data A data frame with columns `ex1` (used for ordering) and `ex2`
#'   (the trigger values).
#' @return A data frame with the retained rows, sorted by `ex1`, with row
#'   names renumbered from 1. Zero rows if no trigger is found.
episode <- function(start = 90, end = 40, data) {
  stopifnot(
    is.data.frame(data),
    all(c("ex1", "ex2") %in% names(data))
  )

  trigger <- data$ex2
  # which() returns ascending positions, so the first match is the earliest.
  end_idx <- which(trigger >= end & trigger <= end + 10)
  start_idx <- which(trigger == start)
  last_row <- nrow(data)

  # Row positions covered by each episode. An episode that never sees an end
  # trigger runs to the last row instead of failing on min() of nothing.
  spans <- lapply(start_idx, function(first_row) {
    later_ends <- end_idx[end_idx > first_row]
    stop_row <- if (length(later_ends) > 0) later_ends[1] else last_row
    first_row:stop_row
  })

  # End triggers that precede the first start; with no start at all, every
  # end trigger counts as "before the first start".
  leading_ends <- if (length(start_idx) > 0) {
    end_idx[end_idx < start_idx[1]]
  } else {
    end_idx
  }

  # unique() stops a row from being emitted twice when two starts share the
  # same end trigger (overlapping episodes).
  keep <- sort(unique(c(leading_ends, unlist(spans))))
  clean_df <- data[keep, , drop = FALSE]

  # Sort by ex1, then renumber so row names follow the final order.
  clean_df <- clean_df[order(clean_df$ex1), , drop = FALSE]
  rownames(clean_df) <- NULL
  clean_df
}

使用episode函数生成after数据集。

# Run the filter on the example data with the triggers named explicitly.
after <- episode(data = before, start = 90, end = 40)
after
ex1 ex2
1    1  41
2    4  90
3    5   1
4    6   1
5    7   1
6    8  44
7   10  90
8   11   1
9   12   2
10  13  42
11  18  90
12  19   1
13  20  41

下面是只使用 base R 的做法:

# Same example data as in the question: row counter and trigger codes.
ex1 <- as.double(1:20)
ex2 <- c(41, 1, 1, 90, 1, 1, 1, 44, 1, 90, 1, 2, 42, 1, 1, 1, 1, 90, 1, 41)
df <- data.frame(ex1 = ex1, ex2 = ex2)

找出片段起点(值为 90)的索引;如果第一个起点不在第 1 行,则它之前的行属于不完整的片段,通过取子集把这些行去掉:

# Locate the start triggers and drop every row ahead of the first one.
start_idx <- which(df[["ex2"]] == 90)
df <- df[seq(start_idx[1], nrow(df)), ]

重新计算起点索引,并计算终点索引(取值 >= 40 且 < 90):

# Positions are relative to the trimmed df, so the starts are looked up again;
# an end trigger is any value from 40 up to, but not including, 90.
start_idx <- which(df[["ex2"]] == 90)
end_idx <- which(df[["ex2"]] < 90 & df[["ex2"]] >= 40)

创建一个空列表,并在循环中按“起点:终点”逐段取出子集:

# Collect one slice of df per episode. Each start is paired with the first end
# trigger that follows it rather than with the k-th end, so an extra end
# trigger cannot shift the pairing; an episode with no end trigger runs to the
# last row. The list is preallocated and seq_along() keeps the loop from
# running when there are no starts.
df_lst <- vector("list", length(start_idx))
for (k in seq_along(start_idx)) {
  next_end <- end_idx[end_idx > start_idx[k]][1]
  if (is.na(next_end)) {
    next_end <- nrow(df)
  }
  df_lst[[k]] <- df[start_idx[k]:next_end, ]
}

把它们放在一起

# Stack the per-episode slices into one data frame (the original call was
# missing the comma between the two do.call() arguments).
df2 <- do.call(rbind, df_lst)
df2
ex1 ex2
4    4  90
5    5   1
6    6   1
7    7   1
8    8  44
10  10  90
11  11   1
12  12   2
13  13  42
18  18  90
19  19   1
20  20  41

相当紧凑。

相关内容

  • 没有找到相关文章

最新更新