我有以下脚本
library(zoo)
library(dplyr)
library(tidyverse)
# Read the header-less CSV; read.csv names the columns V1, V2, ...
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
dat <- read.csv("sep_2015.csv", header = FALSE)

# Attach one calendar day per row, 1 September to 31 December 2015.
# NOTE(review): assumes the file has exactly one row per day in that range
# (122 rows) -- confirm against the data.
dat$date <- seq(
  from = as.Date("2015-09-01"),
  to = as.Date("2015-12-31"),
  by = "day"
)

# Keep the original row number as a `rowname` column, then return the first
# row that satisfies every condition below. With align = "left" each rolling
# window starts at the current row and extends forward; windows that run past
# the end of the data are NA (fill = NA) and are dropped by filter().
dat %>%
  rownames_to_column() %>%
  filter(
    V2 < 90 &
      # all 7 rows starting here have V2 < 90
      rollsum(V2 < 90, 7, fill = NA, align = "left") >= 7 &
      # all 7 rows starting here have V1 > 2
      rollsum(V1 > 2, 7, fill = NA, align = "left") >= 7 &
      # at least 15 of the 20 rows starting here have V2 < 90
      rollsum(V2 < 90, 20, fill = NA, align = "left") >= 15
  ) %>%
  slice(1)
输出如下:
rowname V1 V2 date
1 41 5.583102 45.83082 2015-10-11
我正在处理多个文件(从sep_1979到sep_2020),其名称如下:
.....
sep_2014.csv
sep_2015.csv
sep_2016.csv
....
示例数据(sep_2015.csv):
structure(list(V1 = c(1.781016934, 5.245650757, 5.253808428,
3.4500901, 1.904631502, 5.583155269, 5.29432007, 2.493240863,
2.356109739, 2.505279709, 3.100681377, 4.352358556, 9.021734968,
7.037204701, 1.946181317, 2.375415753, 4.818568252, 4.508796957,
5.009930139, 3.413858254, 2.642863447, 4.496943406, 1.101138556,
1.58130412, 5.907897257, 9.310602975, 12.8050039, 10.52127595,
10.58324383, 5.719938811, 5.65915423, 10.23184856, 8.916484733,
8.006767597, 7.623137477, 6.735120637, 3.913288259, 5.145787145,
5.516434408, 3.766181753, 5.583101736, 8.62718523, 8.55994889,
7.738160311, 9.428281922, 14.54588945, 21.1567956, 19.25095128,
11.18693546, 6.459297562, 8.113929381, 6.791538854, 2.30569946,
4.271416627, 7.043436661, 4.044279911, 4.841727481, 7.050044333,
8.646260752, 4.475859911, 9.545056312, 6.185563838, 7.545238783,
8.397502556, 6.124560982, 4.50970303, 6.682802556, 6.247809616,
5.986996325, 4.525555447, 8.719674593, 8.245588214, 6.028287485,
2.346912866, 2.842578055, 7.320520815, 6.337556706, 3.298882387,
4.512427524, 8.360934487, 9.222761141, 8.958611924, 7.982833144,
10.50259016, 8.866081434, 5.120412581, 11.46034903, 12.4205173,
10.25257175, 2.466221616, 2.692953769, 0.681709806, 1.362974813,
5.5663453, 9.601658711, 8.416295206, 3.412426219, 6.865540765,
11.76102145, 12.0902957, 6.870356614, 2.020952899, 4.363628479,
4.879838354, 4.059975788, 10.37719133, 10.37127041, 11.40995727,
10.7018141, 11.43079722, 6.58367129, 6.628593068, 2.18586382,
3.712579965, 2.874565445, 2.791741886, 4.21014224, 0.881548513,
8.251492289, 7.62408847, 5.940576571, 8.981119084), V2 =
c(121.0261069,
85.00304523, 81.87989778, 103.7505395, 210.2369147, 264.2405565,
282.9403406, 26.88123501, 19.2594093, 8.196100414, 45.26456729,
58.17439097, 87.16714037, 109.3834486, 114.840361, 38.91351028,
37.81463493, 24.49148024, 29.33726467, 13.51886983, 78.0736854,
41.91203091, 55.80655486, 338.9064404, 338.2241727, 4.297849857,
343.7324297, 266.8110122, 189.2300192, 118.7736246, 86.03606596,
120.0474015, 151.7655159, 85.86331853, 67.23147222, 51.7641341,
20.78357193, 271.0026858, 266.308435, 311.3070459, 45.83082398,
64.93928317, 63.04855403, 35.94151677, 43.73557838, 55.52061899,
55.9783368, 72.09428879, 81.11278896, 75.28379332, 56.17839757,
46.85929244, 27.56930947, 64.41308939, 75.61366497, 66.62037577,
43.60322015, 89.88800444, 101.8289993, 99.75479947, 80.43658344,
78.52117729, 83.81080108, 91.30824192, 96.42780607, 101.3614562,
118.3864844, 101.9711437, 103.8823967, 83.42613393, 74.27778556,
97.92986085, 113.0191678, 147.4666885, 134.7889272, 104.79725,
126.037476, 121.3500898, 83.21475735, 89.23421897, 92.92463803,
85.92837336, 80.02208464, 55.23356116, 52.95700516, 30.38190174,
64.76557575, 96.63946752, 86.73461083, 95.37929333, 62.17987768,
12.7633562, 273.7881121, 76.47255644, 113.7227032, 149.0157726,
78.67676081, 71.17172364, 89.33577561, 154.9133108, 221.6713695,
85.69030561, 78.51796584, 97.6857188, 78.00024078, 67.04777272,
66.22047934, 66.65994223, 68.62200021, 79.88214929, 84.2911525,
98.45874358, 175.7545009, 214.3253056, 277.4996992, 348.4682651,
97.83108644, 348.9214276, 49.4694104, 70.54130568, 79.26767621,
80.66615013), date = structure(c(16679, 16680, 16681, 16682,
16683, 16684, 16685, 16686, 16687, 16688, 16689, 16690, 16691,
16692, 16693, 16694, 16695, 16696, 16697, 16698, 16699, 16700,
16701, 16702, 16703, 16704, 16705, 16706, 16707, 16708, 16709,
16710, 16711, 16712, 16713, 16714, 16715, 16716, 16717, 16718,
16719, 16720, 16721, 16722, 16723, 16724, 16725, 16726, 16727,
16728, 16729, 16730, 16731, 16732, 16733, 16734, 16735, 16736,
16737, 16738, 16739, 16740, 16741, 16742, 16743, 16744, 16745,
16746, 16747, 16748, 16749, 16750, 16751, 16752, 16753, 16754,
16755, 16756, 16757, 16758, 16759, 16760, 16761, 16762, 16763,
16764, 16765, 16766, 16767, 16768, 16769, 16770, 16771, 16772,
16773, 16774, 16775, 16776, 16777, 16778, 16779, 16780, 16781,
16782, 16783, 16784, 16785, 16786, 16787, 16788, 16789, 16790,
16791, 16792, 16793, 16794, 16795, 16796, 16797, 16798, 16799,
16800), class = "Date")), row.names = c(NA, -122L), class = "data.frame")
我想:
(1)对每个文件应用上述脚本
(2)将slice(1)得到的日期保存到文本文件
(3)最终输出是1979年到2020年的日期集合
我如何在R中实现这个?如果你能帮我,我会很感激的。
您可以尝试以下解决方案:
- 从每个文件名中提取年份值
- 使用paste创建该年份的日期序列,并将其添加为数据中的一列。
- 按条件筛选行,并从中选择第一行。
- 将结果合并到一个文件中
library(tidyverse)
library(zoo)
# Collect every input file in the working directory whose name starts with
# "sep_" and ends with ".csv". The literal dot is written as "\\." because a
# single backslash before "." is not a valid escape sequence in an R string
# and makes the script fail to parse.
all_files <- list.files(pattern = "^sep_.*\\.csv$")

# For each file, find the first date that meets all conditions and return it
# together with the file name; map_df() row-binds the per-file results.
result <- map_df(all_files, function(file) {
  # The year is taken from the number in the file name,
  # e.g. "sep_2015.csv" -> 2015.
  year <- parse_number(file)

  # One calendar day per row, 1 September to 31 December of that year.
  # NOTE(review): assumes each file has exactly one row per day in this
  # range -- confirm against the data.
  dates <- seq(
    as.Date(paste(year, 9, 1, sep = "-")),
    as.Date(paste(year, 12, 31, sep = "-")),
    by = "day"
  )

  dat <- read.csv(file, header = FALSE)

  # With align = "left" each rolling window starts at the current row and
  # extends forward; windows that run past the end of the data are NA
  # (fill = NA) and are dropped by filter().
  dat %>%
    mutate(date = dates) %>%
    filter(
      V2 < 90 &
        # all 7 rows starting here have V2 < 90
        rollsum(V2 < 90, 7, fill = NA, align = "left") >= 7 &
        # all 7 rows starting here have V1 > 2
        rollsum(V1 > 2, 7, fill = NA, align = "left") >= 7 &
        # at least 15 of the 20 rows starting here have V2 < 90
        rollsum(V2 < 90, 20, fill = NA, align = "left") >= 15
    ) %>%
    # First matching row only; a file with no match contributes no row.
    slice(1) %>%
    transmute(date, file = file)
})
result
应该看起来像这样-
result
# date file
#1979-10-10 sep_1979.csv
#1980-11-21 sep_1980.csv
#1981-09-10 sep_1981.csv
#
#
您可以根据需要将result写入CSV文件或文本文件。
# Save the collected dates as a comma-separated file, without row names.
write.csv(result, file = "result.csv", row.names = FALSE)
# Save the same table as plain text using write.table's default formatting.
write.table(result, file = "result.txt", row.names = FALSE)