在 R 中按日期对部分列进行排序



我有一个数据框,其中部分列的顺序不正确(这些列的列名是日期)。参见:

# Example wide-format data: five descriptive columns followed by one
# measurement column per survey date, listed in non-chronological order.
# check.names = FALSE keeps the "YYYY.MM.DD" column names verbatim, including
# the three names that appear twice.
data1989 <- data.frame(
  date_fire    = c("1987-02-01", "1987-07-03", "1988-01-01"),
  Foresttype   = c("oak", "pine", "oak"),
  meanSolarRad = c(500, 550, 450),
  meanRainfall = c(600, 300, 450),
  meanTemp     = c(14, 15, 12),
  `1988.01.01` = c(0.5, 0.589, 0.66),
  `1986.06.03` = c(0.56, 0.447, 0.75),
  `1986.10.19` = c(0.8, NA, 0.83),
  `1988.01.19` = c(0.75, 0.65, 0.75),
  `1986.06.19` = c(0.1, 0.55, 0.811),
  `1987.10.19` = c(0.15, 0.12, 0.78),
  `1988.01.19` = c(0.2, 0.22, 0.32),
  `1986.06.19` = c(0.18, 0.21, 0.23),
  `1987.10.19` = c(0.21, 0.24, 0.25),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
> data1989
date_fire Foresttype meanSolarRad meanRainfall meanTemp 1988.01.01 1986.06.03 1986.10.19 1988.01.19 1986.06.19 1987.10.19 1988.01.19 1986.06.19 1987.10.19
1 1987-02-01        oak          500          600       14      0.500      0.560       0.80       0.75      0.100       0.15       0.20       0.18       0.21
2 1987-07-03       pine          550          300       15      0.589      0.447         NA       0.65      0.550       0.12       0.22       0.21       0.24
3 1988-01-01        oak          450          450       12      0.660      0.750       0.83       0.75      0.811       0.78       0.32       0.23       0.25

我想按日期升序对这些列进行排序,并保持前5列不变。请记住,在我的原始数据集中,有30个起始列需要保持不变。

如前所述,尽量避免使用包含日期、类别值和其他指标等数据元素的列的宽格式数据。相反,使用长格式的整洁数据,其中排序更容易,包括聚合、合并、绘图和建模。

具体而言,可以考虑用 reshape 将各日期列融合为一个字段(例如命名为 quarter),并把对应的数值放入 value 列。这样用 order 对 quarter 列排序就很容易:

# RESHAPE WIDE TO LONG
# Position of the first date column; every column from here to the end is
# treated as a date column. Adjust to match the real data set.
first_date_col <- 6
date_cols <- first_date_col:ncol(data1989)
date_labels <- names(data1989)[date_cols]

# reshape() looks the varying columns up by name, so repeated date names would
# all resolve to the first matching column. Work on a copy whose date columns
# have unique names, while the original labels are still used as the time values.
wide_data1989 <- data1989
names(wide_data1989)[date_cols] <- make.unique(date_labels)

long_data1989 <- reshape(
  wide_data1989,
  varying = names(wide_data1989)[date_cols],
  times = date_labels,
  v.names = "value", timevar = "quarter", ids = NULL,
  # One row name per output row (input rows x date columns) instead of a
  # fixed 1:1E4, which breaks as soon as the result has more than 10,000 rows.
  new.row.names = seq_len(nrow(data1989) * length(date_cols)),
  direction = "long"
)

# ORDER DATES AND RESET row.names
# Both keys are zero-padded character dates, so character ordering is
# chronological.
long_data1989 <- long_data1989[with(long_data1989, order(date_fire, quarter)), ]
row.names(long_data1989) <- NULL
long_data1989

在线演示

如果您想使用 dplyr,这里有一个替代方案。请注意,每个列名都必须是唯一的,而您的数据框中有一些重复的列名。

library(dplyr)

# Example data: five descriptive columns followed by one measurement column
# per survey date (named "YYYY.MM.DD"). check.names = FALSE keeps those names
# exactly as written.
data1989 <- data.frame(
  "date_fire" = c("1987-02-01", "1987-07-03", "1988-01-01"),
  "Foresttype" = c("oak", "pine", "oak"),
  "meanSolarRad" = c(500, 550, 450),
  "meanRainfall" = c(600, 300, 450),
  "meanTemp" = c(14, 15, 12),
  "1988.01.01" = c(0.5, 0.589, 0.66),
  "1986.06.03" = c(0.56, 0.447, 0.75),
  "1986.10.19" = c(0.8, NA, 0.83),
  "1988.01.19" = c(0.75, 0.65, 0.75),
  "1986.06.19" = c(0.1, 0.55, 0.811),
  "1987.10.19" = c(0.15, 0.12, 0.780),
  # The duplicated date columns are left out because this approach needs
  # unique column names:
  # "1988.01.19" = c(0.2, 0.22,0.32),
  # "1986.06.19" = c(0.18, 0.21,0.23),
  # "1987.10.19" = c(0.21, 0.24, 0.250),
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Sort date column names. Replace 6 with first date column.
# A plain character sort is chronological here only because the names are
# zero-padded "YYYY.MM.DD".
sorted_colnames <- sort(names(data1989)[6:ncol(data1989)])

# Sort columns. Replace 5 with last non-date column.
# all_of() marks sorted_colnames as an external character vector of column
# names, so it cannot be confused with a column that happens to be called
# "sorted_colnames", and a missing name raises an error.
data1989 %>%
  select(1:5, all_of(sorted_colnames))

我们可以将日期列名转换为Date类,执行order,然后将其用作列索引

# Positions of the columns whose names look like dates ("YYYY.MM.DD").
# Backslashes have to be doubled inside an R string literal; a single "\d"
# is rejected by the parser as an unrecognized escape.
i1 <- grep("^\\d{4}\\.\\d{2}\\.\\d{2}$", names(data1989))
# Chronological ranking of those columns, from parsing their names as Dates.
date_order <- order(as.Date(names(data1989)[i1], "%Y.%m.%d"))
# Non-date columns first, in their original order, then the date columns by
# increasing date. Indexing i1 directly avoids assuming that the date columns
# form one contiguous run at the end of the data frame.
data1989[c(setdiff(seq_along(data1989), i1), i1[date_order])]
# date_fire Foresttype meanSolarRad meanRainfall meanTemp 1986.06.03 1986.06.19 1986.06.19.1 1986.10.19 1987.10.19
#1 1987-02-01        oak          500          600       14      0.560      0.100         0.18       0.80       0.15
#2 1987-07-03       pine          550          300       15      0.447      0.550         0.21         NA       0.12
#3 1988-01-01        oak          450          450       12      0.750      0.811         0.23       0.83       0.78
#  1987.10.19.1 1988.01.01 1988.01.19 1988.01.19.1
#1         0.21      0.500       0.75         0.20
#2         0.24      0.589       0.65         0.22
#3         0.25      0.660       0.75         0.32

基础 R 解决方案(类似于 @Parfait 的方案):

# Parse every column name as a date once; names that are not in "YYYY.MM.DD"
# form come back as NA, which separates the date columns from the id columns.
survey_dates <- as.Date(names(data1989), "%Y.%m.%d")
is_date_col <- !is.na(survey_dates)

# Reshape dataframe wide --> long:
df_long <-
  reshape(data1989,
          direction = "long",
          varying = which(is_date_col),
          idvar = which(!is_date_col),
          v.names = "value",
          times = na.omit(survey_dates),
          timevar = "date_surveyed",
          # One row name per output row: input rows x number of date columns.
          new.row.names = seq_len(nrow(data1989) * sum(is_date_col)))

# Order the data frame and reset the index:
ordered_df_long <- data.frame(
  df_long[with(df_long, order(date_fire, date_surveyed)), ],
  row.names = NULL
)

最新更新