我正在尝试创建一个函数:给它一个由日期时间列组成的数据框(例如下面的 dfTdata),并指定其中某一个日期时间列(例如 TrWindDateTime 列),让它返回与该指定列平均差值最小的另一个日期时间列。这同时涉及我非常不擅长的两件事:在 R 中处理日期时间列,以及编写循环。我找到了一些代码,并一直在尝试修改它(见下文)。如有任何提示,我将不胜感激。
代码:
## Difference between one reference column and every other column of a
## data.frame.
#
# x   : data.frame whose columns are numeric or date-time (POSIXct) values.
# ref : the reference column, given either as a column name (string) or as a
#       column position. Defaults to the first column.
#
# Returns a data.frame with one column per non-reference column of `x`
# (same names, same row order) holding `other - reference` as plain numbers.
# Values are passed through as.numeric() first, so POSIXct columns yield
# differences in seconds and every column is in the same unit.
#
# The earlier version indexed column i + 1 on the last iteration (an
# out-of-range column) and returned the value of the `for` loop (NULL)
# instead of the computed differences.
funcDiff <- function(x, ref = 1L) {
  stopifnot(is.data.frame(x), length(ref) == 1L)
  ref_name <- if (is.character(ref)) ref else names(x)[[ref]]
  stopifnot(ref_name %in% names(x))

  ref_vals <- as.numeric(x[[ref_name]])
  others <- setdiff(names(x), ref_name)

  # Start from the non-reference columns so row names and column order are
  # kept, then overwrite each one with its difference to the reference.
  diffs <- x[others]
  for (nm in others) {
    diffs[[nm]] <- as.numeric(x[[nm]]) - ref_vals
  }
  diffs
}
## Creating data.frame of diff between columns
dfDateDiff <- funcDiff(dfTdata)
## Mean absolute difference for each column of differences.
# abs() is used because "closest" should ignore the sign of the difference;
# na.rm = TRUE is needed because the data contains NA timestamps, which would
# otherwise turn every mean into NA.
dfMeanDiff <- vapply(
  dfDateDiff,
  function(d) mean(abs(as.numeric(d)), na.rm = TRUE),
  numeric(1)
)
## Column with the smallest mean absolute difference.
# which.min() works on the numeric vector directly (min() on a list errors),
# skips NA/NaN entries, and keeps the column name as the name of the result.
# The position refers to the columns of dfDateDiff.
ColNum <- which.min(dfMeanDiff)
数据:dput(dfTdata)
structure(list(TrWindDateTime = c(1422683580, 1422559320, 1423162920,
1423598220, 1423697880, 1423545300, 1424580900, 1424242200, 1426370640,
1426616460, 1426479960, 1426721700, 1427121780, 1428707940, 1430257500,
1432256100, 1433912820, 1435505100, 1437770880, 1440517860, 1440373200,
1440028200, 1446993480, 1449501900, 1449095520, 1449449340, 1450308780,
1449771840, 1438615980, 1439432400, 1440340980, 1440090360, 1440027300,
1439232120, 1440593340, 1438405920, 1442772900, 1443192720, 1443110040,
1443153600, 1441950840, 1443343800, 1443723720, 1445739720, 1443759360,
1444959300, 1444851960, 1445704920, 1443314280, 1443400980, 1443834480,
1445292540, 1447201440, 1447201560, 1447535460, 1447542600, 1448128440,
1446287220, 1445973000, 1448101560, 1447932900, 1446932220, 1447351680,
1447981980, 1447313160, 1447693140, 1447702980, 1447801140, 1446873960,
1447267260, 1446932700, 1448061660, 1447280580, 1447534680, 1446919980,
1447698180, 1447057440, 1447205880, 1447693440, 1447016700, 1448043660,
1447376220, 1447931340, 1449606480, 1448952120, 1448352120, 1448165040,
1450035060, 1449361860, 1450128060, 1449465540, 1449707520, 1449247140,
1449350520, 1449704160, 1449717180, 1449711360, 1449768540, 1449862440,
1449756300), WindDateTime = c(1422690780, 1422560640, 1423163280,
1423600200, 1423701000, 1423546920, 1424582880, 1424243460, 1426373160,
1426617000, 1426484820, 1426725600, 1427123580, 1428708420, 1430259240,
1432260660, 1433914800, 1435507800, NA, 1440519180, 1440373800,
1440028200, 1446994860, 1449503460, 1449098340, 1449449520, 1450310220,
1449772620, 1438616580, 1439432400, 1440341460, 1440090600, 1440028440,
1439232960, 1440594240, 1438406340, 1442774040, 1443193200, 1443110940,
1443154980, 1441952640, 1443353400, 1443724440, 1445740980, 1443761220,
1444962360, 1444853940, 1445705040, 1443316200, 1443402420, 1443835560,
1445293380, 1447202340, 1447202760, 1447535760, 1447543080, 1448130120,
1446287520, 1445974860, 1448101740, 1447945680, 1446934620, 1447353240,
1447984020, 1447313760, 1447693740, 1447705080, 1447801920, 1446874620,
1447271580, 1446933720, 1448066700, 1447283160, 1447534860, 1446920820,
1447699380, 1447058820, 1447207200, 1447694400, 1447017120, 1448044320,
1447376760, 1447931580, 1449607020, 1448952960, 1448354640, 1448167080,
1450036500, 1449363240, 1450128060, 1449466380, 1449707940, 1449247500,
1449357780, 1449704760, 1449718200, 1449712800, 1449769200, 1449864360,
1449758340), TomDateTime = c(1422688560, NA, 1423162980, 1423599540,
1423709640, NA, 1424580900, 1424242380, 1426380780, 1426617840,
1426486620, 1426722660, 1427122380, 1428709200, 1430257740, 1432275120,
1433921340, 1435508940, 1437769140, 1440517980, 1440373200, 1440027360,
1446993540, 1449503280, 1449097860, 1449449520, 1450310520, 1449773220,
1438616040, 1439431500, 1440342840, 1440091260, 1440027480, 1439232120,
1440593400, 1438407900, 1442781360, 1443193560, 1443110040, 1443153660,
1441951500, 1443352200, 1443724500, NA, 1443763380, 1444968780,
1444853580, 1445706240, 1443315420, 1443401340, 1443834540, 1445292540,
1447201440, 1447201620, 1447538340, 1447554360, 1448128800, 1446287220,
1445989380, 1448109540, 1447932900, 1446934260, 1447351800, 1447982520,
NA, 1447693500, 1447703040, 1447802640, 1446875880, 1447275360,
1446940440, 1448061720, 1447282320, 1447548840, 1446921780, 1447698240,
1447058280, 1447206540, 1447703700, 1447016760, 1448052060, 1447376220,
1447936920, 1449607500, 1448956380, 1448353920, 1448166120, 1450035900,
1449363540, 1450126920, 1449465540, 1449707580, 1449247500, 1449350520,
1449716460, 1449730920, 1449717120, 1449769200, 1449862740, 1449757320
), CircleDateTime = c(1422688740, 1422560280, NA, 1423599720, 1423707120,
1423546800, 1424591940, 1424242800, 1426380900, 1426617840, 1426484640,
1426724280, 1427122680, 1428708720, 1430257800, 1432260720, 1433921340,
1435509060, 1437770880, 1440518220, 1440373380, 1440030660, 1446993780,
1449503520, 1449097560, 1449449580, 1450310100, 1449772680, 1438616400,
1439431740, 1440343440, 1440091620, 1440027600, 1439232540, 1440594060,
1438408020, 1442781660, 1443195120, 1443110580, 1443153720, 1441952220,
1443353160, 1443725040, 1445740920, 1443763500, 1444965120, 1444853760,
1445706960, 1443315420, 1443401640, 1443835560, 1445292660, 1447201980,
1447201680, 1447538340, 1447542900, 1448128800, 1446287280, 1445991480,
1448110380, 1447932960, 1446934860, 1447353540, 1447982940, NA,
1447693320, 1447703220, 1447802820, 1446876000, 1447271820, 1446933480,
1448062020, 1447283100, 1447549020, 1446921300, 1447698300, 1447058280,
1447206840, 1447701300, 1447017240, 1448044320, 1447376340, 1447937700,
1449607380, 1448952840, 1448354280, 1448166960, 1450036800, 1449363300,
1450129380, 1449466020, 1449707820, 1449247680, 1449350640, 1449707100,
1449718440, 1449718500, 1449769920, 1449864600, 1449757560),
SteamDateTime = c(NA, 1422568800, 1423200600, 1423607880,
NA, NA, NA, NA, 1426610820, 1426628100, 1426489860, NA, 1427138220,
1428715200, NA, NA, 1433922660, 1435511100, NA, 1440545100,
1440387480, NA, 1447015500, 1449516420, 1449100500, 1449453600,
1450328820, 1449777000, 1438638480, NA, 1440351900, 1440105240,
1440053760, 1439255400, 1440629040, 1438411500, 1442808420,
1443210900, 1443148620, NA, NA, NA, NA, NA, NA, NA, NA, 1445724660,
NA, NA, 1443856860, 1445314620, 1447225440, NA, 1447566360,
NA, NA, 1446309660, NA, 1448127000, 1447964100, 1446969300,
1447365240, NA, NA, 1447728960, 1447729620, NA, NA, 1447300020,
1446963840, 1448076900, NA, 1447572600, 1446937020, 1447717200,
1447100280, NA, 1447734360, 1447051080, 1448075040, 1447388280,
1447956000, 1449641580, NA, 1448412600, NA, 1450056840, 1449381360,
NA, NA, 1449736800, 1449273600, 1449390900, 1449719340, NA,
NA, 1449787440, NA, 1449772080), AnyDateTime = c(NA, 1422561120,
1423200180, 1423607880, NA, NA, NA, NA, 1426404420, 1426628100,
1426489860, 1426741080, 1427138220, 1428715200, NA, NA, 1433922660,
1435511100, 1437781680, 1440545100, 1440387480, NA, 1447015500,
1449516420, 1449100500, 1449453600, 1450328820, 1449777000,
1438638480, NA, 1440351900, 1440105240, 1440053760, 1439255400,
1440629040, 1438411500, 1442808420, 1443210900, 1443148620,
NA, NA, NA, NA, NA, NA, NA, NA, 1445724660, NA, NA, 1443856860,
1445314620, 1447225440, NA, 1447566360, NA, NA, 1446309660,
NA, 1448127000, 1447964100, 1446969300, 1447365240, NA, NA,
1447728960, 1447729620, NA, NA, 1447300020, 1446963840, 1448076900,
NA, 1447572600, 1446937020, 1447717200, 1447100280, NA, 1447734360,
1447051080, 1448075040, 1447388280, 1447956000, 1449641580,
NA, 1448412600, NA, 1450056840, 1449381360, NA, NA, 1449736800,
1449273600, 1449390900, 1449719340, NA, NA, 1449787440, 1449894600,
1449772080)), .Names = c("TrWindDateTime", "WindDateTime",
"TomDateTime", "CircleDateTime", "SteamDateTime", "AnyDateTime"
), row.names = c(NA, 100L), class = "data.frame")
这是你想要的吗?将参考列的名称以字符串形式传给函数,并通过 give_back 参数告诉它你想要返回什么。默认情况下,它返回与参考列平均(绝对值)时间差最小的那一列;你也可以只让它返回该列的名称(give_back = 'name'),或者返回该列的名称及其与参考列之间的平均差值(give_back = 'info')。
# Run this to convert to POSIXct
library('lubridate')
library('magrittr') # this is just for the first pipe
# Each column holds seconds since the Unix epoch; map as.POSIXct over the
# columns and rebuild the data.frame from the resulting list.
dfTdata <- as.data.frame(
  lapply(dfTdata, as.POSIXct, origin = '1970-01-01')
)
# Find the column that is, on average, closest in time to a reference column.
#
# df_col    : name (string) of the reference column in `data`.
# data      : data.frame of date-time (POSIXct) or numeric columns. Defaults
#             to the global `dfTdata`.
# give_back : 'name' -> name(s) of the closest column (all ties are returned);
#             'info' -> named list holding the mean absolute difference for
#                       the closest column(s); for POSIXct input this is a
#                       difftime in seconds;
#             anything else (default '') -> the closest column itself (the
#                       first one if several columns tie).
#
# Rows where either value is NA are ignored when averaging. An error is raised
# when `df_col` is not a column of `data`, or when no other column has any
# non-missing difference to it.
funcDiff <- function(df_col, data = dfTdata, give_back = '') {
  stopifnot(
    is.character(df_col), length(df_col) == 1L,
    df_col %in% names(data)
  )

  # Take the candidate columns from `data` itself, not from the global
  # dfTdata, so the function works for whatever data set the caller passes.
  othernames <- setdiff(names(data), df_col)
  ref <- data[[df_col]]

  coldifs <- lapply(othernames, function(nm) {
    delta <- data[[nm]] - ref
    # Subtracting POSIXct values picks the unit (secs/mins/hours/days)
    # separately for every column. Force seconds so the means of different
    # columns are directly comparable once the units are stripped below.
    if (inherits(delta, "difftime")) {
      units(delta) <- "secs"
    }
    mean(abs(delta), na.rm = TRUE)
  })
  names(coldifs) <- othernames

  avg <- vapply(coldifs, as.numeric, numeric(1))
  # A column with no overlapping non-NA rows has a NaN mean; leave it out.
  avg <- avg[!is.na(avg)]
  if (length(avg) == 0L) {
    stop(
      "no column has a non-missing difference to '", df_col, "'",
      call. = FALSE
    )
  }
  min_col <- names(avg)[avg == min(avg)]

  if (give_back == 'name') {
    min_col
  } else if (give_back == 'info') {
    coldifs[min_col]
  } else {
    # `[[` needs a single name; fall back to the first column on a tie.
    data[[min_col[1L]]]
  }
}
# Use like this
# With give_back left at its default (''), the call returns the selected
# column of the data itself.
funcDiff('TrWindDateTime')
# With give_back = 'info', the call returns the mean difference for the
# selected column, labelled with that column's name.
funcDiff('TrWindDateTime', give_back = 'info')
作为另一种选择,下面是一个使用 data.table 包实现的版本。下面的函数返回最小的平均差值及其对应的列名,你还可以指定差值所用的时间单位。
library(data.table)
#------------------------------------------------------
# Turn into a data.table and convert to datetime class
#------------------------------------------------------
# Convert to a data.table, then replace every column in place with its
# POSIXct version. `.SDcols` is spelled out in full rather than relying on
# partial argument matching of `.SDcol`.
dfTdata <- as.data.table(dfTdata)
nms <- names(dfTdata)
dfTdata[
  ,
  (nms) := lapply(.SD, as.POSIXct, origin = '1970-01-01'),
  .SDcols = nms
]
#--------------------------------
# Function to calculate min time
#--------------------------------
# Find the column whose timestamps are, on average, closest to `var`.
#
# ds   : data.table (or any data.frame) of date-time (POSIXct) columns.
# var  : name (string) of the reference column in `ds`.
# unit : unit for the reported difference; one of "auto", "secs", "mins",
#        "hours", "days", "weeks". "auto" is treated as "secs", because
#        letting difftime() choose the unit separately for each column would
#        make the per-column means incomparable.
#
# Returns a named numeric vector: the smallest mean absolute difference (in
# `unit`) named after the column it belongs to; several entries if columns
# tie. Rows where either value is NA are ignored. Columns with no usable rows
# are dropped; if none remain, an empty vector is returned.
findMinAvgTime <- function(ds, var, unit = 'mins') {
  difunits <- c("auto", "secs", "mins", "hours", "days", "weeks")
  # The earlier checks were inverted: they rejected data.tables and rejected
  # any `var` that actually is a column of `ds`, so every valid call failed.
  stopifnot(
    is.character(unit), length(unit) == 1L, unit %in% difunits,
    is.data.frame(ds),
    is.character(var), length(var) == 1L, var %in% names(ds)
  )
  if (unit == "auto") {
    unit <- "secs"
  }

  # Variables to subtract on
  othVras <- setdiff(names(ds), var)
  ref <- ds[[var]]

  # Mean absolute difference to the reference column. abs() matters: with
  # signed differences the minimum is the most negative mean, i.e. the column
  # that is farthest from the reference, not the closest one.
  minres <- vapply(
    othVras,
    function(nm) {
      delta <- difftime(ref, ds[[nm]], units = unit)
      mean(abs(as.numeric(delta)), na.rm = TRUE)
    },
    numeric(1)
  )

  # Columns with no overlapping non-NA rows yield NaN; leave them out.
  minres <- minres[!is.na(minres)]
  if (length(minres) == 0L) {
    return(minres)
  }
  minres[minres == min(minres)]
}
#--------------------------------
# Run Function
#--------------------------------
# Compare every other column of dfTdata against 'TrWindDateTime'. `unit` is
# not passed, so the function's default ('mins') applies.
findMinAvgTime(dfTdata, 'TrWindDateTime')