我有一个看起来像这样的数据帧
flight_no takeoff_time land_time airtime origin_lat origin_lon dest_lat dest_lon
AAA 2019-03-04 06:18:00 2019-03-04 07:52:00 94 33.63667 -84.42778 41.97444 -87.90667
....
数据
# Sample flights, one row per flight. Bound to `df` because the snippets that
# follow refer to the data by that name. Times are POSIXct in
# America/New_York; in this sample AirTime equals land_time - takeoff_time
# expressed in minutes.
df <- structure(
  list(
    flight_no = c("AAA", "BBB", "CCC", "DDD", "EEE", "FFF", "GGG"),
    takeoff_time = structure(
      c(1551698280, 1551707340, 1551749100, 1551716580, 1551724140,
        1551733260, 1551740100),
      tzone = "America/New_York", class = c("POSIXct", "POSIXt")
    ),
    land_time = structure(
      c(1551703920, 1551712740, 1551752220, 1551720420, 1551727980,
        1551736500, 1551743760),
      tzone = "America/New_York", class = c("POSIXct", "POSIXt")
    ),
    AirTime = c(94, 90, 52, 64, 64, 54, 61),
    origin_lat = c(33.63666667, 41.97444444, 33.63666667, 33.63666667,
                   39.71722222, 33.63666667, 29.18),
    origin_lon = c(-84.42777778, -87.90666667, -84.42777778, -84.42777778,
                   -86.29472222, -84.42777778, -81.05805556),
    dest_lat = c(41.97444444, 33.63666667, 38.17416667, 39.71722222,
                 33.63666667, 29.18, 33.63666667),
    dest_lon = c(-87.90666667, -84.42777778, -85.73638889, -86.29472222,
                 -84.42777778, -81.05805556, -84.42777778)
  ),
  row.names = c(NA, -7L),
  class = c("tbl_df", "tbl", "data.frame")
)
我想假设起点和目的地之间的直接路径,并每分钟检索一次航班坐标。我创建了以下函数
# Interpolate one position per minute along the straight line from origin to
# destination for every flight in `df`.
#
# `df` must contain flight_no, takeoff_time, land_time, AirTime, origin_lon,
# origin_lat, dest_lon and dest_lat, with one row per flight_no. AirTime is
# taken as the flight duration in minutes (as in the sample data).
#
# Returns a tibble with one row per interpolated point and the columns
# flight_no, datetime, lon and lat. Requires dplyr and tidyr to be attached.
get_coords <- function(df) {
  df %>%
    # origin_lon/origin_lat/dest_lon/dest_lat -> two rows per flight
    # (col = "origin" / "dest") with plain `lon` and `lat` columns.
    pivot_longer(cols = c(origin_lon, origin_lat, dest_lon, dest_lat),
                 names_to = c("col", ".value"),
                 names_sep = "_") %>%
    group_by(flight_no) %>%
    summarise(
      # AirTime one-minute steps need AirTime + 1 points, endpoints included.
      datetime = list(seq(first(takeoff_time), first(land_time),
                          length.out = first(AirTime) + 1)),
      # Always run origin -> dest; min()/max() would reverse any flight that
      # is not heading north-west.
      lon = list(seq(first(lon[col == "origin"]), first(lon[col == "dest"]),
                     length.out = first(AirTime) + 1)),
      lat = list(seq(first(lat[col == "origin"]), first(lat[col == "dest"]),
                     length.out = first(AirTime) + 1))
    ) %>%
    # Expand all three list columns together so they stay aligned row by row.
    unnest(cols = c(datetime, lon, lat))
}
library(dplyr)
library(purrr)
library(tidyr)  # provides pivot_longer() and unnest(), used inside get_coords()

# Split the data into one single-row tibble per flight, run get_coords() on
# each piece, and row-bind the per-flight results into one data frame.
df %>%
  group_split(rn = row_number(), .keep = FALSE) %>%
  map_dfr(get_coords)
这将完成这项工作,并返回一个数据帧,其中包含每分钟的flight_no、经度和纬度值,但我还想包括每条记录的日期和时间值。
输出
flight_no datetime lon lat
<chr> <dttm> <dbl> <dbl>
AAA 2019-03-04 06:18:00 -84.42778 33.63667
AAA 2019-03-04 06:19:00 -84.46479 33.72537
AAA 2019-03-04 06:20:00 -84.50180 33.81407
AAA 2019-03-04 06:21:00 -84.53881 33.90277
AAA 2019-03-04 06:22:00 -84.57582 33.99147
AAA 2019-03-04 06:23:00 -84.61283 34.08017
AAA 2019-03-04 06:24:00 -84.64984 34.16887
AAA 2019-03-04 06:25:00 -84.68685 34.25756
AAA 2019-03-04 06:26:00 -84.72386 34.34626
这里有一种方法:
library(dplyr)
library(tidyr)
# Build one row per minute of flight, interpolating time and position linearly
# from origin to destination. Assumes one row per flight_no and AirTime in
# minutes (as in the sample data).
df %>%
  # origin_lon/origin_lat/dest_lon/dest_lat -> two rows per flight
  # (col = "origin" / "dest") with plain `lon` and `lat` columns.
  pivot_longer(cols = c(origin_lon, origin_lat, dest_lon, dest_lat),
               names_to = c("col", ".value"),
               names_sep = "_") %>%
  group_by(flight_no) %>%
  summarise(
    # AirTime one-minute steps need AirTime + 1 points; with only AirTime
    # points the step is slightly longer than a minute and timestamps drift.
    datetime = list(seq(first(takeoff_time), first(land_time),
                        length.out = first(AirTime) + 1)),
    # Always run origin -> dest; min()/max() would reverse any flight that is
    # not heading north-west.
    lon = list(seq(first(lon[col == "origin"]), first(lon[col == "dest"]),
                   length.out = first(AirTime) + 1)),
    lat = list(seq(first(lat[col == "origin"]), first(lat[col == "dest"]),
                   length.out = first(AirTime) + 1))
  ) %>%
  unnest(cols = c(lat, lon, datetime))
# Expected output (values computed from the sample data):
#   flight_no datetime              lon   lat
#   <chr>     <dttm>              <dbl> <dbl>
# 1 AAA       2019-03-04 06:18:00 -84.4  33.6
# 2 AAA       2019-03-04 06:19:00 -84.5  33.7
# 3 AAA       2019-03-04 06:20:00 -84.5  33.8
# 4 AAA       2019-03-04 06:21:00 -84.5  33.9
# 5 AAA       2019-03-04 06:22:00 -84.6  34.0
# 6 AAA       2019-03-04 06:23:00 -84.6  34.1
# 7 AAA       2019-03-04 06:24:00 -84.6  34.2
# 8 AAA       2019-03-04 06:25:00 -84.7  34.3
# 9 AAA       2019-03-04 06:26:00 -84.7  34.3
#10 AAA       2019-03-04 06:27:00 -84.8  34.4
# … with 476 more rows
先将数据转换为长格式,然后在 takeoff_time 和 land_time 之间生成时间序列,并以同样的方式生成纬度和经度的序列。