在 R 中计算随时间变化的经纬度坐标



我有一个看起来像这样的数据帧

flight_no    takeoff_time          land_time           AirTime    origin_lat    origin_lon    dest_lat     dest_lon
AAA         2019-03-04 06:18:00   2019-03-04 07:52:00   94         33.63667    -84.42778     41.97444     -87.90667
....

数据

# Example flights, one row per flight. takeoff_time and land_time are POSIXct
# values in the America/New_York time zone; AirTime is the flight duration in
# minutes (for every row it equals land_time - takeoff_time). The origin_* and
# dest_* columns hold the endpoint coordinates in decimal degrees.
# The object is bound to `df` because the snippets below refer to it by that name.
df <- structure(list(flight_no = c("AAA", "BBB", "CCC",
"DDD", "EEE", "FFF", "GGG"), takeoff_time = structure(c(1551698280,
1551707340, 1551749100, 1551716580, 1551724140, 1551733260, 1551740100
), tzone = "America/New_York", class = c("POSIXct", "POSIXt")),
land_time = structure(c(1551703920, 1551712740, 1551752220,
1551720420, 1551727980, 1551736500, 1551743760), tzone = "America/New_York", class = c("POSIXct",
"POSIXt")), AirTime = c(94, 90, 52, 64, 64, 54, 61), origin_lat = c(33.63666667,
41.97444444, 33.63666667, 33.63666667, 39.71722222, 33.63666667,
29.18), origin_lon = c(-84.42777778, -87.90666667, -84.42777778,
-84.42777778, -86.29472222, -84.42777778, -81.05805556),
dest_lat = c(41.97444444, 33.63666667, 38.17416667, 39.71722222,
33.63666667, 29.18, 33.63666667), dest_lon = c(-87.90666667,
-84.42777778, -85.73638889, -86.29472222, -84.42777778, -81.05805556,
-84.42777778)), row.names = c(NA, -7L), class = c("tbl_df",
"tbl", "data.frame"))

我想假设起点和目的地之间的直接路径,并每分钟检索一次航班坐标。我创建了以下函数

#' Interpolate each flight's position once per minute along a straight line
#'
#' For every `flight_no` in `df`, builds `AirTime + 1` evenly spaced samples
#' (both endpoints included) of the timestamp, longitude and latitude, going
#' from the origin at `takeoff_time` to the destination at `land_time`.
#'
#' @param df A data frame with columns `flight_no`, `takeoff_time`,
#'   `land_time` (POSIXct), `AirTime` (minutes), `origin_lon`, `origin_lat`,
#'   `dest_lon` and `dest_lat`. Only the first row of each `flight_no` is used.
#' @return A tibble with one row per sample and the columns `flight_no`,
#'   `datetime`, `lon` and `lat`.
get_coords <- function(df) {
  # Functions are namespace-qualified so the helper works even when tidyr has
  # not been attached with library().
  per_flight <- dplyr::summarise(
    dplyr::group_by(df, flight_no),
    # Sequences run origin -> destination (not min -> max), so flights heading
    # south or east are interpolated in the right direction.
    datetime = list(
      seq(takeoff_time[1], land_time[1], length.out = AirTime[1] + 1)
    ),
    lon = list(seq(origin_lon[1], dest_lon[1], length.out = AirTime[1] + 1)),
    lat = list(seq(origin_lat[1], dest_lat[1], length.out = AirTime[1] + 1))
  )

  # Expand the three equally long list-columns into one row per sample.
  tidyr::unnest(per_flight, cols = c(datetime, lon, lat))
}
# Attach the packages the pipeline relies on: dplyr for the verbs and the pipe,
# tidyr for the reshaping functions get_coords() uses, purrr for map_dfr().
library(dplyr)
library(tidyr)
library(purrr)

# Split the table into one single-row data frame per flight, interpolate each
# of them with get_coords(), and row-bind the per-flight results.
df %>%
  group_split(rn = row_number(), .keep = FALSE) %>%
  map_dfr(get_coords)

这将完成这项工作,并返回一个数据帧,其中包含每分钟的flight_no、经度和纬度值,但我还想包括每条记录的日期和时间值。

输出

flight_no    datetime             lon        lat
<chr>        <dttm>               <dbl>      <dbl>
AAA         2019-03-04 06:18:00  -84.42778  33.63667        
AAA         2019-03-04 06:19:00  -84.46479  33.72537        
AAA         2019-03-04 06:20:00  -84.50180  33.81407        
AAA         2019-03-04 06:21:00  -84.53881  33.90277        
AAA         2019-03-04 06:22:00  -84.57582  33.99147        
AAA         2019-03-04 06:23:00  -84.61283  34.08017        
AAA         2019-03-04 06:24:00  -84.64984  34.16887        
AAA         2019-03-04 06:25:00  -84.68685  34.25756        
AAA         2019-03-04 06:26:00  -84.72386  34.34626

这里有一种方法:

library(dplyr)
library(tidyr)

# Reshape to long format: each flight becomes two rows (col = "origin" and
# col = "dest") that share a single `lon` and a single `lat` column.
df %>%
  pivot_longer(
    cols = c(origin_lon, origin_lat, dest_lon, dest_lat),
    names_to = c("col", ".value"),
    names_sep = "_"
  ) %>%
  group_by(flight_no) %>%
  # AirTime minutes in the air means AirTime + 1 one-minute samples, endpoints
  # included. Interpolating from the "origin" row to the "dest" row (rather
  # than from min to max) keeps the direction of travel correct for every flight.
  summarise(
    datetime = list(seq(first(takeoff_time), first(land_time),
                        length.out = first(AirTime) + 1)),
    lon = list(seq(lon[col == "origin"], lon[col == "dest"],
                   length.out = first(AirTime) + 1)),
    lat = list(seq(lat[col == "origin"], lat[col == "dest"],
                   length.out = first(AirTime) + 1))
  ) %>%
  unnest(cols = c(datetime, lon, lat))

#  flight_no datetime              lon   lat
#   <chr>     <dttm>              <dbl> <dbl>
# 1 AAA       2019-03-04 06:18:00 -84.4  33.6
# 2 AAA       2019-03-04 06:19:00 -84.5  33.7
# 3 AAA       2019-03-04 06:20:00 -84.5  33.8
# 4 AAA       2019-03-04 06:21:00 -84.5  33.9
# 5 AAA       2019-03-04 06:22:00 -84.6  34.0
# 6 AAA       2019-03-04 06:23:00 -84.6  34.1
# 7 AAA       2019-03-04 06:24:00 -84.6  34.2
# 8 AAA       2019-03-04 06:25:00 -84.7  34.3
# 9 AAA       2019-03-04 06:26:00 -84.7  34.3
#10 AAA       2019-03-04 06:27:00 -84.8  34.4
# … with 476 more rows

先把数据转换为长格式,然后在 takeoff_time 和 land_time 之间生成时间序列,并在起点与目的地之间生成对应的经度和纬度序列。

最新更新