我有一份极其复杂的数据,把它整理成数据框(data frame)之后,看起来像这样:
HEADER
<chr>
13:30:00.587: <- $GPGGA,183000.30,4415.6243,N,08823.9769,W,1,7,1.7,225.5,M,-33.4,M,,*68
13:30:00.683: <- $GPGLL,4415.6243,N,08823.9769,W,183000.40,A,A*72
13:30:00.779: <- $GPVTG,159.6,T,163.2,M,0.1,N,0.1,K,A*2E
13:30:00.827: <- $HCHDG,74.8,0.0,E,3.6,W*6E
13:30:01.003: <- $WIMDA,29.9641,I,1.0147,B,26.5,C,,,48.2,,14.6,C,323.0,T,326.6,M,1.4,N,0.7,M*66
13:30:01.051: <- $WIMWV,248.4,R,1.1,N,A*29
13:30:01.114: <- $WIMWV,255.6,T,1.3,N,A*23
13:30:01.195: <- $YXXDR,A,-53.9,D,PTCH,A,-34.2,D,ROLL*57
13:30:01.307: <- $YXXDR,A,0.571,G,XACC,A,0.783,G,YACC,A,-0.181,G,ZACC*57
13:30:01.578: <- $GPGGA,183001.30,4415.6242,N,08823.9769,W,1,7,1.7,225.9,M,-33.4,M,,*64
我需要整理这些数据,以便按每行开头的 5 个字母的字符串(例如 GPGGA)进行分组。我已经用下面的代码做到了这一点:
# Tag every raw logger line with its sentence code (the capital letters
# between "$" and the first comma, e.g. "GPGGA") and keep only GPGGA rows.
df <- data.frame(Weather_data)
df_GPGGA <- df %>%
  # Backslashes must be doubled inside an R string literal: "\\$" is a literal
  # dollar sign in the regex and "\\1" is the first capture group. A single
  # backslash ("\$", "\1") is either a parse error or an octal control char.
  mutate(Entry = gsub(".*\\$([A-Z]+),.*", "\\1", HEADER)) %>%
  # Group on the Entry column itself (bare name); the quoted form "Entry"
  # would group on a constant string instead of the column.
  group_by(Entry) %>%
  filter(Entry == "GPGGA")
这段代码输出一个两列的数据框:"HEADER" 列仍然是如上所示的一整条长字符串(目前这样没问题),"Entry" 列则是该行中对应的 5 个字母的字符串,用于分组(请参阅下面的屏幕截图)。
我的最终目标是能够为每个单独的数据集生成一个单独的文件(.csv)。数据集按 5 个字母的字符串区分,因此我使用了 filter,并对每个不同的 5 个字母字符串重复了这段代码。我所用仪器输出的每一行 GPGGA 数据所包含的信息(即对应的列)与 GPGLL 行的并不完全相同。但我不知道接下来该如何格式化每个文件。以下是一次尝试的屏幕截图:得到的 .csv 中数据没有被分隔开。如何将所有这些数据放到单独的列中?
这是要求的可复制数据集:
dput(Weather_data) 的输出如该图像所示
这里有一个开始:
library(dplyr)
# Split every raw line into (time, group, string), then build one data frame
# per sentence type and store the frames in a list-column.
#
# Regex pieces (backslashes are doubled because this is an R string literal):
#   ^([0-9.:]*)   leading timestamp, e.g. 13:30:00.587
#   : <-\\s*      the ": <-" separator followed by optional whitespace
#   (\\$[^,]+)    sentence identifier including the "$", e.g. $GPGGA
#   ,(.*)         the remaining comma-separated payload
stuff <- strcapture(
  "^([0-9.:]*): <-\\s*(\\$[^,]+),(.*)",
  dat$HEADER,
  list(time = "", group = "", string = "")
) %>%
  # Prepend the timestamp to the payload so it becomes the first CSV field.
  transmute(group, string = paste0(time, ",", string)) %>%
  group_by(group) %>%
  # Join each group's rows with a real newline ("\n") so read.csv() parses
  # them as separate records; lapply() wraps the result in a list-column.
  summarize(
    data = lapply(
      paste(string, collapse = "\n"),
      function(z) read.csv(text = z, header = FALSE)
    )
  )
# Print the summary: one row per sentence identifier, with the parsed rows
# for that identifier held as a data frame in the `data` list-column.
stuff
# # A tibble: 7 x 2
# group data
# <chr> <list>
# 1 $GPGGA <df[,15] [2 x 15]>
# 2 $GPGLL <df[,8] [1 x 8]>
# 3 $GPVTG <df[,10] [1 x 10]>
# 4 $HCHDG <df[,6] [1 x 6]>
# 5 $WIMDA <df[,21] [1 x 21]>
# 6 $WIMWV <df[,6] [2 x 6]>
# 7 $YXXDR <df[,13] [2 x 13]>
# Pull out the data frame for the first group ($GPGGA); V1 is the logger
# timestamp and the remaining columns are the comma-separated fields.
stuff$data[[1]]
# V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
# 1 13:30:00.587 183000.3 4415.624 N 8823.977 W 1 7 1.7 225.5 M -33.4 M NA *68
# 2 13:30:01.578 183001.3 4415.624 N 8823.977 W 1 7 1.7 225.9 M -33.4 M NA *64
数据:
# Reproducible sample input: ten raw logger lines held in a single character
# column named HEADER. The first nine strings end with a trailing space and
# the last one does not; this is preserved exactly as captured.
dat <- structure(
  list(
    HEADER = c(
      "13:30:00.587: <- $GPGGA,183000.30,4415.6243,N,08823.9769,W,1,7,1.7,225.5,M,-33.4,M,,*68 ",
      "13:30:00.683: <- $GPGLL,4415.6243,N,08823.9769,W,183000.40,A,A*72 ",
      "13:30:00.779: <- $GPVTG,159.6,T,163.2,M,0.1,N,0.1,K,A*2E ",
      "13:30:00.827: <- $HCHDG,74.8,0.0,E,3.6,W*6E ",
      "13:30:01.003: <- $WIMDA,29.9641,I,1.0147,B,26.5,C,,,48.2,,14.6,C,323.0,T,326.6,M,1.4,N,0.7,M*66 ",
      "13:30:01.051: <- $WIMWV,248.4,R,1.1,N,A*29 ",
      "13:30:01.114: <- $WIMWV,255.6,T,1.3,N,A*23 ",
      "13:30:01.195: <- $YXXDR,A,-53.9,D,PTCH,A,-34.2,D,ROLL*57 ",
      "13:30:01.307: <- $YXXDR,A,0.571,G,XACC,A,0.783,G,YACC,A,-0.181,G,ZACC*57 ",
      "13:30:01.578: <- $GPGGA,183001.30,4415.6242,N,08823.9769,W,1,7,1.7,225.9,M,-33.4,M,,*64"
    )
  ),
  class = "data.frame",
  # Compact automatic row names for 10 rows, as emitted by dput().
  row.names = c(NA, -10L)
)