r语言 - 如何从文本文件中读取数据,并将一列分离为多列



我有这个数据集:

1 "MX000017004195504TMAX  310  I  310  I  310  I  320  I  330  I  320  I  320  I  330  I  330  I  330  I  330~
2 "MX000017004195504TMIN  150  I  150  I  160  I  150  I  160  I  160  I  160  I  160  I  160  I  170  I  170~
3 "MX000017004195504PRCP    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0~
4 "MX000017004195505TMAX  310  I  310  I  310  I  300  I  300  I  300  I  310  I  310  I  310  I  300  I  300~
5 "MX000017004195505TMIN  200  I  160  I  160  I  150  I  150  I  150  I  160  I  160  I  170  I  170  I  170~
6 "MX000017004195505PRCP    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0~
7 "MX000017004195506TMAX  300  I  290  I  280  I  270  I  280  I  260  I  230  I  270  I  280  I  280  I  270~
8 "MX000017004195506TMIN  160  I  160  I  150  I  140  I  160  I  150  I  150  I  150  I  160  I  150  I  160~
9 "MX000017004195506PRCP  103  I  435  I  422  I    0  I  243  I   17  I    0  I    0  I    0  I   27  I    0~
10 "MX000017004195507TMAX  270  I  270  I  260  I  260  I  240  I  240  I  190  I  210  I  250  I  250  I  260~
# ... with 1,704 more rows

我需要以字母 "I" 作为分隔符,把每个 "I" 之后的字符串或数字拆分到单独的列中。

我试着:

# Attempt to split column V1 into two columns, "Date" and "values", at "I".
# NOTE(review): tidyr::separate() takes the column as a bare name (V1), not
# weather$V1 -- presumably the reason this call fails; `into` also names only
# two pieces although each record holds many "I"-separated values. Confirm
# against the tidyr documentation.
weather <- weather %>%
separate(weather$V1, into= c("Date", "values"), sep = "I")

但是没有成功。

你可以试试:

library(dplyr)
library(tidyr)
library(stringr)

# Record identifier at the start of each line, e.g. "MX000017004195504TMAX".
# Backslashes must be doubled inside an R string literal ("\w" on its own is
# an unrecognized escape and the script fails to parse). \w already matches
# digits, so no separate \d alternative is needed.
id_pattern <- "^MX\\w+"

# Keep the identifier as `Date`, then split the rest of each record on the
# "I" flag so that every reading becomes its own row. The identifier is
# removed before splitting because it can itself contain an "I" (e.g. "TMIN").
weather %>%
  transmute(
    Date = str_extract(V1, id_pattern),
    values = str_split(str_remove(V1, id_pattern), "I")
  ) %>%
  unnest(values) %>%
  # Strip the padding spaces that surrounded each value.
  mutate(values = str_trim(values))

返回

# A tibble: 110 x 2
Date                  values
<chr>                 <chr> 
1 MX000017004195504TMAX 310   
2 MX000017004195504TMAX 310   
3 MX000017004195504TMAX 310   
4 MX000017004195504TMAX 320   
5 MX000017004195504TMAX 330   
6 MX000017004195504TMAX 320   
7 MX000017004195504TMAX 320   
8 MX000017004195504TMAX 330   
9 MX000017004195504TMAX 330   
10 MX000017004195504TMAX 330   
# ... with 100 more rows
基于以下数据:
# Reproducible sample of the input: a 10-row tibble with a single character
# column V1, each element holding one raw record.
structure(
  list(
    V1 = c(
      "MX000017004195504TMAX  310  I  310  I  310  I  320  I  330  I  320  I  320  I  330  I  330  I  330  I  330",
      "MX000017004195504TMIN  150  I  150  I  160  I  150  I  160  I  160  I  160  I  160  I  160  I  170  I  170",
      "MX000017004195504PRCP    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0",
      "MX000017004195505TMAX  310  I  310  I  310  I  300  I  300  I  300  I  310  I  310  I  310  I  300  I  300",
      "MX000017004195505TMIN  200  I  160  I  160  I  150  I  150  I  150  I  160  I  160  I  170  I  170  I  170",
      "MX000017004195505PRCP    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0",
      "MX000017004195506TMAX  300  I  290  I  280  I  270  I  280  I  260  I  230  I  270  I  280  I  280  I  270",
      "MX000017004195506TMIN  160  I  160  I  150  I  140  I  160  I  150  I  150  I  150  I  160  I  150  I  160",
      "MX000017004195506PRCP  103  I  435  I  422  I    0  I  243  I   17  I    0  I    0  I    0  I   27  I    0",
      "MX000017004195507TMAX  270  I  270  I  260  I  260  I  240  I  240  I  190  I  210  I  250  I  250  I  260"
    )
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)
### Tidying the weather data
library(dplyr)
library(stringr)

# Read the whitespace-delimited file without a header row, so read.delim()
# names the columns V1, V2, ... Both the double and the single quote are
# treated as quoting characters; the double quote has to be escaped inside
# the string literal.
weather <- read.delim(
  "D:/Projects/Tidy_data/weather.txt",
  header = FALSE,
  sep = "",
  quote = "\"'",
  stringsAsFactors = FALSE,
  skipNul = FALSE
)
# as_tibble() replaces the deprecated as.tibble(); it is available here
# because dplyr is attached above.
weather <- as_tibble(weather)
weather

### Positions of the "I" flag columns to remove: V3, V5, ..., V63
se <- seq(3, 63, 2)
se

### Drop every "I" column in a single step
# any_of() silently ignores names that are absent, matching the tolerance of
# assigning NULL to a missing column one at a time.
flag_cols <- paste0("V", se)
weather <- select(weather, -any_of(flag_cols))
weather
Result is:
> weather
# A tibble: 1,714 x 32
V1       V2    V4    V6    V8    V10   V12   V14   V16   V18   V20   V22   V24   V26   V28   V30   V32   V34   V36  
<chr>    <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 MX00001~ 310   310   310   320   330   320   320   330   330   330   330   320   310   310   320   320   320   310  
2 MX00001~ 150   150   160   150   160   160   160   160   160   170   170   160   160   160   170   170   160   160  
3 MX00001~ 0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0    
4 MX00001~ 310   310   310   300   300   300   310   310   310   300   300   310   300   310   320   320   330   330  
5 MX00001~ 200   160   160   150   150   150   160   160   170   170   170   170   160   170   170   180   190   180  
6 MX00001~ 0     0     0     0     0     0     0     0     0     0     0     12    24    0     0     0     0     0    
7 MX00001~ 300   290   280   270   280   260   230   270   280   280   270   290   290   270   290   290   290   300  
8 MX00001~ 160   160   150   140   160   150   150   150   160   150   160   180   170   140   160   150   160   170  
9 MX00001~ 103   435   422   0     243   17    0     0     0     27    0     15    292   0     28    76    0     0    
10 MX00001~ 270   270   260   260   240   240   190   210   250   250   260   240   250   250   270   240   230   240  
# ... with 1,704 more rows, and 13 more variables: V38 <chr>, V40 <chr>, V42 <chr>, V44 <chr>, V46 <chr>, V48 <chr>,
#   V50 <chr>, V52 <chr>, V54 <chr>, V56 <chr>, V58 <chr>, V60 <chr>, V62 <int>

最新更新