我有这个数据集:
1 "MX000017004195504TMAX 310 I 310 I 310 I 320 I 330 I 320 I 320 I 330 I 330 I 330 I 330~
2 "MX000017004195504TMIN 150 I 150 I 160 I 150 I 160 I 160 I 160 I 160 I 160 I 170 I 170~
3 "MX000017004195504PRCP 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0~
4 "MX000017004195505TMAX 310 I 310 I 310 I 300 I 300 I 300 I 310 I 310 I 310 I 300 I 300~
5 "MX000017004195505TMIN 200 I 160 I 160 I 150 I 150 I 150 I 160 I 160 I 170 I 170 I 170~
6 "MX000017004195505PRCP 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0~
7 "MX000017004195506TMAX 300 I 290 I 280 I 270 I 280 I 260 I 230 I 270 I 280 I 280 I 270~
8 "MX000017004195506TMIN 160 I 160 I 150 I 140 I 160 I 150 I 150 I 150 I 160 I 150 I 160~
9 "MX000017004195506PRCP 103 I 435 I 422 I 0 I 243 I 17 I 0 I 0 I 0 I 27 I 0~
10 "MX000017004195507TMAX 270 I 270 I 260 I 260 I 240 I 240 I 190 I 210 I 250 I 250 I 260~
# ... with 1,704 more rows
我需要让"I"之后的每个字符串或数字列中的字母
我试着:
# To separate age to 2 columns
weather <- weather %>%
separate(weather$V1, into= c("Date", "values"), sep = "I")
但是没有成功。
你可以试试:
library(dplyr)
library(tidyr)
library(stringr)
weather %>%
transmute(Date = str_extract(V1, "^MX[\d|\w]+"),
values = str_split(str_remove(V1, "^MX[\d|\w]+"), "I")) %>%
unnest(values) %>%
mutate(values = str_trim(values))
返回
# A tibble: 110 x 2
Date values
<chr> <chr>
1 MX000017004195504TMAX 310
2 MX000017004195504TMAX 310
3 MX000017004195504TMAX 310
4 MX000017004195504TMAX 320
5 MX000017004195504TMAX 330
6 MX000017004195504TMAX 320
7 MX000017004195504TMAX 320
8 MX000017004195504TMAX 330
9 MX000017004195504TMAX 330
10 MX000017004195504TMAX 330
# ... with 100 more rows
根据数据structure(list(V1 = c("MX000017004195504TMAX 310 I 310 I 310 I 320 I 330 I 320 I 320 I 330 I 330 I 330 I 330",
"MX000017004195504TMIN 150 I 150 I 160 I 150 I 160 I 160 I 160 I 160 I 160 I 170 I 170",
"MX000017004195504PRCP 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0",
"MX000017004195505TMAX 310 I 310 I 310 I 300 I 300 I 300 I 310 I 310 I 310 I 300 I 300",
"MX000017004195505TMIN 200 I 160 I 160 I 150 I 150 I 150 I 160 I 160 I 170 I 170 I 170",
"MX000017004195505PRCP 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0 I 0",
"MX000017004195506TMAX 300 I 290 I 280 I 270 I 280 I 260 I 230 I 270 I 280 I 280 I 270",
"MX000017004195506TMIN 160 I 160 I 150 I 140 I 160 I 150 I 150 I 150 I 160 I 150 I 160",
"MX000017004195506PRCP 103 I 435 I 422 I 0 I 243 I 17 I 0 I 0 I 0 I 27 I 0",
"MX000017004195507TMAX 270 I 270 I 260 I 260 I 240 I 240 I 190 I 210 I 250 I 250 I 260"
)), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"
))
`### Tidying the weather data
weather <- read.delim("D:/Projects/Tidy_data/weather.txt",header = F, sep = "", quote = ""'" ,stringsAsFactors = FALSE , skipNul = FALSE)
weather <- as.tibble(weather)
weather
### Making List of Column of "I" To Be Removed By Sequence
se = seq(3,63,2)
se
### Loop for Deleting "I" Columns from se sequence
library(stringr)
library(dplyr)
for(i in se) {
g <- glue::glue("V{i}")
weather[[g]] <- NULL
print(g)
}
weather `
Result is:
> weather
# A tibble: 1,714 x 32
V1 V2 V4 V6 V8 V10 V12 V14 V16 V18 V20 V22 V24 V26 V28 V30 V32 V34 V36
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 MX00001~ 310 310 310 320 330 320 320 330 330 330 330 320 310 310 320 320 320 310
2 MX00001~ 150 150 160 150 160 160 160 160 160 170 170 160 160 160 170 170 160 160
3 MX00001~ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 MX00001~ 310 310 310 300 300 300 310 310 310 300 300 310 300 310 320 320 330 330
5 MX00001~ 200 160 160 150 150 150 160 160 170 170 170 170 160 170 170 180 190 180
6 MX00001~ 0 0 0 0 0 0 0 0 0 0 0 12 24 0 0 0 0 0
7 MX00001~ 300 290 280 270 280 260 230 270 280 280 270 290 290 270 290 290 290 300
8 MX00001~ 160 160 150 140 160 150 150 150 160 150 160 180 170 140 160 150 160 170
9 MX00001~ 103 435 422 0 243 17 0 0 0 27 0 15 292 0 28 76 0 0
10 MX00001~ 270 270 260 260 240 240 190 210 250 250 260 240 250 250 270 240 230 240
# ... with 1,704 more rows, and 13 more variables: V38 <chr>, V40 <chr>, V42 <chr>, V44 <chr>, V46 <chr>, V48 <chr>,
# V50 <chr>, V52 <chr>, V54 <chr>, V56 <chr>, V58 <chr>, V60 <chr>, V62 <int>