如何在R中将超过3位数的字符型变量转换为数值型



我想将变量v1从字符转换为数字。v1中的值是数字。

我试过了:

# as.numeric() cannot parse a "," thousands separator, so strings such as
# "11,936" are returned as NA (with a coercion warning).
v1 <- as.numeric(v1)

这会把变量转换为数值型,但所有超过3位数的值都变成了NA。

> dput(dat)
structure(list(X = c("Baldwin.County", "Banks.County", "Barrow.County", 
"Bibb.County", "Butts.County", "Clarke.County", "Columbia.County", 
"Dawson.County", "DeKalb.County", "Elbert.County", "Forsyth.County", 
"Franklin.County", "Glascock.County", "Greene.County", 
"Gwinnett.County", 
"Habersham.County", "Hall.County", "Hancock.County", "Hart.County", 
"Henry.County", "Jackson.County", "Jasper.County", "Jefferson.County", 
"Jones.County", "Lamar.County", "Lincoln.County", "Lumpkin.County", 
"McDuffie.County", "Madison.County", "Monroe.County", "Morgan.County", 
"Newton.County", "Oconee.County", "Oglethorpe.County", "Putnam.County", 
"Rabun.County", "Richmond.County", "Rockdale.County", "Spalding.County", 
"Stephens.County", "Taliaferro.County", "Towns.County", "Union.County", 
"Walton.County", "Warren.County", "Washington.County", "White.County", 
"Wilkes.County", "Wilkinson.County"), total = c("11,936", "333", 
"6,285", "50,801", "4,767", "21,606", "17,549", "117", "270,370", 
"3,719", "5,508", "1,138", "185", "3,913", "159,130", "910", 
"9,417", "4,687", "3,579", "65,739", "3,400", "2,037", "5,398", 
"4,981", "3,210", "1,698", "366", "5,394", "1,757", "4,124", 
"3,117", "30,641", "1,312", "1,799", "3,864", "252", "72,566", 
"32,136", "14,073", "1,840", "742", "108", "155", "10,220", "2,221", 
"7,293", "605", "2,801", "2,358"), malet = c("5,996", "166", 
"2,957", "22,113", "2,889", "9,160", "8,268", "105", "118,932", 
"1,688", "2,536", "511", " 54", "1,661", "71,095", "255", "4,410", 
"2,605", "1,728", "28,442", "1,810", "960", "2,378", "2,358", 
"1,426", "709", "178", "2,358", "916", "1,928", "1,325", "13,197", 
"684", "820", "1,830", "209", "32,360", "13,739", "6,127", "852", 
"358", " 41", "123", "4,545", "1,031", "3,528", "157", "1,255", 
"1,089"), m1 = c("1,164", " 63", "476", "4,144", "1,050", "2,017", 
"520", " 29", "13,043", "382", "130", "63", " 41", "365", "4,129", 
" 35", "820", "1,134", "293", "2,430", "351", "215", "470", "180", 
"156", "188", " 28", "630", "249", "606", "301", "1,681", "123", 
"216", "206", " 49", "5,876", "1,012", "1,358", "53", " 97", 
"  0", " 29", "954", "377", "896", " 48", "283", "94")), class = 
"data.frame", row.names = c(NA, -49L))

您可以对所有内容为数字(但类型不是数值型)的变量执行此操作,如下所示:

library(dplyr)
library(readr)

# Convert every character column that contains a thousands separator to numeric.
# The column test uses base grepl() so nothing beyond dplyr/readr has to be
# attached (str_detect() belongs to stringr, which is not loaded here), and
# grepl() returns FALSE rather than NA for missing values, so the predicate
# always yields a single TRUE/FALSE. The is.character() guard keeps
# non-character columns away from parse_number().
dat %>%
  mutate(across(
    where(~ is.character(.x) && any(grepl(",", .x, fixed = TRUE))),
    ~ parse_number(.x)
  ))
X  total  malet    m1
1     Baldwin.County  11936   5996  1164
2       Banks.County    333    166    63
3      Barrow.County   6285   2957   476
4        Bibb.County  50801  22113  4144
5       Butts.County   4767   2889  1050
6      Clarke.County  21606   9160  2017
7    Columbia.County  17549   8268   520
8      Dawson.County    117    105    29
9      DeKalb.County 270370 118932 13043
10     Elbert.County   3719   1688   382
11    Forsyth.County   5508   2536   130
12   Franklin.County   1138    511    63
13   Glascock.County    185     54    41
14     Greene.County   3913   1661   365
15   Gwinnett.County 159130  71095  4129
16  Habersham.County    910    255    35
17       Hall.County   9417   4410   820
18    Hancock.County   4687   2605  1134
19       Hart.County   3579   1728   293
20      Henry.County  65739  28442  2430
21    Jackson.County   3400   1810   351
22     Jasper.County   2037    960   215
23  Jefferson.County   5398   2378   470
24      Jones.County   4981   2358   180
25      Lamar.County   3210   1426   156
26    Lincoln.County   1698    709   188
27    Lumpkin.County    366    178    28
28   McDuffie.County   5394   2358   630
29    Madison.County   1757    916   249
30     Monroe.County   4124   1928   606
31     Morgan.County   3117   1325   301
32     Newton.County  30641  13197  1681
33     Oconee.County   1312    684   123
34 Oglethorpe.County   1799    820   216
35     Putnam.County   3864   1830   206
36      Rabun.County    252    209    49
37   Richmond.County  72566  32360  5876
38   Rockdale.County  32136  13739  1012
39   Spalding.County  14073   6127  1358
40   Stephens.County   1840    852    53
41 Taliaferro.County    742    358    97
42      Towns.County    108     41     0
43      Union.County    155    123    29
44     Walton.County  10220   4545   954
45     Warren.County   2221   1031   377
46 Washington.County   7293   3528   896
47      White.County    605    157    48
48     Wilkes.County   2801   1255   283
49  Wilkinson.County   2358   1089    94

或者,您可以先删除逗号,然后显式转换为数字,因此:

# Strip every comma, then convert explicitly with as.numeric().
# gsub() is required rather than sub(): sub() removes only the first match,
# so a value like "1,234,567" would keep one comma and turn into NA.
# Columns are selected with base grepl(), so stringr does not need to be loaded.
dat %>%
  mutate(across(
    where(~ is.character(.x) && any(grepl(",", .x, fixed = TRUE))),
    ~ as.numeric(gsub(",", "", .x, fixed = TRUE))
  ))

如果将%>% str()添加到管道中,您将看到包含数字的所有三个变量都已转换为数字。

如果你只想把某一个特定的列转换为数值型:

# Convert only the m1 column; parse_number() ignores the thousands separator.
mutate(dat, m1 = parse_number(m1))

或:

# Convert only the m1 column. gsub() removes all commas; sub() would drop
# only the first one, leaving values such as "1,234,567" to become NA.
dat %>%
  mutate(m1 = as.numeric(gsub(",", "", m1, fixed = TRUE)))

最新更新