因为样本很大,所以我在问题底部放了一个dput
代码块来导入我的数据样本。
我正在尝试创建一个条形图,显示此数据集中各类事件(由"查找代码"即 LookupCode 标识)所占的百分比。在p01
中,我只查看具有查找代码的事件(省略"查找代码"为"n/a"的行)。在p02
中,我想查看所有事件并绘制计数和百分比,计算时包括"LookupCode"为"n/a"的行,但从条形图中省略这些值。(也就是说:我不希望在 x 轴上显示 NA 的大条。)
关于如何做到这一点的任何想法?
谢谢。要遵循的代码:
library(tidyverse)
library(scales)
# Bar charts of lookup-code counts, each bar labelled with its share of the
# rows that reach the plot.
# NOTE: `df` must already exist when these lines run; in this post the
# sample data is defined further down.

# p01: rows without a lookup code are filtered out before plotting, so the
# percentages are relative to coded events only.
p01 <- ggplot(
  df %>% filter(!is.na(LookupCode)),
  aes(fct_infreq(LookupCode), fill = LookupCode)
) +
  geom_bar(stat = "count") +
  geom_text(
    stat = "count",
    aes(label = scales::percent((..count..) / sum(..count..))),
    vjust = "inward"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "none"
  ) +
  labs(
    title = "Count and Percent of Lookup Codes",
    x = "Lookup Code",
    y = "Count"
  )

# p02: every row is plotted, so the percentages are relative to all events
# and rows whose LookupCode is NA are passed to the plot as well.
p02 <- ggplot(
  df,
  aes(fct_infreq(LookupCode), fill = LookupCode)
) +
  geom_bar(stat = "count") +
  geom_text(
    stat = "count",
    aes(label = scales::percent((..count..) / sum(..count..))),
    vjust = "inward"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "none"
  ) +
  labs(
    title = "Count and Percent of Lookup Codes",
    x = "Lookup Code",
    y = "Count"
  )
# sample data
# dput() output for a 50-row tibble (row.names = c(NA, -50L)). LookupCode is
# NA on most rows; only six rows carry a code.
# The inch mark in the ReasonLate entry "... melt to 42\"" is escaped so the
# string literal (and therefore the whole structure() call) parses.
df <- structure(list(ReleaseYear = c(2016, 2017, 2018, 2017, 2019,
2019, 2019, 2017, 2019, 2017, 2016, 2017, 2017, 2018, 2018, 2017,
2019, 2017, 2017, 2017, 2017, 2016, 2017, 2018, 2019, 2016, 2016,
2016, 2018, 2018, 2019, 2017, 2016, 2018, 2019, 2018, 2017, 2016,
2018, 2017, 2018, 2016, 2018, 2019, 2018, 2018, 2019, 2016, 2018,
2019), ReleaseMonth = c("SEPT", "APRIL", "AUGUST", "JUNE", "JAN",
"JAN", "AUGUST", "MARCH", "FEB", "APRIL", "NOV", "AUGUST", "DEC",
"FEB", "FEB", "JUNE", "MAY", "MARCH", "AUGUST", "FEB", "DEC",
"OCT", "AUGUST", "JULY", "APRIL", "MARCH", "SEPT", "NOV", "NOV",
"JULY", "AUGUST", "JAN", "DEC", "APRIL", "MARCH", "MAY", "JAN",
"JULY", "JUNE", "FEB", "AUGUST", "NOV", "MARCH", "FEB", "SEPT",
"NOV", "MAY", "NOV", "MARCH", "FEB"), ProductionOrder = c(10026795,
10027932, 10032532, 10029147, 10033613, 10033771, 10035329, 10028252,
10033714, 10027859, 10026658, 10029516, 10030769, 10028211, 10031074,
10028976, 10034237, 10028414, 10029408, 100296220, 10030650,
10027200, 10029249, 10032374, 10034502, 10024691, 10026743, 10027112,
10033068, 10032391, 10035711, 10027797, 10026638, 10031533, 10034233,
10031882, 10027822, 10026203, 10032144, 10028241, 10031825, 10026656,
10031067, 10034340, 10032801, 10033399, 10034876, 10027364, 10031486,
10034002), NilesHeatNo = c("8J47674", "8J51003", "8G58509", "8H51788",
"8J60248", "8G60351", "9J63427", "8J50670", "8T60855", "8F50950",
"8G48562", "8G52372", "9H54615", "8J55715", "8K55529", "8G51685",
"8G62172", "8H50464", "8J52323", "8H50164", "8G54596", "9G48197",
"9H52494", "8G57871", "8V61894", "8H45452", "9G47724", "8G48507",
"8T59690", "8G57886", "8H63385", "8F49662", "8F49020", "8G56373",
"8H61652", "8J57119", "8J49638", "8J46954", "8J57651", "8F49986",
"8G58447", "8G48520", "8G56064", "8H61297", "8G58851", "8G59461",
"8G62447", "8J48584", "8H56190", "8R60756"), LookupCode = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "W4 - PROCESS ERROR",
NA, NA, "U0 - EQUIPMENT BREAK DOWN", NA, NA, NA, NA, NA, "C1 - OXYGEN - HIGH",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, "C2 - OXYGEN - LOW", "A2 - ALUMINUM - LOW",
NA, NA, NA, "A1 - ALUMINUM - HIGH"), ScrapWeight = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, 0, NA,
NA, NA, NA, NA, 13779, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, 0, NA, NA, NA,
0), Melt = c("8J", "8J", "8G", "8H", "8J", "8G", "9J", "8J",
"8T", "8F", "8G", "8G", "9H", "8J", "8K", "8G", "8G", "8H", "8J",
"8H", "8G", "9G", "9H", "8G", "8V", "8H", "9G", "8G", "8T", "8G",
"8H", "8F", "8F", "8G", "8H", "8J", "8J", "8J", "8J", "8F", "8G",
"8G", "8G", "8H", "8G", "8G", "8G", "8J", "8H", "8R"), MeltNo = c(47674,
51003, 58509, 51788, 60248, 60351, 63427, 50670, 60855, 50950,
48562, 52372, 54615, 55715, 55529, 51685, 62172, 50464, 52323,
50164, 54596, 48197, 52494, 57871, 61894, 45452, 47724, 48507,
59690, 57886, 63385, 49662, 49020, 56373, 61652, 57119, 49638,
46954, 57651, 49986, 58447, 48520, 56064, 61297, 58851, 59461,
62447, 48584, 56190, 60756), NilesWeight = c(20359, 20797, 19342,
20585, 17629, 19770, 11776, 20167, NA, 18622, 20401, 19292, 13524,
16090, 13605, 20099, 20065, 20893, 20659, 20698, 18528, 14016,
13779, 20701, 19415, 20318, 15152, 20601, 19380, 20032, 19532,
20395, 19410, 19739, 20728, 18536, 13841, 20478, 20777, 20269,
17682, 20890, 20344, 19269, 17858, 18101, 20376, 13672, 20427,
13100), CantonWeight = c(NA, NA, NA, NA, NA, NA, NA, NA, 20235,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), Grade = c("766FW", "766FK",
"764KS", "764FK", "766BK", "766BK", "766SS", "766BL", "766GK",
"766JS", "766BL", "766JK", "764SK", "745TS", "764KK", "766BJ",
"766BK", "765BU", "779KJ", "779BV", "766GK", "766TW", "766TJ",
"766BK", "766BK", "766BV", "766NS", "766GL", "766BK", "766JK",
"764KK", "764KW", "766BS", "766BK", "766BK", "763BK", "749BW",
"766JJ", "765BV", "766FS", "766JJ", "766BL", "779BS", "766BK",
"766JS", "764KK", "766JK", "764KL", "765BL", "766IK"), MeltDate = structure(c(17030,
17269, 17759, 17330, 17876, 17882, 18110, 17243, 17921, 17265,
17100, 17374, 17516, 17577, 17569, 17323, 18005, 17233, 17371,
17212, 17515, 17071, 17384, 17718, 17988, 16841, 17034, 17096,
17845, 17719, 18107, 17180, 17131, 17616, 17972, 17665, 17178,
16976, 17702, 17202, 17755, 17098, 17597, 17946, 17785, 17832,
18026, 17101, 17605, 17922), class = "Date"), MeltControlRelease = structure(c(1472515200,
1492732800, 1535068800, 1498176000, 1545782400, 1548892800, 1565740800,
1490572800, 1549324800, 1492560000, 1478131200, 1502236800, 1514505600,
1522195200, 1525305600, 1498780800, 1556496000, 1489536000, 1502064000,
1492041600, 1513728000, 1475712000, 1503878400, 1531440000, 1555027200,
1456790400, 1472428800, 1478131200, 1542758400, 1532304000, 1564963200,
1484784000, 1480464000, 1522713600, 1554249600, 1526860800, 1484784000,
1468281600, 1529971200, 1486684800, 1535500800, 1478131200, 1522800000,
1551830400, 1538611200, 1542326400, 1558483200, 1478649600, 1521417600,
1549843200), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
O2Range = c("17/20", "17/20", "14/19", "16/20", "16/20",
"16/20", NA, "17/20", "18/21", "17/20", NA, "17/20", NA,
NA, "13/19", "16/20", "17/20", "10/12", "12 max", "12 max",
"17/20", "17/20", "17/20", "17/20", "17/20", "16/20", "17/20",
"19/22", "17/20", "17/20", "13/19", "14/19", "17/20", "17/20",
"17/20", "17/20", "32/37", "17/20", NA, "14/17", "17/20",
"17/20", "9/12", "16/20", "17/20", "8/14", "17/20", "14/19",
"10/13", "17/20"), ScrapPct = c("50(T)", "50", "70", "50",
"70", "60", NA, "60", "50", "50(T)", NA, "70", NA, "0", "TURNINGS PUCKS",
"70", "70", "50", "0", "50", "50", "0", "0", "50", "70",
"50", "0", "0(T)", "70", "50", "70(T)", "50", "70", "70",
"70", "0", "28", "50(T)", "28", "28", "28", "50", "50", "70",
"28", "28", "50(T)", "50", "28", "50"), ReasonLate = c(NA,
NA, NA, NA, NA, NA, "remelt of 9J62399", NA, NA, NA, NA,
NA, "remelt of 8R54007", "Water spot on intermediate cast, use for non-rotor",
NA, NA, "plant power outage, restrike, melt to 42\"", NA,
NA, NA, NA, NA, "High O2 top and bottom", NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, "Low O2 ingot, apply to a different order", "Low Al/V. Appley to a different order",
NA, NA, NA, "High Al"), O2High = list("20", "20", "19", "20",
"20", "20", NA_character_, "20", "21", "20", NA_character_,
"20", NA_character_, NA_character_, "19", "20", "20",
"12", NA_character_, NA_character_, "20", "20", "20",
"20", "20", "20", "20", "22", "20", "20", "19", "19",
"20", "20", "20", "20", "37", "20", NA_character_, "17",
"20", "20", "12", "20", "20", "14", "20", "19", "13",
"20"), O2Low = list("17", "17", "14", "16", "16", "16",
NA_character_, "17", "18", "17", NA_character_, "17",
NA_character_, NA_character_, "13", "16", "17", "10",
NA_character_, NA_character_, "17", "17", "17", "17",
"17", "16", "17", "19", "17", "17", "13", "14", "17",
"17", "17", "17", "32", "17", NA_character_, "14", "17",
"17", "9", "16", "17", "8", "17", "14", "10", "17"),
Turnings = c(1, 0, 0, 0, 0, 0, NA, 0, 0, 1, NA, 0, NA, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0), Furnace = c("J",
"J", "G", "H", "J", "G", "J", "J", "T", "F", "G", "G", "H",
"J", "K", "G", "G", "H", "J", "H", "G", "G", "H", "G", "V",
"H", "G", "G", "T", "G", "H", "F", "F", "G", "H", "J", "J",
"J", "J", "F", "G", "G", "G", "H", "G", "G", "G", "J", "H",
"R"), DailyAverageRelativeHumidity = c(85L, 57L, 80L, 66L,
76L, 78L, NA, 93L, 62L, 34L, 76L, 72L, 76L, 91L, 82L, 80L,
91L, 48L, 78L, 78L, 76L, 59L, 78L, 72L, 46L, 79L, 72L, 77L,
72L, 63L, 80L, 70L, 82L, 49L, 53L, 90L, 89L, 58L, 86L, 54L,
70L, 72L, 79L, 68L, 87L, 93L, 83L, 90L, 67L, 81L), DailyAverageDewPointTemperature = c(68L,
33L, 68L, 67L, 22L, 28L, NA, 32L, 5L, 29L, 32L, 63L, 24L,
52L, 16L, 52L, 50L, 26L, 61L, 24L, 12L, 44L, 57L, 60L, 19L,
15L, 62L, 37L, 20L, 52L, 66L, 20L, 32L, 22L, 16L, 59L, 46L,
54L, 63L, 18L, 60L, 39L, 27L, 12L, 60L, 42L, 51L, 44L, 21L,
14L)), row.names = c(NA, -50L), class = c("tbl_df", "tbl",
"data.frame"))
最简单的做法是先创建一个中间数据框,在其中按全部事件(包括 NA)计算计数和百分比。
# Tally the rows for each lookup code (NA forms its own group), then express
# every tally as a percentage of all rows, NA rows included.
df_plot <- df %>%
  count(LookupCode, name = "count") %>%
  mutate(percent = 100 * count / sum(count))
现在,百分比已经按全部事件(包括 NA)计算好了;绘图时只需过滤掉 NA 行,条形图中就不会出现 NA,而各代码的原始百分比保持不变。
# Plot only the rows that have a lookup code. `percent` was computed in
# df_plot over all rows (NA included), so each label still shows the code's
# share of every event.
# geom_text() deliberately inherits the filtered data from ggplot(): handing
# it the unfiltered df_plot would add the NA category (and its label) back
# onto the x axis.
ggplot(
  df_plot %>% filter(!is.na(LookupCode)),
  aes(x = LookupCode, y = count)
) +
  geom_col() +
  geom_text(aes(label = paste0(round(percent, 1), "%")))
然后再按需添加主题和其他细节。