r语言 - ggplot2 条形图 - 从绘图输出中省略一个因子



因为样本很大,所以我在问题底部放了一个dput代码块来导入我的数据样本。

我正在尝试创建一个条形图,显示此数据集中各类事件(列名为"LookupCode")所占的百分比。在p01中,我只查看具有查找代码的事件(省略"LookupCode"为 NA 的行)。在p02中,我想查看所有事件并绘制计数和百分比,计算时包括"LookupCode"为 NA 的行,但不在条形图中画出这些值。(也就是说:我不希望在 x 轴上出现一根代表 NA 的大条。)

关于如何做到这一点的任何想法?

谢谢。代码如下:

library(tidyverse)
library(scales)
# Plots p01 and p02: bar charts of LookupCode counts, ordered by frequency,
# with each bar labelled by its share of the rows given to that plot.
# NOTE: `df` (the sample data) must already exist when this code runs.

# p01: rows with a missing LookupCode are dropped first, so the percentages
# are relative to the coded events only.
p01 <- ggplot(
  df %>% filter(!is.na(LookupCode)),
  aes(fct_infreq(LookupCode), fill = LookupCode)
) +
  geom_bar(stat = "count") +
  # after_stat(count) replaces the deprecated ..count.. notation.
  geom_text(
    stat = "count",
    aes(label = scales::percent(after_stat(count) / sum(after_stat(count)))),
    vjust = "inward"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "none"
  ) +
  labs(
    title = "Count and Percent of Lookup Codes",
    x = "Lookup Code",
    y = "Count"
  )

# p02: all rows are used, so the percentages are relative to every event.
# The missing LookupCode values are not filtered here, so they are still
# part of the plotted data.
p02 <- ggplot(df, aes(fct_infreq(LookupCode), fill = LookupCode)) +
  geom_bar(stat = "count") +
  geom_text(
    stat = "count",
    aes(label = scales::percent(after_stat(count) / sum(after_stat(count)))),
    vjust = "inward"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "none"
  ) +
  labs(
    title = "Count and Percent of Lookup Codes",
    x = "Lookup Code",
    y = "Count"
  )
# Sample data (dput output): 50 rows, one per event. LookupCode is NA for
# events without a lookup code.
# The embedded double quote in ReasonLate ("... melt to 42\"") must stay
# escaped, otherwise the structure() call does not parse.
df <- structure(list(ReleaseYear = c(2016, 2017, 2018, 2017, 2019, 
2019, 2019, 2017, 2019, 2017, 2016, 2017, 2017, 2018, 2018, 2017, 
2019, 2017, 2017, 2017, 2017, 2016, 2017, 2018, 2019, 2016, 2016, 
2016, 2018, 2018, 2019, 2017, 2016, 2018, 2019, 2018, 2017, 2016, 
2018, 2017, 2018, 2016, 2018, 2019, 2018, 2018, 2019, 2016, 2018, 
2019), ReleaseMonth = c("SEPT", "APRIL", "AUGUST", "JUNE", "JAN", 
"JAN", "AUGUST", "MARCH", "FEB", "APRIL", "NOV", "AUGUST", "DEC", 
"FEB", "FEB", "JUNE", "MAY", "MARCH", "AUGUST", "FEB", "DEC", 
"OCT", "AUGUST", "JULY", "APRIL", "MARCH", "SEPT", "NOV", "NOV", 
"JULY", "AUGUST", "JAN", "DEC", "APRIL", "MARCH", "MAY", "JAN", 
"JULY", "JUNE", "FEB", "AUGUST", "NOV", "MARCH", "FEB", "SEPT", 
"NOV", "MAY", "NOV", "MARCH", "FEB"), ProductionOrder = c(10026795, 
10027932, 10032532, 10029147, 10033613, 10033771, 10035329, 10028252, 
10033714, 10027859, 10026658, 10029516, 10030769, 10028211, 10031074, 
10028976, 10034237, 10028414, 10029408, 100296220, 10030650, 
10027200, 10029249, 10032374, 10034502, 10024691, 10026743, 10027112, 
10033068, 10032391, 10035711, 10027797, 10026638, 10031533, 10034233, 
10031882, 10027822, 10026203, 10032144, 10028241, 10031825, 10026656, 
10031067, 10034340, 10032801, 10033399, 10034876, 10027364, 10031486, 
10034002), NilesHeatNo = c("8J47674", "8J51003", "8G58509", "8H51788", 
"8J60248", "8G60351", "9J63427", "8J50670", "8T60855", "8F50950", 
"8G48562", "8G52372", "9H54615", "8J55715", "8K55529", "8G51685", 
"8G62172", "8H50464", "8J52323", "8H50164", "8G54596", "9G48197", 
"9H52494", "8G57871", "8V61894", "8H45452", "9G47724", "8G48507", 
"8T59690", "8G57886", "8H63385", "8F49662", "8F49020", "8G56373", 
"8H61652", "8J57119", "8J49638", "8J46954", "8J57651", "8F49986", 
"8G58447", "8G48520", "8G56064", "8H61297", "8G58851", "8G59461", 
"8G62447", "8J48584", "8H56190", "8R60756"), LookupCode = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "W4 - PROCESS ERROR", 
NA, NA, "U0 - EQUIPMENT BREAK DOWN", NA, NA, NA, NA, NA, "C1 - OXYGEN - HIGH", 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, "C2 - OXYGEN - LOW", "A2 - ALUMINUM - LOW", 
NA, NA, NA, "A1 - ALUMINUM - HIGH"), ScrapWeight = c(NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, 0, NA, 
NA, NA, NA, NA, 13779, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, 0, NA, NA, NA, 
0), Melt = c("8J", "8J", "8G", "8H", "8J", "8G", "9J", "8J", 
"8T", "8F", "8G", "8G", "9H", "8J", "8K", "8G", "8G", "8H", "8J", 
"8H", "8G", "9G", "9H", "8G", "8V", "8H", "9G", "8G", "8T", "8G", 
"8H", "8F", "8F", "8G", "8H", "8J", "8J", "8J", "8J", "8F", "8G", 
"8G", "8G", "8H", "8G", "8G", "8G", "8J", "8H", "8R"), MeltNo = c(47674, 
51003, 58509, 51788, 60248, 60351, 63427, 50670, 60855, 50950, 
48562, 52372, 54615, 55715, 55529, 51685, 62172, 50464, 52323, 
50164, 54596, 48197, 52494, 57871, 61894, 45452, 47724, 48507, 
59690, 57886, 63385, 49662, 49020, 56373, 61652, 57119, 49638, 
46954, 57651, 49986, 58447, 48520, 56064, 61297, 58851, 59461, 
62447, 48584, 56190, 60756), NilesWeight = c(20359, 20797, 19342, 
20585, 17629, 19770, 11776, 20167, NA, 18622, 20401, 19292, 13524, 
16090, 13605, 20099, 20065, 20893, 20659, 20698, 18528, 14016, 
13779, 20701, 19415, 20318, 15152, 20601, 19380, 20032, 19532, 
20395, 19410, 19739, 20728, 18536, 13841, 20478, 20777, 20269, 
17682, 20890, 20344, 19269, 17858, 18101, 20376, 13672, 20427, 
13100), CantonWeight = c(NA, NA, NA, NA, NA, NA, NA, NA, 20235, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA), Grade = c("766FW", "766FK", 
"764KS", "764FK", "766BK", "766BK", "766SS", "766BL", "766GK", 
"766JS", "766BL", "766JK", "764SK", "745TS", "764KK", "766BJ", 
"766BK", "765BU", "779KJ", "779BV", "766GK", "766TW", "766TJ", 
"766BK", "766BK", "766BV", "766NS", "766GL", "766BK", "766JK", 
"764KK", "764KW", "766BS", "766BK", "766BK", "763BK", "749BW", 
"766JJ", "765BV", "766FS", "766JJ", "766BL", "779BS", "766BK", 
"766JS", "764KK", "766JK", "764KL", "765BL", "766IK"), MeltDate = structure(c(17030, 
17269, 17759, 17330, 17876, 17882, 18110, 17243, 17921, 17265, 
17100, 17374, 17516, 17577, 17569, 17323, 18005, 17233, 17371, 
17212, 17515, 17071, 17384, 17718, 17988, 16841, 17034, 17096, 
17845, 17719, 18107, 17180, 17131, 17616, 17972, 17665, 17178, 
16976, 17702, 17202, 17755, 17098, 17597, 17946, 17785, 17832, 
18026, 17101, 17605, 17922), class = "Date"), MeltControlRelease = structure(c(1472515200, 
1492732800, 1535068800, 1498176000, 1545782400, 1548892800, 1565740800, 
1490572800, 1549324800, 1492560000, 1478131200, 1502236800, 1514505600, 
1522195200, 1525305600, 1498780800, 1556496000, 1489536000, 1502064000, 
1492041600, 1513728000, 1475712000, 1503878400, 1531440000, 1555027200, 
1456790400, 1472428800, 1478131200, 1542758400, 1532304000, 1564963200, 
1484784000, 1480464000, 1522713600, 1554249600, 1526860800, 1484784000, 
1468281600, 1529971200, 1486684800, 1535500800, 1478131200, 1522800000, 
1551830400, 1538611200, 1542326400, 1558483200, 1478649600, 1521417600, 
1549843200), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    O2Range = c("17/20", "17/20", "14/19", "16/20", "16/20", 
    "16/20", NA, "17/20", "18/21", "17/20", NA, "17/20", NA, 
    NA, "13/19", "16/20", "17/20", "10/12", "12 max", "12 max", 
    "17/20", "17/20", "17/20", "17/20", "17/20", "16/20", "17/20", 
    "19/22", "17/20", "17/20", "13/19", "14/19", "17/20", "17/20", 
    "17/20", "17/20", "32/37", "17/20", NA, "14/17", "17/20", 
    "17/20", "9/12", "16/20", "17/20", "8/14", "17/20", "14/19", 
    "10/13", "17/20"), ScrapPct = c("50(T)", "50", "70", "50", 
    "70", "60", NA, "60", "50", "50(T)", NA, "70", NA, "0", "TURNINGS PUCKS", 
    "70", "70", "50", "0", "50", "50", "0", "0", "50", "70", 
    "50", "0", "0(T)", "70", "50", "70(T)", "50", "70", "70", 
    "70", "0", "28", "50(T)", "28", "28", "28", "50", "50", "70", 
    "28", "28", "50(T)", "50", "28", "50"), ReasonLate = c(NA, 
    NA, NA, NA, NA, NA, "remelt of 9J62399", NA, NA, NA, NA, 
    NA, "remelt of 8R54007", "Water spot on intermediate cast, use for non-rotor", 
    NA, NA, "plant power outage, restrike, melt to 42\"", NA, 
    NA, NA, NA, NA, "High O2 top and bottom", NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, "Low O2 ingot, apply to a different order", "Low Al/V.  Appley to a different order", 
    NA, NA, NA, "High Al"), O2High = list("20", "20", "19", "20", 
        "20", "20", NA_character_, "20", "21", "20", NA_character_, 
        "20", NA_character_, NA_character_, "19", "20", "20", 
        "12", NA_character_, NA_character_, "20", "20", "20", 
        "20", "20", "20", "20", "22", "20", "20", "19", "19", 
        "20", "20", "20", "20", "37", "20", NA_character_, "17", 
        "20", "20", "12", "20", "20", "14", "20", "19", "13", 
        "20"), O2Low = list("17", "17", "14", "16", "16", "16", 
        NA_character_, "17", "18", "17", NA_character_, "17", 
        NA_character_, NA_character_, "13", "16", "17", "10", 
        NA_character_, NA_character_, "17", "17", "17", "17", 
        "17", "16", "17", "19", "17", "17", "13", "14", "17", 
        "17", "17", "17", "32", "17", NA_character_, "14", "17", 
        "17", "9", "16", "17", "8", "17", "14", "10", "17"), 
    Turnings = c(1, 0, 0, 0, 0, 0, NA, 0, 0, 1, NA, 0, NA, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0), Furnace = c("J", 
    "J", "G", "H", "J", "G", "J", "J", "T", "F", "G", "G", "H", 
    "J", "K", "G", "G", "H", "J", "H", "G", "G", "H", "G", "V", 
    "H", "G", "G", "T", "G", "H", "F", "F", "G", "H", "J", "J", 
    "J", "J", "F", "G", "G", "G", "H", "G", "G", "G", "J", "H", 
    "R"), DailyAverageRelativeHumidity = c(85L, 57L, 80L, 66L, 
    76L, 78L, NA, 93L, 62L, 34L, 76L, 72L, 76L, 91L, 82L, 80L, 
    91L, 48L, 78L, 78L, 76L, 59L, 78L, 72L, 46L, 79L, 72L, 77L, 
    72L, 63L, 80L, 70L, 82L, 49L, 53L, 90L, 89L, 58L, 86L, 54L, 
    70L, 72L, 79L, 68L, 87L, 93L, 83L, 90L, 67L, 81L), DailyAverageDewPointTemperature = c(68L, 
    33L, 68L, 67L, 22L, 28L, NA, 32L, 5L, 29L, 32L, 63L, 24L, 
    52L, 16L, 52L, 50L, 26L, 61L, 24L, 12L, 44L, 57L, 60L, 19L, 
    15L, 62L, 37L, 20L, 52L, 66L, 20L, 32L, 22L, 16L, 59L, 46L, 
    54L, 63L, 18L, 60L, 39L, 27L, 12L, 60L, 42L, 51L, 44L, 21L, 
    14L)), row.names = c(NA, -50L), class = c("tbl_df", "tbl", 
"data.frame"))

最简单的做法是先创建一个中间数据框。

# Tally the rows per LookupCode (NA is kept as its own group), then express
# each tally as a percentage of all rows, NA rows included.
df_plot <- df %>%
  count(LookupCode, name = "count") %>%
  mutate(percent = 100 * count / sum(count))

由于百分比已经在包含 NA 的全部数据上计算好了,现在可以在绘图时把 NA 行过滤掉,而各条形上显示的仍然是原始百分比。

# Draw the bars from the non-NA rows only. `percent` was computed in df_plot
# over all rows, so the labels still use the full-data denominator.
ggplot(
  df_plot %>% filter(!is.na(LookupCode)),
  aes(x = LookupCode, y = count)
) +
  geom_col() +
  # No `data` argument here: the text layer inherits the filtered data from
  # ggplot(). Passing the unfiltered df_plot would bring the NA row back
  # into the plot.
  geom_text(aes(label = paste0(round(percent, 1), "%")))

并添加任何主题和细节

最新更新