我正在使用一个在线的ONS通货膨胀数据集,并试图绘制它的图表,但是当用ggplot绘制它时,x轴不是按时间顺序排列的。(顺序随机)
下面是我的代码,其中包含数据集的链接:
# Question script: download the ONS CPI series, keep the rows from "2020 JAN"
# onwards and draw them as a line chart.
# Install and load the packages used below
install.packages("tidyverse")
library("tidyverse")
install.packages("lubridate")
library("lubridate")
#webscraping the ONS inflation csv file
cpi<-read.csv(url("https://www.ons.gov.uk/generator?format=csv&uri=/economy/inflationandpriceindices/timeseries/d7g7/mm23"))
#removing rows 1 to 7 which contain descriptors, keeping this as a dataframe
cpi<-cpi[-c(1,2,3,4,5,6,7),,drop=FALSE]
#renaming columns as date and inflation
cpi<- cpi %>% rename(date=Title)
cpi<- cpi %>% rename(inflation=CPI.ANNUAL.RATE.00..ALL.ITEMS.2015.100)
#proper title characters for date
# NOTE(review): cut_cpi is first assigned by the subsetting statement further
# down, so in a fresh session this line refers to an object that does not
# exist yet.
cut_cpi$date<- str_to_title(cut_cpi$date)
#subsetting cpi dataset in order to have only the data from the row of 2020 JAN to the last row
cut_cpi<- cpi[(which(cpi$date=="2020 JAN")):nrow(cpi),]
#plotting inflation in a line chart
# NOTE(review): date and inflation are mapped exactly as read from the CSV;
# presumably both are character columns at this point, which would make the
# x axis discrete rather than chronological - confirm with str(cut_cpi).
ggplot(cut_cpi,aes(x=date,y=inflation,group=1,))+geom_line(colour="black")+labs(title="CPI inflation from January 2020") +theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
我认为问题可能与 date 列有关,因为它是字符型(character)而不是日期型(Date)。但是我没能把它转换成日期类。
我试过了
# Attempted conversions of the date column (reported by the asker as not
# solving the problem).
# Attempt: store the date labels as a factor
cut_cpi$date <- as_factor(cut_cpi$date)
# Attempt: parse the labels as dates using a year + abbreviated-month format.
# NOTE(review): the labels look like "2020 JAN" and carry no day component,
# which is presumably why this does not produce usable dates - confirm.
cut_cpi$date <- as_date(cut_cpi$date, format='%Y %b')
我尝试检查区域设置,没有问题
> Sys.setlocale("LC_TIME")
[1] "English_United Kingdom.1252"
你有两个问题。
1. inflation 列被存储为 character(字符型)而不是数字,因此无法正确绘制。
2. date 列被存储为 character 而不是日期,因此只会按字母顺序绘制。它必须是日期类型才能正确排序;之后只需设置刻度标签的格式,就能以您想要的格式显示日期。
library("tidyverse")
library("lubridate")

# Download the ONS inflation series as a CSV file
cpi_url <- paste0(
  "https://www.ons.gov.uk/generator?format=csv",
  "&uri=/economy/inflationandpriceindices/timeseries/d7g7/mm23"
)
cpi <- read.csv(url(cpi_url))

# Drop rows 1 to 7, which contain descriptors, keeping this as a data frame
cpi <- cpi[-(1:7), , drop = FALSE]

# Rename the columns to date and inflation
cpi <- cpi %>%
  rename(
    date = Title,
    inflation = CPI.ANNUAL.RATE.00..ALL.ITEMS.2015.100
  )

# The question's str_to_title() call on cut_cpi$date is dropped: it ran before
# cut_cpi had been created, and the title-casing is not needed for parsing.

# Keep only the rows from "2020 JAN" to the last row. match() returns the
# first matching row (or NA), so a missing label fails with a clear message
# instead of an obscure error from `:`.
start_row <- match("2020 JAN", cpi$date)
if (is.na(start_row)) {
  stop("No row labelled \"2020 JAN\" in the date column", call. = FALSE)
}
cut_cpi <- cpi[start_row:nrow(cpi), ]

# Convert the columns once, in the data, rather than inside aes():
# - real_date_format: the "YYYY MON" label parsed and stored as a Date
#   (scale_x_date breaks with datetime values)
# - inflation: numeric instead of character
# Columns are referenced by bare name so mutate() uses the piped data.
cut_cpi <- cut_cpi %>%
  mutate(
    real_date_format = as_date(parse_date_time(date, orders = "%Y %b")),
    inflation = as.numeric(inflation)
  ) %>%
  arrange(desc(real_date_format))

# Plot inflation in a line chart with one tick per month, labelled "Mon YYYY"
ggplot(cut_cpi, aes(x = real_date_format, y = inflation, group = 1)) +
  geom_line(colour = "black") +
  labs(title = "CPI inflation from January 2020") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y")
你可以试试:
# Parse the "YYYY MON" labels with ym() so the x axis is a real date axis, and
# plot inflation as a number: as.numeric() keeps the y axis continuous even if
# the column is still stored as character.
ggplot(cut_cpi, aes(x = ym(date), y = as.numeric(inflation), group = 1)) +
  geom_line(colour = "black") +
  labs(title = "CPI inflation from January 2020") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  scale_x_date(date_breaks = "3 month")
您可以把 "3 month" 改成其他间隔(例如 "1 month" 或 "6 months")。
克莱尔