我想知道每个收获年份,vhi低于40的月份的数量。
例如:对于1982年=:3;1983年=3
我想保持数据集的原样。只是为了添加一个额外的列。
在这篇文章之后,我尝试了这个:
require(zoo)
df2$yearmon <- as.yearmon(paste(df2$harvestYear, df2$Month), "%Y %m")
df2= ddply(df2, .(yearmon), transform, drought_months=sum(vhi <= 40) )
然后我会通过分组来计算月份的总数。但它不起作用。
df3= df2 %>%
group_by(gid, country, xcoord, ycoord, harvestYear, vhi) %>%
summarise(drought.vhi.month = sum(drought_months))%>%
ungroup()
数据集:
structure(list(gid = c(100468L, 100468L, 100468L, 100468L, 100468L,
100468L, 100468L, 100468L, 100468L, 100468L, 104078L, 104078L,
104078L, 104078L, 104078L, 104078L, 104078L, 104078L, 104078L,
104078L, 104078L, 115615L, 115615L, 115615L, 115615L, 115615L,
115615L, 115615L, 115615L, 115615L, 115615L, 115615L), country = c("Namibia",
"Namibia", "Namibia", "Namibia", "Namibia", "Namibia", "Namibia",
"Namibia", "Namibia", "Namibia", "Namibia", "Namibia", "Namibia",
"Namibia", "Namibia", "Namibia", "Namibia", "Namibia", "Namibia",
"Namibia", "Namibia", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)"
), xcoord = c("13.75", "13.75", "13.75", "13.75", "13.75", "13.75",
"13.75", "13.75", "13.75", "13.75", "18.75", "18.75", "18.75",
"18.75", "18.75", "18.75", "18.75", "18.75", "18.75", "18.75",
"18.75", "27.25", "27.25", "27.25", "27.25", "27.25", "27.25",
"27.25", "27.25", "27.25", "27.25", "27.25"), ycoord = c("-20.25",
"-20.25", "-20.25", "-20.25", "-20.25", "-20.25", "-20.25", "-20.25",
"-20.25", "-20.25", "-17.75", "-17.75", "-17.75", "-17.75", "-17.75",
"-17.75", "-17.75", "-17.75", "-17.75", "-17.75", "-17.75", "-9.75",
"-9.75", "-9.75", "-9.75", "-9.75", "-9.75", "-9.75", "-9.75",
"-9.75", "-9.75", "-9.75"), Time = structure(c(4352, 4383, 4717,
4414, 4442, 4473, 4748, 5082, 4779, 4807, 9862, 10135, 10166,
10196, 9893, 9921, 9952, 10105, 10227, 10500, 10531, 13057, 13088,
13118, 12904, 12935, 12965, 12996, 13027, 13422, 13453, 13483
), class = "Date"), harvestYear = c(1982L, 1982L, 1983L, 1982L,
1982L, 1982L, 1983L, 1984L, 1983L, 1983L, 1997L, 1998L, 1998L,
1998L, 1997L, 1997L, 1997L, 1998L, 1998L, 1999L, 1999L, 2005L,
2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2006L, 2006L,
2006L), Month = c(12L, 1L, 12L, 2L, 3L, 4L, 1L, 12L, 2L, 3L,
1L, 10L, 11L, 12L, 2L, 3L, 4L, 9L, 1L, 10L, 11L, 10L, 11L, 12L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), vhi = c("51.0890450353229",
"32.441240310669", "40.847339630127", "21.9409523010254", "23.7816608428955",
"46.6207218170166", "23.5423679351807", "42.2399349212647", "19.0511212348938",
"10.1214292049408", "59.2775287628174", "68.3184223175049", "63.1654891967773",
"46.8237014770508", "66.5376491546631", "68.1310760498047", "70.3945808410645",
"70.0717346191406", "38.8603992462158", "42.2310523986816", "47.7076560974121",
"38.2882362365723", "32.5415182113648", "36.3100366592407", "48.1315612792969",
"49.0768402099609", "52.1510925292969", "53.8123741149902", "50.4373531341553",
"51.5549461364746", "51.573205947876", "46.0322494506836")), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 40000L, 40001L, 40002L,
40003L, 40004L, 40005L, 40006L, 40007L, 40008L, 40009L, 40010L,
200000L, 200001L, 200002L, 200003L, 200004L, 200005L, 200006L,
200007L, 200008L, 200009L, 200010L), class = "data.frame")
我看到1982年的第三个月(vhi <= 40
(。此外,vhi
中的值是字符,因此需要先更改为数字。
df2 %>%
mutate(vhi = as.numeric(vhi)) %>%
filter(harvestYear == 1982)
# gid country xcoord ycoord Time harvestYear Month vhi
#1 100468 Namibia 13.75 -20.25 1981-12-01 1982 12 51.08905
#2 100468 Namibia 13.75 -20.25 1982-01-01 1982 1 32.44124
#3 100468 Namibia 13.75 -20.25 1982-02-01 1982 2 21.94095
#4 100468 Namibia 13.75 -20.25 1982-03-01 1982 3 23.78166
#5 100468 Namibia 13.75 -20.25 1982-04-01 1982 4 46.62072
你可以试试
library(dplyr)
df2 %>%
mutate(vhi = as.numeric(vhi)) %>%
group_by(harvestYear) %>%
mutate(drought_months = sum(vhi <= 40))
# gid country xcoord ycoord Time harvestYear Month vhi drought_months
# <int> <chr> <chr> <chr> <date> <int> <int> <dbl> <int>
# 1 100468 Namibia 13.75 -20.25 1981-12-01 1982 12 51.1 3
# 2 100468 Namibia 13.75 -20.25 1982-01-01 1982 1 32.4 3
# 3 100468 Namibia 13.75 -20.25 1982-12-01 1983 12 40.8 3
# 4 100468 Namibia 13.75 -20.25 1982-02-01 1982 2 21.9 3
# 5 100468 Namibia 13.75 -20.25 1982-03-01 1982 3 23.8 3
# 6 100468 Namibia 13.75 -20.25 1982-04-01 1982 4 46.6 3
# 7 100468 Namibia 13.75 -20.25 1983-01-01 1983 1 23.5 3
# 8 100468 Namibia 13.75 -20.25 1983-12-01 1984 12 42.2 0
# 9 100468 Namibia 13.75 -20.25 1983-02-01 1983 2 19.1 3
#10 100468 Namibia 13.75 -20.25 1983-03-01 1983 3 10.1 3
# … with 22 more rows