在不同站点的数据集上运行Mann Kendall



我使用格式的水质数据

Station_Name, Sample_Date, Value, Parameter, Units
F1001         2/04/2020   5.6    NO3        mg/L
F1001         4/05/2020   3.9    NO3        mg/L
F1001         8/06/2020   2.7    NO3        mg/L
F1002         6/03/2020   1.7    NO3        mg/L
F1002         3/04/2020   2.5    NO3        mg/L
F1003         1/03/2020   4.9    NO3        mg/L
F1003         5/04/2020   1.5    NO3        mg/L
F1003         6/05/2020   3.1    NO3        mg/L
F1004         4/05/2020   9.3    NO3        mg/L
F1004         5/06/2020   3.6    NO3        mg/L

excel文件中还有1000多行数据。

我目前使用的程序可以将数据收集到数据库中。在这个程序中,他们包含了R-Console。我需要做的是获取所有数据,在每个站点(而不是整个数据库(上运行Mann-Kendall测试,并给我结果。第7、8和9行是通过双击程序中所需的字段获得的,有趣的是,它是根据我给数据库的数据创建的。

package_list <- c("ggplot2", "Kendall", "dplyr")
lapply(package_list, require, character.only = TRUE)
library(Kendall)
library(dplyr)
library(purrr)
Station_Name = c(STATION[Name,StationGroup(-1)])
SampleDate = c(SAMPLE[SampleDate,StationGroup(-1)])
NO3 = c(PARAMETER[NO3,Unit(mg/L),StationGroup(-1)])
#Add it to a Data Frame
df = data.frame(Station_Name, NO3, SampleDate)

cv <- function(val) {sd(val)/mean(val)}
MK <- function(x){
MKfun <- possibly(Kendall::MannKendall, 
list(tau  = NA_real_,
sl   = NA_real_,
S    = NA_real_,
D    = NA_real_,
varS = NA_real_))
out <- MKfun(x)
class(out) <- "list"
data.frame(out)
}
df %>%
group_by(Station_Name) %>% 
summarise(N     = n(),
MEAN  = mean    (Value),
STDEV = sd      (Value),
CoV   = cv      (Value), 
MAX   = max     (Value),
Q75   = quantile(Value, 0.75), 
Q50   = quantile(Value, 0.50), 
Q25   = quantile(Value, 0.25), 
MIN   = min     (Value),
MK(Value))

print("")
print(df)

当我运行脚本时,它会显示sl=NA_real_的状态错误,:意外的','(第24行(

> sessionInfo()
R version 3.6.3 (2020-02-29)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
Matrix products: default
locale:
[1] LC_COLLATE=English_United Kingdom.1252  LC_CTYPE=English_United Kingdom.1252   
[3] LC_MONETARY=English_United Kingdom.1252 LC_NUMERIC=C                           
[5] LC_TIME=English_United Kingdom.1252    
attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     
other attached packages:
[1] reshape2_1.4.4 purrr_0.3.3    dplyr_0.8.5    Kendall_2.2    readxl_1.3.1  
loaded via a namespace (and not attached):
[1] Rcpp_1.0.4       rstudioapi_0.11  magrittr_1.5     tidyselect_1.0.0 R6_2.4.1        
[6] rlang_0.4.5      fansi_0.4.1      stringr_1.4.0    plyr_1.8.6       tools_3.6.3     
[11] utf8_1.1.4       cli_2.0.2        assertthat_0.2.1 tibble_2.1.3     crayon_1.3.4    
[16] vctrs_0.2.4      glue_1.3.2       stringi_1.4.6    compiler_3.6.3   pillar_1.4.3    
[21] cellranger_1.1.0 boot_1.3-24      pkgconfig_2.0.3 

是否需要外部函数?否则,您可以留下一个可读性很强的dplyr代码。。

数据:

df <- tibble::tribble(~Station_Name, ~Sample_Date, ~Value, ~Parameter, ~Units,
"F1001", "2/04/2020", 5.6, "NO3", "mg/L",
"F1001", "4/05/2020", 3.9, "NO3", "mg/L",
"F1001", "8/06/2020", 2.7, "NO3", "mg/L",
"F1002", "6/03/2020", 1.7, "NO3", "mg/L",
"F1002", "3/04/2020", 2.5, "NO3", "mg/L",
"F1003", "1/03/2020", 4.9, "NO3", "mg/L",
"F1003", "5/04/2020", 1.5, "NO3", "mg/L",
"F1003", "6/05/2020", 3.1, "NO3", "mg/L",
"F1004", "4/05/2020", 9.3, "NO3", "mg/L",
"F1004", "5/06/2020", 3.6, "NO3", "mg/L")

解决方案:


library(dplyr)
cv <- function(val) {sd(val)/mean(val)}
df %>%
group_by(Station_Name) %>% 
summarise(MEAN  = mean    (Value),
STDEV = sd      (Value),
CoV   = cv      (Value), 
MAX   = max     (Value),
Q75   = quantile(Value, 0.75), 
Q50   = quantile(Value, 0.50), 
Q25   = quantile(Value, 0.25), 
MIN   = min     (Value))
# # A tibble: 4 x 9
#   Station_Name  MEAN STDEV   CoV   MAX   Q75   Q50   Q25   MIN
#   <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 F1001         4.07 1.46  0.358   5.6  4.75  3.9   3.3    2.7
# 2 F1002         2.1  0.566 0.269   2.5  2.3   2.1   1.9    1.7
# 3 F1003         3.17 1.70  0.537   4.9  4     3.1   2.3    1.5
# 4 F1004         6.45 4.03  0.625   9.3  7.88  6.45  5.03   3.6

***********编辑***************

对于MannKendall:

library(Kendall)
library(dplyr)
library(purrr)

MK <- function(x){

# this provides a fallback in case of error
MKfun <- possibly(Kendall::MannKendall, 
list(tau  = NA_real_,
sl   = NA_real_,
S    = NA_real_,
D    = NA_real_,
varS = NA_real_))
out <- MKfun(x)
# force to list and then to df to integrate it with the final tibble
class(out) <- "list"
data.frame(out)

}

df %>%
group_by(Station_Name) %>% 
summarise(MK(Value))
# # A tibble: 4 x 6
#   Station_Name    tau    sl     S     D  varS
#   <chr>         <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 F1001        -1         1    -3     3  3.67
# 2 F1002        NA        NA    NA    NA NA   
# 3 F1003        -0.333     1    -1     3  3.67
# 4 F1004        NA        NA    NA    NA NA   

你会看到一些NA,因为只有2个观测值,无法计算MK。在你的数据中,你不应该有这个问题。万一有退路。

正如你所看到的,我保留了函数自动计算的所有统计数据,因为我不知道你是只需要结果还是其他的。


所有

我把曼肯德尔和你的数据放在这里。

library(Kendall)
library(dplyr)
library(purrr)
cv <- function(val) {sd(val)/mean(val)}
MK <- function(x){

MKfun <- possibly(Kendall::MannKendall, 
list(tau  = NA_real_,
sl   = NA_real_,
S    = NA_real_,
D    = NA_real_,
varS = NA_real_))
out <- MKfun(x)
class(out) <- "list"
data.frame(out)

}
df %>%
group_by(Station_Name) %>% 
summarise(N     = n(),
MEAN  = mean    (Value),
STDEV = sd      (Value),
CoV   = cv      (Value), 
MAX   = max     (Value),
Q75   = quantile(Value, 0.75), 
Q50   = quantile(Value, 0.50), 
Q25   = quantile(Value, 0.25), 
MIN   = min     (Value),
MK(Value))

# # A tibble: 4 x 15
#   Station_Name     N  MEAN STDEV   CoV   MAX   Q75   Q50   Q25   MIN    tau    sl     S     D  varS
#   <chr>        <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 F1001            3  4.07 1.46  0.358   5.6  4.75  3.9   3.3    2.7 -1         1    -3     3  3.67
# 2 F1002            2  2.1  0.566 0.269   2.5  2.3   2.1   1.9    1.7 NA        NA    NA    NA NA   
# 3 F1003            3  3.17 1.70  0.537   4.9  4     3.1   2.3    1.5 -0.333     1    -1     3  3.67
# 4 F1004            2  6.45 4.03  0.625   9.3  7.88  6.45  5.03   3.6 NA        NA    NA    NA NA   

最新更新