从光栅中提取气候数据时的r-NaNs



我正试图从.nc文件中提取PDSI(Palmer干旱严重性指数(值,但我得到了一些位置的神秘NaN值。空间点数据是美国各地的绘图位置。它们是使用NAD 83基准的十进制纬度/经度。

PDSI数据如下:https://wrcc.dri.edu/wwdt/data/PRISM/pdsi/.下载文件pdsi_6_PRISM.nc、pdsi_7_PRISM.nc和pdsi_8_PRISM.nc.这些是整个美国的历史数据(从1895-2022年;6月6日,7月7日,8月8日(。它们在WSG84投影中。

对于每个位置(n=~102000(,我都有纬度/经度数据(数据在这里,从R转储:https://www.dropbox.com/s/xlfh7ubkwlie1ws/dump_pdsi_data.R?dl=0)。我想计算一个";基线";值(1970-1990年6月、7月和8月的平均值(,然后计算两年之间的时间间隔("growInt"(的平均值(MEASTIME_t1和MEASTIME_t2,同样适用于6月、七月和8月(,最后取两者之间的差。

代码似乎在做预期的事情,然而,我用NaN得到了几个位置。有人知道为什么会这样吗?它是否与位置数据为NAD 83和pdsi数据为WSG84有关?

## libraries
library(ncdf4) # package for netcdf manipulation
library(raster) # package for raster manipulation
library(rgdal) # package for geospatial analysis
library(ggplot2) # package for plotting 
library(sp)
### read in the data -  limit to growing season - June, July and August - 
pdsi6 <- nc_open("pdsi_6_PRISM.nc")
pdsi7 <- nc_open("pdsi_7_PRISM.nc")
pdsi8 <- nc_open("pdsi_8_PRISM.nc")

## GET LAT LON and DAY limits for each dataset 
### 6 = AUGUST, 7 = JUNE, 8 = JULY 
lat.6 <- ncvar_get(pdsi6, "latitude", verbose = F)
lon.6 <- ncvar_get(pdsi6, "longitude")
day.6 <- ncvar_get(pdsi6, "day")
###
lat.7 <- ncvar_get(pdsi7, "latitude", verbose = F)
lon.7 <- ncvar_get(pdsi7, "longitude")
day.7 <- ncvar_get(pdsi7, "day")
###
lat.8 <- ncvar_get(pdsi8, "latitude", verbose = F)
lon.8 <- ncvar_get(pdsi8, "longitude")
day.8 <- ncvar_get(pdsi8, "day")
###
## store the data in a 3-dimensional array
pdsi.array.6 <- ncvar_get(pdsi6)
pdsi.array.7 <- ncvar_get(pdsi7) 
pdsi.array.8 <- ncvar_get(pdsi8) 
## make -9999s (fill value) to NAs
pdsi.array.6[pdsi.array.6 == -9999] <- NA
pdsi.array.7[pdsi.array.7 == -9999] <- NA
pdsi.array.8[pdsi.array.8 == -9999] <- NA

## convert to raster bricks    
r_brick.6 <- brick(pdsi.array.6, xmn=min(lat.6), xmx=max(lat.6), ymn=min(lon.6), ymx=max(lon.6), crs=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
r_brick.7 <- brick(pdsi.array.7, xmn=min(lat.7), xmx=max(lat.7), ymn=min(lon.7), ymx=max(lon.7), crs=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
r_brick.8 <- brick(pdsi.array.8, xmn=min(lat.8), xmx=max(lat.8), ymn=min(lon.8), ymx=max(lon.8), crs=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))

## start with null dataframe
source("dump_pdsi_data.R")

##################################################### START FOR LOOP
### for loop to extract time-series data for each plot location
for(i in 1:length(pdsi_data$pltID)){

### extract values 
pdsi_vec.6 <- extract(r_brick.6, 
SpatialPoints(cbind(pdsi_data$LAT[i], pdsi_data$LON[i])), method='simple')

pdsi_vec.7 <- extract(r_brick.7, 
SpatialPoints(cbind(pdsi_data$LAT[i], pdsi_data$LON[i])), method='bilinear') 

pdsi_vec.8 <- extract(r_brick.8, 
SpatialPoints(cbind(pdsi_data$LAT[i], pdsi_data$LON[i])), method='bilinear')

# sp::spTransform(SpatialPoints(cbind(pdsi_data$LAT[i], pdsi_data$LON[i]), proj4string=CRS("+init=epsg:4269 +proj=longlat +ellps=GRS80 +datum=NAD83 +no_defs +towgs84=0,0,0")), CRSobj = crs(r_brick.8))


### add year names to the vector 
names(pdsi_vec.6) <- lubridate::year(as.Date("1900-01-01") + day.6)
names(pdsi_vec.7) <- lubridate::year(as.Date("1900-01-01") + day.7)
names(pdsi_vec.8) <- lubridate::year(as.Date("1900-01-01") + day.8)

### calculate the average baseline value 1970-1990
pdsi_data$pdsi_baseline[i] <- mean(c(pdsi_vec.6[76:96], pdsi_vec.7[76:96], pdsi_vec.8[76:96]), na.rm = T)

### calculate the mean over the growth interval
pdsi_data$pdsi_growInt[i] <- mean(c(as.vector(pdsi_vec.6[names(pdsi_vec.6) %in% 
seq(floor(pdsi_data$MEASTIME_t1[i]), floor(pdsi_data$MEASTIME_t2[i]))]), 
as.vector(pdsi_vec.7[names(pdsi_vec.7) %in% 
seq(floor(pdsi_data$MEASTIME_t1[i]), floor(pdsi_data$MEASTIME_t2[i]))]), 
as.vector(pdsi_vec.8[names(pdsi_vec.8) %in% 
seq(floor(pdsi_data$MEASTIME_t1[i]), floor(pdsi_data$MEASTIME_t2[i]))])),
na.rm = T)
} ## end for loop
#####################################################
## make column for difference
pdsi_data$DeltaPDSI <-  pdsi_data$pdsi_growInt - pdsi_data$pdsi_baseline

我能够通过使用terra包找到这个问题的解决方案。将数据作为光栅而不是netcdf文件导入可能也解决了这个问题。使用terra::extract而不是raster::extract从没有NaN值的光栅返回数据。我不太确定为什么会这样,但它解决了这个问题。

我还了解到NAD83和WSG84可以被视为可交换投影。请参阅R 中的NCEAS坐标参考系统概述

NAD83和WGS84之间的转换通常不建议用于大多数>应用程序,因为标准转换可能会引入>相对于它们的差异而言很大。使事情复杂化的是,差异>NAD83和WGS84之间的关系随时间和位置而变化。

## libraries
library(terra)
### read in the data - as rasters from terra package -  limit to growing season - June, July and August - 
pdsi6 <- rast("pdsi_6_PRISM.nc")
names(pdsi6) <- as.character(1895:2022)
pdsi7 <- rast("pdsi_7_PRISM.nc")
names(pdsi7) <- as.character(1895:2022)
pdsi8 <- rast("pdsi_8_PRISM.nc")
names(pdsi8) <- as.character(1895:2022)
## start with null dataframe
source("dump_pdsi_data.R")
## makes spatVector for spatial points of plot locations
pts <- vect(cbind(pdsi_data$LON, pdsi_data$LAT), crs="+proj=longlat +datum=WGS84")
##################################################### START FOR LOOP
### for loop to extract time-series data for each plot location
for(i in 1:length(pdsi_data$pltID)){

### extract values 
pdsi_vec.6 <- unlist(terra::extract(pdsi6, pts[i]))

pdsi_vec.7 <- unlist(terra::extract(pdsi7, pts[i]))

pdsi_vec.8 <- unlist(terra::extract(pdsi8, pts[i]))

### calculate the average baseline value 1970-1990
pdsi_data$pdsi_baseline[i] <- mean(c(pdsi_vec.6[as.character(rep(1970:1990))], 
pdsi_vec.7[as.character(rep(1970:1990))], 
pdsi_vec.8[as.character(rep(1970:1990))]), 
na.rm = T)

### calculate the mean over the growth interval
pdsi_data$pdsi_growInt[i] <- mean(c(pdsi_vec.6[names(pdsi_vec.6) %in% 
seq(floor(pdsi_data$MEASTIME_t1[i]), floor(pdsi_data$MEASTIME_t2[i]))], 
pdsi_vec.7[names(pdsi_vec.7) %in% 
seq(floor(pdsi_data$MEASTIME_t1[i]), floor(pdsi_data$MEASTIME_t2[i]))], 
pdsi_vec.8[names(pdsi_vec.8) %in% 
seq(floor(pdsi_data$MEASTIME_t1[i]), floor(pdsi_data$MEASTIME_t2[i]))]),
na.rm = T)
} ## end for loop
#####################################################
## make column for difference
pdsi_data$DeltaPDSI <-  pdsi_data$pdsi_growInt - pdsi_data$pdsi_baseline

最新更新