"ValueError: Array conditional must be same shape as self"



我正在尝试遵循教程。基本上,我想在我的31波段Sentinel 1和Sentinel 2堆叠图像上运行随机森林分类。此外,我想提取光栅值到我的训练测试形状文件。这是我尝试过的:

from osgeo import gdal
from PIL import Image
import numpy as np
import matplotlib as mtp
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import earthpy.plot as ep
import rasterio
from rasterio.plot import reshape_as_raster, reshape_as_image
get_ipython().run_line_magic('matplotlib', 'inline')
pd.options.display.max_colwidth = 89

# In[2]:

#setting the path for image
S1_S2_stack = 'S1_S2_stack.tif'
#path to training and validation data
training_points = 'testing.shp'
validation_points = 'training.shp'

# In[3]:

colors = dict ((
(0, (0,76,153,255)),  #wheat
(1, (0,153,0,255)),   #corn
(2, (255,0,0,255)),   #other
(3, (255,153,51,255)),
(4, (255,255,0,255))

))

# In[4]:

for k in colors:
v = colors [k]
_v = [_v / 255.0 for _v in v]
colors[k] = _v

index_colors = [colors[key] if key in colors else (1,1,1,0) for key in range (0,5)]
cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification', 5)

# In[5]:

src = rasterio.open(S1_S2_stack)
bands = src.read()

# In[6]:

stack =src. read()
print(stack.shape)
fig, (ax1, ax2) = plt.subplots(1,2,figsize= (20,10))
ax1 = ep.plot_rgb(arr = stack, rgb =(3, 2, 1), stretch=True, ax = ax1, title = "RGB Composite - Sentinel-2")
stack_s1 =np.stack ((stack[28],stack[29],stack[29]/stack[28]))
ax2 = ep.plot_rgb(arr = stack_s1, rgb = (1,0,2), stretch=True, ax = ax2, title= "RGB Composite - Sentinel-1 (VV, VH, VV/VH)")
plt.tight_layout()

# In[7]:

img = src.read()
profile = src.profile
with rasterio.io.MemoryFile () as memfile:
with memfile.open(**profile) as dst:
for i in range(0, src.count):
dst.write(img[i], i+1)
dataset = memfile.open()

从这里开始工作。但是当我运行这段代码时:

#read points from shapefile
train_pts = gpd.read_file (training_points)
train_pts = train_pts[[ 'CID','class', 'POINT_X','POINT_Y']] #attribute fields in shapefile
train_pts.index = range(len(train_pts))
coords = [(x,y) for x, y in zip(train_pts.POINT_X, train_pts.POINT_Y)] #create list of point coordinates
#sample each band of raster dataset at each point in the coordinate list
train_pts ['Raster Value'] = [x for x in dataset.sample(coords)] #all band values saved as a list in the Raster Value column
#Unpack the raster value column to separate column for each band - band names retrieved from snappy in the video but I was looking for an option
train_pts[bands] = pd.DataFrame(train_pts['Raster Value'].tolist(), index = train_pts.index)
train_pts = train_pts.drop(['Raster Value'], axis=1) #drop raster value column
#change the values for last three classes 
train_pts['CID'] = train_pts['CID'].replace([7,8,15],[5,6,7])
train_pts.to.csv('train_pts.csv') #save as csv
train_pts.head () #see first column

我得到以下错误:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [9], in <cell line: 10>()
8 train_pts ['Raster Value'] = [x for x in dataset.sample(coords)] #all band values saved as a list in the Raster Value column
9 #Unpack the raster value column to separate column for each band - band names retrieved from snappy in the video but I was looking for an option
---> 10 train_pts[src1] = pd.DataFrame(train_pts['Raster Value'].tolist(), index = train_pts.index)
11 train_pts = train_pts.drop(['Raster Value'], axis=1) #drop raster value column
12 #change the values for last three classes 
File ~.condaenvsgeocomp3_clonelibsite-packagespandascoreframe.py:3643, in DataFrame.__setitem__(self, key, value)
3641     self._setitem_frame(key, value)
3642 elif isinstance(key, (Series, np.ndarray, list, Index)):
-> 3643     self._setitem_array(key, value)
3644 elif isinstance(value, DataFrame):
3645     self._set_item_frame_value(key, value)
File ~.condaenvsgeocomp3_clonelibsite-packagespandascoreframe.py:3685, in DataFrame._setitem_array(self, key, value)
3680 else:
3681     # Note: unlike self.iloc[:, indexer] = value, this will
3682     #  never try to overwrite values inplace
3684     if isinstance(value, DataFrame):
-> 3685         check_key_length(self.columns, key, value)
3686         for k1, k2 in zip(key, value.columns):
3687             self[k1] = value[k2]
File ~.condaenvsgeocomp3_clonelibsite-packagespandascoreindexersutils.py:428, in check_key_length(columns, key, value)
426 if columns.is_unique:
427     if len(value.columns) != len(key):
--> 428         raise ValueError("Columns must be same length as key")
429 else:
430     # Missing keys in columns are represented as -1
431     if len(columns.get_indexer_non_unique(key)[0]) != len(value.columns):
ValueError: Columns must be same length as key

所以我的问题如下:

  1. 我用来导入的方法是否有问题shapefile的乐队吗?
  2. 我是否需要在我输入的代码中编写所有字段shapefile的属性信息?或者我应该编辑这些在GIS程序中的字段?

根据band description创建新的DataFrame列

如果你说

src = rasterio.open(S1_S2_stack)
bands = src.read()

bands为三维Numpyndarray(一系列栅格帧)。它没有意义使用索引像train_pts[bands],其中train_pts是一个数据帧。但我猜你想指的是乐队的名字。如果是,试试src.descriptions:

band_names = [*src.descriptions]
train_pts[band_names] = pd.DataFrame(train_pts['Raster Value'].tolist(), index = train_pts.index)

需要注意的一些陷阱

  • band_names应该是一个列表,以便我们可以将它们用作train_pts[band_names]的索引。由于src.descriptions是一个元组,我们必须将其转换为list
  • 如果在波段描述中有空值或重复值,我们需要以某种方式处理它们:
band_names = [f'{src.descriptions[i]}, band {i}' for i in range(1, src.count + 1)]
  • 我们可以强制使用一些默认名称来替代前面的注释:
band_names = [f'Band {i}' for i in range(1, src.count + 1)]

相关内容

  • 没有找到相关文章

最新更新