我有366个tif格式的光栅图像文件(MODIS卫星每日数据(,其中包含雪数据,另一个csv文件包含19000个位置(纬度和经度(。我需要从光栅文件中收集雪的数据。我已经尝试过使用GDALpython库收集数据。然而,该程序从每个文件中收集数据大约需要30分钟。这意味着我必须运行大约180个小时的代码。以下是我正在使用的代码。请建议我是否可以提高程序执行速度,或者是否有更好的方法可以实现。
import gdal
import pandas
import numpy as np
import os,subprocess
def runCmdAndGetOutput(cmd) :
outList = []
proc = subprocess.Popen(cmd,stdout=subprocess.PIPE)
while True:
line = proc.stdout.readline()
if not line:
break
#the real code does filtering here
outList.append(line.rstrip())
print(outList)
# value = float(outList[2].decode("utf-8").replace("<Value>","").replace("</Value>",""))
value = float(outList[0].decode("utf-8"))
return value
# ndsiFile = "2016001.tif"
locs = "hkkhlocations.csv"
ndsFileLoc = r"D:SrinivasaRao_DocsMODIS_NDSI_V6_20165000000499560out"
# with open(locs) as f:
# locData = f.readlines()
latLnginfo = pandas.read_csv(locs)
print(latLnginfo.columns)
print(latLnginfo.shape)
# outDf = pandas.DataFrame()
outDf = pandas.DataFrame(np.zeros([len(latLnginfo),370])*np.nan)
day =1
print(os.listdir(ndsFileLoc))
print(type(os.listdir(ndsFileLoc)))
datasetsList = os.listdir(ndsFileLoc)
for eFile in datasetsList:
rCount = 0
# print(eFile)
cCount = int(eFile[4:7])
# print(cCount)
with open("output.csv") as f :
for line in f :
locData = line.split(",")
cmdToRun = ["gdallocationinfo" ,"-valonly", "-wgs84", os.path.join(ndsFileLoc,eFile) ,str(latLnginfo.iloc[rCount,4]), str(latLnginfo.iloc[rCount,3])]# str(locData[0]), str(locData[1])]
v = runCmdAndGetOutput(cmdToRun)
outDf.iloc[rCount,cCount]= float(v)
rCount = rCount + 1
print("rowno: ", rCount, "Dayno :", cCount, "SCF value: ", v)
day = day+1
outDf.to_csv('test.csv')
'''
def run_cmd_processor(efile):
r_count = 0
c_count = int(efile[4:7])
with open("output.csv") as f :
for line in f :
loc_data = line.split(",")
# ~
pool = multiprocessing.Pool(processes=2) # You can add more processes
pool.map(run_cmd_processor, datasetsList)
pool.close()
pool.join()
似乎唯一可以拥有多处理分支的点是"对于datasetsList中的eFile:"。它可以像鞋面一样改变。
我的建议是,您不需要通过子流程调用gdal。只需通过gdal读取HDF文件,然后获得长/纬度坐标下的像素值:
from osgeo import gdal
src = <location_to_your_hdf>
ds = gdal.Open(src,gdal.GA_ReadOnly)
## get your subdataset, to find out which one -> ds.GetSubdatasets()
subdata = gdal.Open(ds.GetSubDatasets()[0][0], gdal.GA_ReadOnly)
## get Geometadata
gt = subdata.GetGeoTransform()
## now locate the pixel by transforming them from coordinates to width/height
px = int((lat - gt[0]) / gt[1])
py = int((long - gt[3]) / gt[5])
pixelval = subdata.ReadAsArray(px, py, 1, 1)
这应该比您的子流程调用快得多,因为您只需要打开一次hdf文件,然后在坐标列表中循环,而不需要为每个坐标调用gdallocationinfo。
干杯