好的,所以这基本上是我之前的一个问题的后续。我有一些浮点二进制格式的二进制数据。使用C,该过程快,但是使用atof()会损失一些精度。我试着通过论坛和其他地方寻找,但我的问题没有解决。因此,我转向了python。快乐啊!程序工作得很好,但是与c相比非常慢。我查找了python的优化,它指向Cython和Weave,但我有一些疑问。如果您遵循我的代码,我很困惑在哪里应用优化的C代码,因为我从numpy对象读取。我的问题是,是否可以在Cython中使用numpy函数读取数据,如果可以,请提供一个小示例。
C代码使用PolSARpro的头文件,和libbmp来创建。bmp文件
作为一个说明,我张贴我的代码。天知道,为了让这些公式起作用,我经历了很多。这样,其他有需要的人也可以给出他们的想法和建议:)
C代码(工作,但atof()失去精度,因此输出的长度稍微偏离)
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <polSARpro/bmpfile.c>
#include <polSARpro/graphics.c>
#include <polSARpro/matrix.c>
#include <polSARpro/processing.c>
#include <polSARpro/util.c>
#define METAL_THRESHOLD 5.000000
#define POLARIZATION_FRACTION_THRESHOLD 0.900000
#define PI 3.14159265
#define FOURTHPI PI/4
#define deg2rad PI/180
#define rad2deg 180./PI
/*double PI = 3.14159265;
double FOURTHPI = PI / 4;
double deg2rad = PI / 180;
double rad2deg = 180.0 / PI;*/
FILE *L1,*PF,*SPF;
FILE *txt;
FILE *finalLocations;
long i=0,loop_end;
int lig,col;
float l1,pf,spf;
long pos;
int Nlig,Ncol;
float *bufferout;
float *bufferin_L1,*bufferin_L2;
float valueL1,valuePF,xx;
float sizeGridX, sizeGridY, startX, startY;
float posX,posY;
int ZONE;
char Heading[10];
char setZone[15];
int p[4][2];
int degree, minute, second;
void UTM2LL(int ReferenceEllipsoid, double UTMNorthing, double UTMEasting, char* UTMZone, double *Lat, double *Long)
{
//converts UTM coords to lat/long. Equations from USGS Bulletin 1532
//East Longitudes are positive, West longitudes are negative.
//North latitudes are positive, South latitudes are negative
//Lat and Long are in decimal degrees.
//Written by Chuck Gantz- chuck.gantz@globalstar.com
double k0 = 0.9996;
double a = 6378137;
double eccSquared = 0.00669438;
double eccPrimeSquared;
double e1 = (1-sqrt(1-eccSquared))/(1+sqrt(1-eccSquared));
double N1, T1, C1, R1, D, M;
double LongOrigin;
double mu, phi1, phi1Rad;
double x, y;
int ZoneNumber;
char* ZoneLetter;
int NorthernHemisphere; //1 for northern hemispher, 0 for southern
x = UTMEasting - 500000.0; //remove 500,000 meter offset for longitude
y = UTMNorthing;
ZoneNumber = strtoul(UTMZone, &ZoneLetter, 10);
if((*ZoneLetter - 'N') >= 0)
NorthernHemisphere = 1;//point is in northern hemisphere
else
{
NorthernHemisphere = 0;//point is in southern hemisphere
y -= 10000000.0;//remove 10,000,000 meter offset used for southern hemisphere
}
LongOrigin = (ZoneNumber - 1)*6 - 180 + 3; //+3 puts origin in middle of zone
eccPrimeSquared = (eccSquared)/(1-eccSquared);
M = y / k0;
mu = M/(a*(1-eccSquared/4-3*eccSquared*eccSquared/64-5*eccSquared*eccSquared*eccSquared/256));
phi1Rad = mu + (3*e1/2-27*e1*e1*e1/32)*sin(2*mu)
+ (21*e1*e1/16-55*e1*e1*e1*e1/32)*sin(4*mu)
+(151*e1*e1*e1/96)*sin(6*mu);
phi1 = phi1Rad*rad2deg;
N1 = a/sqrt(1-eccSquared*sin(phi1Rad)*sin(phi1Rad));
T1 = tan(phi1Rad)*tan(phi1Rad);
C1 = eccPrimeSquared*cos(phi1Rad)*cos(phi1Rad);
R1 = a*(1-eccSquared)/pow(1-eccSquared*sin(phi1Rad)*sin(phi1Rad), 1.5);
D = x/(N1*k0);
*Lat = phi1Rad - (N1*tan(phi1Rad)/R1)*(D*D/2-(5+3*T1+10*C1-4*C1*C1-9*eccPrimeSquared)*D*D*D*D/24
+(61+90*T1+298*C1+45*T1*T1-252*eccPrimeSquared-3*C1*C1)*D*D*D*D*D*D/720);
*Lat = *Lat * rad2deg;
*Long = (D-(1+2*T1+C1)*D*D*D/6+(5-2*C1+28*T1-3*C1*C1+8*eccPrimeSquared+24*T1*T1)
*D*D*D*D*D/120)/cos(phi1Rad);
*Long = LongOrigin + *Long * rad2deg;
}
void convertToDegree(float decimal)
{
int negative = decimal < 0;
decimal = abs(decimal);
minute = (decimal * 3600/ 60);
second = fmodf((decimal * 3600),60);
degree = minute / 60;
minute = minute % 60;
if (negative)
{
if (degree > 0)
degree = -degree;
else if (minute > 0)
minute = -minute;
else
second = -second;
}
}
void readConfig(int *Row, int *Col)
{
char tmp[70];
int i=0;
FILE *fp = fopen("config.txt","r");
if(fp == NULL)
{
perror("Config.txt");
exit(1);
}
while(!feof(fp))
{
fgets(tmp,70,fp);
if (i==1)
*Row = atoi(tmp);
if(i==4)
*Col = atoi(tmp);
i++;
}
fclose(fp);
}
void readHDR(float *gridX,float *gridY,float *startXPos,float *startYPos)
{
FILE *fp = fopen("PF.bin.hdr","r");
int i=0;
char tmp[255];
char junk[255];
memset(tmp,0X00,sizeof(tmp));
memset(junk,0X00,sizeof(junk));
if(fp==NULL)
{
perror("Please locate or create PF.bin.hdr");
exit(0);
}
while(!feof(fp))
{
if(i==13)
break;
fgets(tmp,255,fp);
i++;
}
fclose(fp);
strcpy(junk,strtok(tmp,","));
strtok(NULL,",");
strtok(NULL,",");
strcpy(tmp,strtok(NULL,","));
//puts(tmp);
*startXPos = atof(tmp);
strcpy(tmp,strtok(NULL,","));
//puts(tmp);
*startYPos = atof(tmp);
strcpy(tmp,strtok(NULL,","));
//puts(tmp);
*gridX = atof(tmp);
strcpy(tmp,strtok(NULL,","));
//puts(tmp);
*gridY = atof(tmp);
strcpy(tmp,strtok(NULL,","));
ZONE = atoi(tmp);
strcpy(tmp,strtok(NULL,","));
strcpy(Heading,tmp);
}
int main()
{
bmpfile_t *bmp;
double Lat;
double Long;
int i;
rgb_pixel_t pixelMetal = {128, 64, 0, 0};
rgb_pixel_t pixelOthers = {128, 64, 0, 0};
readConfig(&Nlig,&Ncol);
readHDR(&sizeGridX,&sizeGridY,&startX,&startY);
//startX = startX - (double) 0.012000;
//startY = startY + (double)0.111000;
printf("Enter the rectangle's top-left and bottom-right region of interest points as: x yn");
for(i=0;i<2;i++)
{
printf("Enter point %d::t",i+1);
scanf("%d %d",&p[i][0], &p[i][1]);
}
printf("Grid Size(X,Y)::( %f,%f ), Start Positions(X,Y)::( %f, %f ), ZONE::%d, Heading:: %snn",sizeGridX,sizeGridY,startX,startY,ZONE,Heading);
pixelMetal.red = 255;
pixelMetal.blue = 010;
pixelMetal.green = 010;
pixelOthers.red = 8;
pixelOthers.blue = 8;
pixelOthers.green = 8;
L1 = fopen("l1.bin","rb");
PF =fopen("PF.bin","rb");
SPF = fopen("SPF_L1.bin","wb");
//txt = fopen("locations(UTM).txt","w");
finalLocations = fopen("locationsROI.txt","w");
if(L1==NULL || PF==NULL || SPF==NULL || finalLocations == NULL)
{
perror("Error in opening files!");
return -1;
}
fseek(L1,0,SEEK_END);
pos = ftell(L1);
loop_end = pos;
printf("L1.bin contains::t%ld elementsn",pos);
fseek(PF,0,SEEK_END);
pos = ftell(PF);
printf("PF.bin contains::t%ld elementsn",pos);
fseek(L1,0,SEEK_SET);
fseek(PF,0,SEEK_SET);
bmp = bmp_create(Ncol,Nlig,8); //width * height
bufferin_L1 = vector_float(Ncol);
bufferin_L2 = vector_float(Ncol);
bufferout = vector_float(Ncol);
printf("Resources Allocated. Beginning...n");
for (lig = 0; lig < Nlig; lig++) /* rows */
{
if (lig%(int)(Nlig/20) == 0)
{
printf("%fr", 100. * lig / (Nlig - 1));
fflush(stdout);
}
fread(&bufferin_L1[0], sizeof(float), Ncol, L1);
fread(&bufferin_L2[0], sizeof(float), Ncol, PF);
for (col = 0; col < Ncol; col++) /* columns */
{
valueL1 = bufferin_L1[col];
valuePF = bufferin_L2[col];
if(valueL1 >= METAL_THRESHOLD && valuePF >= POLARIZATION_FRACTION_THRESHOLD)
{
if(col >= p[0][0] && col <= p[1][0] && lig >= p[0][1] && lig <= p[1][1])
{
xx = fabs(valueL1 + valuePF);
bmp_set_pixel(bmp,col,lig,pixelMetal);
posX = startX + (sizeGridX * col);
posY = startY - (sizeGridY * lig);
//fprintf(txt,"%f %f %d %sn",posX,posY,ZONE,Heading);
sprintf(setZone,"%d",ZONE);
if(strstr(Heading,"Nor")!=NULL)
strcat(setZone,"N");
else
strcat(setZone,"S");
UTM2LL(23, posY, posX, setZone, &Lat, &Long); // 23 for WGS-84
convertToDegree(Lat);
//fprintf(finalLocations,"UTM:: %.2fE %.2fN , Decimal: %f %f , Degree: %d %d %d, ",posX,posY,Lat,Long,degree,minute,second);
//fprintf(finalLocations,"%.2fE,%.2fN,%f,%f ,%d,%d,%d,",posX,posY,Lat,Long,degree,minute,second);
fprintf(finalLocations,"%.2f,%.2f,%f,%f ,%d,%d,%d,",posX,posY,Lat,Long,degree,minute,second);
convertToDegree(Long);
fprintf(finalLocations,"%d,%d,%dn",degree,minute,second);
}
else
{
xx = fabs(valueL1) ;
bmp_set_pixel(bmp,col,lig,pixelOthers);
}
}
else
{
xx = fabs(valueL1) ;
bmp_set_pixel(bmp,col,lig,pixelOthers);
}
bufferout[col] = xx;
}
fwrite(&bufferout[0], sizeof(float), Ncol, SPF);
}
free_vector_float(bufferout);
fclose(L1);
fclose(PF);
fclose(SPF);
//fclose(txt);
fclose(finalLocations);
printf("n----------Writing BMP File!----------n");
bmp_save(bmp,"SPF_L1(ROI).bmp");
bmp_destroy(bmp);
printf("nDone!n");
}
以及Python代码::
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 10 10:29:18 2013
@author: Binayaka
"""
import numpy as Num;
import math;
import array;
class readConfiguration(object):
def __init__(self,x):
self.readConfig(x);
def readConfig(self,x):
try:
crs = open(x,'r');
srs = open('config.txt','r');
except IOError:
print "Files missing!";
else:
rows = crs.readlines();
values = rows[12].split(',');
rows = srs.readlines();
self.startX = float(values[3]);
self.startY = float(values[4]);
self.gridSizeX = float(values[5]);
self.gridSizeY = float(values[6]);
self.Zone = int(values[7]);
self.Hemisphere = values[8];
self.NRows = int(rows[1].strip());
self.NCols = int(rows[4].strip());
self.MetalThreshold = 5.000000;
self.PFThreshold = 0.900000;
self.rad2deg = 180/math.pi;
self.deg2rad = math.pi/180;
self.FOURTHPI = math.pi/4;
crs.close();
srs.close();
def decdeg2dms(dd):
negative = dd < 0;
dd = abs(dd);
minutes,seconds = divmod(dd*3600,60);
degrees,minutes = divmod(minutes,60);
if negative:
if degrees > 0:
degrees = -degrees;
elif minutes > 0:
minutes = -minutes;
else:
seconds = -seconds;
return (degrees,minutes,seconds);
def UTM2LL(self,UTMEasting, UTMNorthing):
k0 = 0.9996;
a = 6378137;
eccSquared = 0.00669438;
e1 = (1-math.sqrt(1-eccSquared))/(1+math.sqrt(1-eccSquared));
x = UTMEasting - 500000.0;#remove 500,000 meter offset for longitude
y = UTMNorthing;
if self.Hemisphere == "North":
self.Hemi = 1;
else:
self.Hemi = -1;
y -= 10000000.0;
LongOrigin = (self.Zone - 1)*6 - 180 + 3;
eccPrimeSquared = (eccSquared)/(1-eccSquared);
M = y / k0;
mu = M/(a*(1-eccSquared/4-3*eccSquared*eccSquared/64-5*eccSquared*eccSquared*eccSquared/256));
phi1Rad = mu + (3*e1/2-27*e1*e1*e1/32)*math.sin(2*mu) + (21*e1*e1/16-55*e1*e1*e1*e1/32)*math.sin(4*mu) +(151*e1*e1*e1/96)*math.sin(6*mu);
#phi1 = phi1Rad*self.rad2deg;
N1 = a/math.sqrt(1-eccSquared*math.sin(phi1Rad)*math.sin(phi1Rad));
T1 = math.tan(phi1Rad)*math.tan(phi1Rad);
C1 = eccPrimeSquared*math.cos(phi1Rad)*math.cos(phi1Rad);
R1 = a*(1-eccSquared)/pow(1-eccSquared*math.sin(phi1Rad)*math.sin(phi1Rad), 1.5);
D = x/(N1*k0);
self.Lat = phi1Rad - (N1*math.tan(phi1Rad)/R1)*(D*D/2-(5+3*T1+10*C1-4*C1*C1-9*eccPrimeSquared)*D*D*D*D/24 +(61+90*T1+298*C1+45*T1*T1-252*eccPrimeSquared-3*C1*C1)*D*D*D*D*D*D/720);
self.Lat = self.Lat * self.rad2deg;
self.Long = (D-(1+2*T1+C1)*D*D*D/6+(5-2*C1+28*T1-3*C1*C1+8*eccPrimeSquared+24*T1*T1)*D*D*D*D*D/120)/math.cos(phi1Rad);
self.Long = LongOrigin + self.Long * self.rad2deg;
def printConfiguration(self):
""" Just to check whether our reading was correct """
print "Metal Threshold:t" + str(self.MetalThreshold);
print "PF Threshold:t" + str(self.PFThreshold);
print "Start X:t" + str(self.startX);
print "Start Y:t" + str(self.startY);
print "Grid size(X) :t" + str(self.gridSizeX);
print "Grid size(Y) :t" + str(self.gridSizeY);
def createROIfile(self,ROIFilename):
firstPoint = raw_input('Enter topLeft point coordt').split();
secondPoint = raw_input('Enter bottomRight point coordt').split();
try:
L1 = open('l1.bin','rb');
PF = open('PF.bin','rb');
SPF = open('pySPF_L1.bin','wb');
targetFilename = open(ROIFilename,'w');
except IOError:
print "Files Missing!";
else:
L1.seek(0,2);
elementsL1 = L1.tell();
L1.seek(0,0);
PF.seek(0,2);
elementsPF = PF.tell();
PF.seek(0,0);
print "L1.bin containst" + str(elementsL1) + " elements";
print "PF.bin containst" + str(elementsPF) + " elements";
binvaluesL1 = array.array('f');
binvaluesPF = array.array('f');
binvaluesSPF = array.array('f');
for row in range(0,self.NRows):
binvaluesL1.read(L1,self.NCols);
binvaluesPF.read(PF,self.NCols);
dataL1 = Num.array(binvaluesL1, dtype=Num.float);
dataPF = Num.array(binvaluesPF, dtype=Num.float);
dataSPF = dataL1 + dataPF;
binvaluesSPF.fromlist(Num.array(dataSPF).tolist());
for col in range(0,self.NCols):
if(dataL1[col] >= self.MetalThreshold and dataPF[col] >= self.PFThreshold):
if(col >= int(firstPoint[0]) and col <= int(secondPoint[0]) and row >= int(firstPoint[1]) and row <= int(secondPoint[1])):
posX = self.startX + (self.gridSizeX * col);
posY = self.startY - (self.gridSizeY * row);
self.UTM2LL(posY,posX);
tmp1 = self.decdeg2dms(posY);
tmp2 = self.decdeg2dms(posX);
strTarget = "Decimal Degree:: " + str(posX) + "E " + str(posY) + "N t Lat long:: " + str(tmp1) + " " + str(tmp2) + "n";
targetFilename.write(strTarget);
binvaluesSPF.tofile(SPF);
L1.close();
PF.close();
SPF.close();
targetFilename.close();
print "Done!";
dimensions = readConfiguration('PF.bin.hdr');
dimensions.printConfiguration();
dimensions.createROIfile('testPythonROI.txt');
需要优化的是Python代码,因为NRows和NCols的值可以并且确实达到数千的数量级。
一些一般性评论:
-
使用python,出于多种原因,最好坚持使用PEP8。Python程序员对可读性特别挑剔,基本上普遍遵守社区编码指南(PEP8)。避免驼峰式大小写,每行不超过80列,去掉分号,并且可以随意忽略这些会降低可读性的指导原则。
-
如果您使用numpy,则不需要在这里使用内置的
array
类型。 -
使用投影库。指定你使用的基准面和椭球体,否则坐标(东/北或晚/长)完全没有意义。
-
不要用一个大的类来容纳不相关的东西。只有几个函数是没有错的。除非有必要,否则不需要将其放入类中。
-
对numpy数组使用矢量化操作。
下面是你的性能瓶颈:
for row in range(0,self.NRows):
binvaluesL1.read(L1,self.NCols);
binvaluesPF.read(PF,self.NCols);
dataL1 = Num.array(binvaluesL1, dtype=Num.float);
dataPF = Num.array(binvaluesPF, dtype=Num.float);
dataSPF = dataL1 + dataPF;
binvaluesSPF.fromlist(Num.array(dataSPF).tolist());
for col in range(0,self.NCols):
if(dataL1[col] >= self.MetalThreshold and dataPF[col] >= self.PFThreshold):
if(col >= int(firstPoint[0]) and col <= int(secondPoint[0]) and row >= int(firstPoint[1]) and row <= int(secondPoint[1])):
posX = self.startX + (self.gridSizeX * col);
posY = self.startY - (self.gridSizeY * row);
self.UTM2LL(posY,posX);
tmp1 = self.decdeg2dms(posY);
tmp2 = self.decdeg2dms(posX);
strTarget = "Decimal Degree:: " + str(posX) + "E " + str(posY) + "N t Lat long:: " + str(tmp1) + " " + str(tmp2) + "n";
targetFilename.write(strTarget);
binvaluesSPF.tofile(SPF);
你最大的问题之一是你读取数据的方式。你不断地把东西作为一个东西读入,然后把它转换成一个列表,然后把它转换成一个numpy数组。完全没有必要绕过所有这些障碍。Numpy会像array
一样为你解包二进制浮点数。
只做grid = np.fromfile(yourfile, dtype=np.float32).reshape(ncols, nrows)
。(循环外)
之后,您的嵌套循环可以很容易地矢量化,并表示只需几行代码。
我将如何编写您的代码。这可能不会按原样运行,因为我无法用您的数据进行测试。然而,它应该给你一些大致的想法。
import numpy as np
import pyproj
def main():
config = Config('PF.bin.hdr')
grid1, grid2 = load_data('l1.bin', 'PF.bin', config.nrows, config.ncols)
spf = grid1 + grid2
spf.tofile('pySPF_L1.bin')
easting_aoi, northing_aoi = subset_data(grid1, grid2, config)
save_selected_region(easting_aoi, northing_aoi, config.zone,
'testPythonROI.txt')
def load_data(filename1, filename2, nrows, ncols):
"""It would really be good to use more descriptive variable names than "L1"
and "PF". I have no idea what L1 and PF are, so I'm just calling them
grid1 and grid2."""
grid1 = np.fromfile(filename1, dtype=np.float32).reshape(nrows, ncols)
grid2 = np.fromfile(filename2, dtype=np.float32).reshape(nrows, ncols)
return grid1, grid2
def subset_data(grid1, grid2, config):
"""Select points that satisfy some threshold criteria (explain??) and are
within a user-specified rectangular AOI."""
northing, easting = np.mgrid[:config.nrows, :config.ncols]
easting = config.xstart + config.xgridsize * easting
northing = config.ystart + config.ygridsize * northing
grids = grid1, grid2, easting, northing
grid1, grid2, easting, northing = [item[config.user_aoi] for item in grids]
mask = (grid1 >= config.metal_threshold) & (grid2 >= config.pf_threshold)
return easting[mask], northing[mask]
def save_selected_region(easting, northing, zone, filename):
"""Convert the given eastings and northings (in UTM zone "zone") to
lat/long and save to a tab-delimited-text file."""
lat, lon = utm2geographic(easting, northing, zone)
data = np.vstack([easting, northing, lat, lon]).T
with open(filename, 'w') as outfile:
outfile.write('EastingtNorthingtLatitudetLongituden')
np.savetxt(outfile, data, delimiter='t')
def utm2geographic(easting, northing, zone):
"""We need to know which datum/ellipsoid the UTM coords are in as well!!!!
I'm assuming it's a Clark 1866 ellipsoid, based on the numbers in your
code..."""
utm = pyproj.Proj(proj='utm', zone=zone, ellip='clrk66')
geographic = pyproj.Proj(proj='latlong', ellip='clrk66')
return pyproj.transform(utm, geographic, easting, northing)
class Config(object):
"""Read and store configuration values for (something?)."""
config_file = 'config.txt'
def __init__(self, filename):
"""You should add docstrings to clarify what you're expecting
"filename" to contain."""
with open(filename, 'r') as infile:
crs_values = list(infile)[12].split(',')
crs_values = [float(item) for item in crs_values]
self.xstart, self.ystart = crs_values[3:5]
self.xgridsize, self.ygridsize = crs_values[5:7]
self.zone = int(crs_values[7])
with open(self.config_file, 'r') as infile:
srs_values = list(infile)
self.nrows, self.ncols = srs_values[1], srs_values[4]
# It would be good to explain a bit about these (say, units, etc)
self.metal_threshold = 5.0
self.pf_threshold = 0.9
self.user_aoi = self.read_user_aoi()
def read_user_aoi(self):
"""Get an area of interest of the grids in pixel coordinates."""
top_left = raw_input('Enter top left indext')
bottom_right = raw_input('Enter bottom right indext')
min_i, min_j = [int(item) for item in top_left.split()]
max_i, max_j = [int(item) for item in bottom_right.split()]
return slice(min_i, max_i), slice(min_j, max_j)
if __name__ == '__main__':
main()