我对MPI完全陌生,因此,有人能告诉我我在以下代码中做错了什么吗?我根据我在stackoverflow上找到的答案构造了这段代码,所以我只是稍微修改了一下以适应我的需要。我面临的问题是,在进程0中创建的2D数组"a"不能被排名为1、2和3的其他进程看到(我只有四个处理器)。也就是说,当进程想要在函数matrix_element()中使用这个矩阵"a"时,即(发生的地方用星号*表示)
cc[m][n]=matrix_element(m,n,x,ngauher,A)
被一个等级大于0的进程调用,程序以一个分段错误终止。只有根进程0能够使用这个数组在每个进程中生成子数组cc[m][n]。我尝试使用
将2D数组"A"广播到所有进程 for(m=0;m<MAX_M;m++) {
MPI_Bcast(A[m],MAX_M,MPI_DOUBLE,0,MPI_COMM_WORLD);
}
,以便每个进程都可以使用它,但是代码更早地终止了它,因为在代码中稍后调用的MPI_Barrier发生了错误。我只是想知道如何广播"A"到所有进程正确,如果有任何错误的"A"的调用内部的matrix_element()函数由所有进程使用。无论如何,代码遵循
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "nrutil.h"
#define SQR(x) ((x)*(x))
double x[1000],w[1000];
double result,global_result;
double **A,**c,**cc;
long nrl,nrh,ncl,nch;
int i,j,MAX_M,n,m,k;
int mx,my,mz,nx,ny,nz;
int ngauher;
int lstart,lend,id,p,num_elements;
int count;
FILE *ingau,*inc,*mdat,*outptr_A;
FILE *globalarrayptr=NULL;
FILE *globalarrayptr2=NULL;
FILE *localarrayptr=NULL;
char *globalarray;
char *localarray;
int malloc2d(double ***array, int n, int m);
double **dmatrix(long nrl, long nrh, long ncl, long nch);
double matrix_element(int m, int n, double **A);
double hermitef(double u, int m);
double calculate_c(int m,int n, double *x, double *w, int ngauher);
double *h;
int main(int argc, char **argv) {
const int MAX_M=10;
const int procMAX_M=2;
const int ngauher=20;
int i,j,k,id,p,disp;
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&id);
MPI_Comm_size(MPI_COMM_WORLD,&p);
if((localarrayptr = fopen("a.dat","w")) == NULL) {
fprintf(stderr,"Could not open file a.datn");
exit(1);
}
if(id == 0) {
if((ingau = fopen("gauspnts.dat","r")) == NULL) {
fprintf(stderr,"Could not open file gauspnts.dat");
exit(1);
}
if((globalarrayptr = fopen("c.dat","w")) == NULL) {
fprintf(stderr,"Could not open file c.datn");
exit(1);
}
printf(" opened files n");
h = (double *)calloc(MAX_M,sizeof(double));
nrl=0;
nrh=MAX_M;
ncl=0;
nch=MAX_M;
malloc2d(&c,MAX_M,MAX_M);
malloc2d(&A,MAX_M,MAX_M);
for(i=0;i<nrh;i++) {
for(j=0;j<nch;j++) {
c[i][j]=0.0;
fprintf(globalarrayptr," %g ",c[i][j]);
}
fprintf(globalarrayptr,"n");
}
for(k=0;k<ngauher;k++) {
fscanf(ingau," %lf %lf n",&x[k],&w[k]);
printf(" %g %g n",x[k],w[k]);
}
// MPI_Bcast(x,ngauher,MPI_DOUBLE,0,MPI_COMM_WORLD); // doesn't work
// MPI_Bcast(w,ngauher,MPI_DOUBLE,0,MPI_COMM_WORLD); // doesn't work
/* The problematic array is created right here in rank 0*/
for(m=0;m<MAX_M;m++) {
for(n=0;n<MAX_M;n++) {
A[m][n]=calculate_c(m,n,x,w,ngauher);
printf(" rank=%d A[%d][%d] = %g n",i,m,n,A[m][n]);
}
}
}
for(m=0;m<MAX_M;m++) {
MPI_Bcast(A[m],MAX_M,MPI_DOUBLE,0,MPI_COMM_WORLD); // doesn't work and
// causes a seg fault
}
nrl=0;
nrh=MAX_M/procMAX_M;
ncl=0;
nch=MAX_M/procMAX_M;
malloc2d(&cc,MAX_M/procMAX_M,MAX_M/procMAX_M);
int sizes[2] = {MAX_M,MAX_M};
int subsizes[2] = {MAX_M/procMAX_M,MAX_M/procMAX_M};
int starts[2] = {0,0};
MPI_Datatype type, subarrtype;
MPI_Type_create_subarray(2,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&type);
MPI_Type_create_resized(type,0,MAX_M/procMAX_M*sizeof(double),&subarrtype);
MPI_Type_commit(&subarrtype);
double *globalptr=NULL;
if(id == 0) globalptr=&(c[0][0]);
int sendcounts[procMAX_M*procMAX_M];
int displs[procMAX_M*procMAX_M];
if(id == 0) {
for(i=0; i<procMAX_M*procMAX_M;i++) sendcounts[i] = 1;
disp = 0;
for(i=0;i<procMAX_M;i++) {
for (j=0; j<procMAX_M; j++) {
displs[i*procMAX_M+j] = disp;
disp += 1;
}
disp += ((MAX_M/procMAX_M)-1)*procMAX_M;
}
}
if (id == 0) {
for(i=0; i<procMAX_M*procMAX_M; i++) sendcounts[i] = 1;
disp = 0;
for(i=0; i<procMAX_M; i++) {
for (j=0; j<procMAX_M; j++) {
displs[i*procMAX_M+j] = disp;
disp += 1;
}
disp += ((MAX_M/procMAX_M)-1)*procMAX_M;
}
}
MPI_Scatterv(globalptr,sendcounts,displs,subarrtype,&(cc[0][0]),
MAX_M*MAX_M/(procMAX_M*procMAX_M),
MPI_DOUBLE,0,MPI_COMM_WORLD);
/* all processors print their local data */
for(i=0;i<p;i++) {
if(id == i) {
printf("Local process on rank %d is:n", id);
for(m=0;m<MAX_M/procMAX_M;m++) {
putchar('|');
for (n=0; n<MAX_M/procMAX_M;n++) {
// if(id == 0) { j=m; k=n; }
// if(id == 1) { j=m+procMAX_M-1; k=n; }
// if(id == 2) {j=m; k=n+procMAX_M-1; }
// if(id == 3) {j=m+procMAX_M-1; k=n+procMAX_M-1; }
/* ************************************************************************* */
cc[m][n]=matrix_element(m,n,A); // THE PROBLEM WITH "A" ARISES HERE!!
/* ************************************************************************* */
printf(" cc[%d][%d] = %g id = %dn",m,n,cc[m][n],id);
}
printf("|n");
}
}
MPI_Barrier(MPI_COMM_WORLD);
}
/* it all goes back to process 0 */
MPI_Gatherv(&(cc[0][0]),MAX_M*MAX_M/(procMAX_M*procMAX_M),MPI_DOUBLE,
globalptr,sendcounts,displs,subarrtype,0, MPI_COMM_WORLD);
/* don't need the local data anymore */
/* or the MPI data type */
MPI_Type_free(&subarrtype);
if (id == 0) {
if((globalarrayptr2 = fopen("B.dat","w")) == NULL) {
fprintf(stderr,"Could not open file B.datn");
exit(1);
}
for(i=0;i<MAX_M;i++) {
for(j=0;j<MAX_M;j++) {
fprintf(globalarrayptr2," %g ",c[i][j]);
}
fprintf(globalarrayptr2,"n");
}
}
MPI_Finalize();
return 0;
}
int malloc2d(double ***array, int n, int m) {
double *p;
/* allocate the n*m contiguous items */
p = (double *)malloc(n*m*sizeof(double));
if (!p) return -1;
/* allocate the row pointers into the memory */
(*array) = (double **)malloc(n*sizeof(double *));
if (!(*array)) {
free(p);
return -1;
}
/* set up the pointers into the contiguous memory */
for (i=0; i<n; i++)
(*array)[i] = &(p[i*m]);
return 0;
}
double calculate_c(int m, int n, double *x, double *w, int ngauher) {
double result=0;
int i;
for(i=0;i<ngauher;i++) {
result+=w[i]*exp(-SQR(x[i]))*SQR(hermitef(x[i],m)*hermitef(x[i],n));
}
return(result);
}
double hermitef(double u, int m) {
int j;
double x,pi;
pi=acos(-1.);
x=u;
h[0]=1./pow(pi,0.25);
h[1]=sqrt(2.)*x/pow(pi,0.25);
for(j=2;j<m+1;j++) {
h[j] = sqrt(2./(double)j)*x*h[j-1]-sqrt((double)(j-1)/((double)j))*h[j-2];
}
return(h[m]);
}
double matrix_element(int m, int n, double **A) {
result=0.;
/* In this function, A is seen only by the root process ?? */
for(mx=0;mx<=m;mx++) {
for(my=0;my<=m;my++) {
for(mz=0;mz<=m;mz++) {
for(nx=0;nx<=n;nx++) {
for(ny=0;ny<=n;ny++) {
for(nz=0;nz<=n;nz++) {
if(((mx+my+mz == m) && (nx+ny+nz == n))) {
result+=A[mx][nx]*A[my][ny]*A[mz][nz];
}
}
}
}
}
}
}
return(result);
}
如果您想将您的数组MPI_Bcast()
分配给每个进程,则您的数组应该分配size
次,每个进程一次。在秩0上分配是不够的。问题是:
if(id==0){
...
malloc2d(&A,MAX_M,MAX_M);
...
}
尝试将malloc2d(&A,MAX_M,MAX_M);
从此测试中取出,MPI_Bcast()
将正常工作。
MPI_Bcast(A[0],MAX_M*MAX_M,MPI_DOUBLE,0,MPI_COMM_WORLD);
再见,