c-分段错误MPI创建结构



我正在编写一个MPI程序,该程序解决了N皇后区问题,进程0应该部分解决该问题,并让其他进程完成它。我的程序进行了编译,但在创建结构类型并尝试发送结构时,它向我显示了分段错误。这发生在spawn_processes函数中(由进程0执行,因此segfault发生在进程0中),同时尝试发送subProblemType

这是我的代码:

#include <stdio.h>
#include <mpi.h>

#define MAXBOARDSIZE 8
static int board_size=8;
typedef struct boardplus{
  int size; // board size
  int x; // this is where we need to restart
  int y; // idem
  int board[MAXBOARDSIZE]; // the actual board, padded to largest instance
} subproblem;


#define  INIT   1  // Message to client:  subproblem
#define  DATA   2  // Message from client with results
#define  EXIT   4  // Message from client with CPU time
                   // Also to client, giving permission to exit
static long int N_solutions;
int solution_count;
void spawn_processes(int rows[board_size],int y){
    //printf("_______________________");
    subproblem subP;
    int col,//column number to start from
        count,//number of solutions recieved from a worker
        nProc,//total number of processes
        proc,
        nActive,i;// number of active processes

    MPI_Status status;
    MPI_Datatype subProblemType;
    MPI_Datatype type[4]={MPI_INT,MPI_INT,MPI_INT,MPI_INT};
    int block_len[4]={1,1,1,MAXBOARDSIZE};
    MPI_Aint disp[4];

    MPI_Address(&subP,disp);
    MPI_Address(&subP.x,disp+1);
    MPI_Address(&subP.y,disp+2);
    MPI_Address(&subP.board,disp+3);
    int base=disp[0];
    for(i=0;i<4;i++) disp[i]-=base;
    MPI_Type_create_struct(4,block_len,disp,type,&subProblemType);
    MPI_Type_commit(&subProblemType);
    MPI_Comm_size(MPI_COMM_WORLD,&nProc);
    subP.size=board_size;
    subP.y=y;
    //subP.board=rows;
    for(i=0;i<y;i++)subP.board[i]=rows[i];
    printf("spawning processes  ....n");
    for(col=0,proc=1;proc<nProc && col<board_size;proc++, col++){
        printf("sending to process %d n ",proc);
        fflush(stdout);
       subP.x=col;
       MPI_Send(&subP,1,subProblemType,proc,INIT,MPI_COMM_WORLD);
    }
    nActive=proc-1;
    // Receive back results and send out new problems
    while(col<board_size){
        MPI_Recv(&count,1,MPI_INT,MPI_ANY_SOURCE,DATA,MPI_COMM_WORLD,&status);
        proc=status.MPI_SOURCE;
        printf("recieved from process %d, found %d solutions n",proc,count);
        N_solutions+=count;
        subP.x=col++;
        MPI_Send(&subP,1,subProblemType,proc,INIT,MPI_COMM_WORLD);
    }
    // Finally, receive back pending results and send termination
    // indication (message with size of zero).
    subP.size=0;
    while(nActive>0){
        MPI_Recv(&count,1,MPI_INT,MPI_ANY_SOURCE,DATA,MPI_COMM_WORLD,&status);
        proc=status.MPI_SOURCE;
        printf("recieved from process %d, found %d solutions n",proc,count);
        --nActive;
        N_solutions+=count;
        //send a subproblem with size=0 (termination messages)
        MPI_Send(&subP,1,subProblemType,proc,INIT,MPI_COMM_WORLD);
    }
    for (proc = 1; proc < nProc; proc++)  
        MPI_Send(&proc, 0, MPI_INT, proc, EXIT, MPI_COMM_WORLD);

}


void process_queens(int my_id){
    int root=0;
    subproblem subP;
    MPI_Status status;
    int rows[board_size];
    int x,y,i;  
    MPI_Datatype subProblemType;
    MPI_Datatype type[4]={MPI_INT,MPI_INT,MPI_INT,MPI_INT};
    int block_len[4]={1,1,1,MAXBOARDSIZE};
    MPI_Aint disp[4];

    MPI_Address(&subP,disp);
    MPI_Address(&subP.x,disp+1);
    MPI_Address(&subP.y,disp+2);
    MPI_Address(&subP.board,disp+3);
    int base=disp[0];
    for(i=0;i<4;i++) disp[i]-=base;
    MPI_Type_create_struct(4,block_len,disp,type,&subProblemType);
    MPI_Type_commit(&subProblemType);
    printf("process %d waiting to recieve a taskn",my_id);
    fflush(stdout);
    MPI_Recv(&subP,1,subProblemType,root,INIT,MPI_COMM_WORLD,&status);
    while(subP.size>0){
    x=subP.x;
    y=subP.y;
    for(i=0;i<y;i++)rows[i]=subP.board[i];
    //rows=subP.board;

    if(is_safe(rows,x,y)){
    rows[y]=x;
    n_queens_solver(rows,y+1);
    }
    MPI_Send(&N_solutions,1,MPI_INT,root,DATA,MPI_COMM_WORLD);
    }
    // Final hand-shake:  get permission to terminate
   MPI_Recv(&N_solutions, 0, MPI_INT, 0, EXIT, MPI_COMM_WORLD, &status);
}
int is_safe(int rows[board_size], int x, int y)  
{
    int i;
    if (y == 0)
            return 1;
    for (i=0; i < y; ++i) {
       if (rows[i] == x || rows[i] == x + y - i || rows[i] == x - y +i)
            return 0;
    } 
    return 1;
}

void n_queens_solver(int rows[board_size], int y) 
{
    int x;
    for (x=0; x < board_size; ++x) {
        if (is_safe(rows, x, y)) {
            rows[y] = x;
            if (y == board_size-1) {
        ++N_solutions;
        }
            else
              n_queens_solver(rows, y+1);
        }
    }
}

void n_queens_expander(int rows[board_size], int y, int expand_levels)
{
  int x;
 if(y == expand_levels-1){
    spawn_processes(rows,y);
    }
else{
     for (x=0; x < board_size; ++x) {
    if (is_safe(rows, x, y)) 
      {
    rows[y] = x;
    n_queens_expander(rows, y+1, expand_levels-1);
      }
  }
  }
}

int main(int argc,char *argv[]) {
int rows[board_size];
//int expand_levels=1;
int numproc,my_id;
MPI_Status status;
MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&numproc);
    MPI_Comm_rank(MPI_COMM_WORLD,&my_id);
    //printf("number of processes:%d n",numproc);
    if(my_id==0){
        //printf("process 0 starting...n");
        n_queens_expander(rows,0,1);
    }
    else{
        process_queens(my_id);
}
MPI_Finalize();
return 0;
}

您的错误是正确的。。。

MPI_Address(&subP,disp);
MPI_Address(&subP.x,disp+1);
MPI_Address(&subP.y,disp+2);
MPI_Address(&subP.board,disp+3);
int base=disp[0]; // <--------------------- HERE
for(i=0;i<4;i++) disp[i]-=base;

在LP64系统(包括运行OS X、FreeBSD、Solaris或Linux的64位x86系统)上,MPI_Aint为8字节长,而int仅为4字节长。subP是一个堆栈变量,x64上主线程的堆栈位于虚拟地址空间的高处,因此在分配给base时会发生截断,计算的位移与实际位移无关。

解决方案:base的类型应为MPI_Aint

解决方案2:

for(i=1;i<4;i++) disp[i]-=disp[0];
disp[0] = 0;

所有列中都存在相同的问题,但由于主崩溃且不发送数据,工作线程中的MPI_Recv从不向内存写入,因此它们不会出错。

请记住始终使用-Wall进行编译,并注意编译器产生的警告消息。

虽然我不熟悉MPI,但直觉和一些快速的谷歌搜索表明,你需要在结构中为该数组声明一个类型,而不是将其作为整数传递。*将其作为整型传递可能只会将指向数组中第一个项的指针发送到远程机器,然后可能是远程机器上的某个伪内存地址,当远程机器使用该地址时,会导致segfault。

看看这个答案:为包含指针的结构创建MPI_Datatype。你也许可以摆脱这个基本想法。请注意,从简短的略读来看,Hristo Iliev在该线程中的响应可能是您的程序结构上更好的方法。

*我假设这就是MPI_Datatype type[4]={MPI_INT,MPI_INT,MPI_INT,MPI_INT};行的意思

你有这个:

static int board_size=8;
typedef struct boardplus{
  int size; // board size
  int x; // this is where we need to restart
  int y; // idem
 int board[MAXBOARDSIZE]; // the actual board, padded to largest instance
} subproblem;

并制作这样的类型:

MPI_Datatype subProblemType;
MPI_Datatype type[4]={MPI_INT,MPI_INT,MPI_INT,MPI_INT};
int block_len[4]={1,1,1,MAXBOARDSIZE};
MPI_Aint disp[4];

MPI_Address(&subP,disp);
MPI_Address(&subP.x,disp+1);
MPI_Address(&subP.y,disp+2);
MPI_Address(&subP.board,disp+3);
int base=disp[0];
for(i=0;i<4;i++) disp[i]-=base;
MPI_Type_create_struct(4,block_len,disp,type,&subProblemType);

我对你对位移的修正持怀疑态度。更常见的是,处理MPI_Address类型的事情的方法是使用MPI_BOTTOM作为缓冲区。因此,您的发送不是发送subP,而是如下所示:

MPI_Send(MPI_BOTTOM,1,subProblemType,proc,INIT,MPI_COMM_WORLD);

最新更新