Calling MPI_Finalize from a thread created with pthread_create in an MPI application written in C



First of all, I should say that I am French, so my English is not very good.

I am working on an MPI application and I have run into a problem that I hope someone can help me with.

As the title of my post says, I am trying to use a thread to listen for when the application has to terminate, and then call the MPI_Finalize function from that thread.

However, my application does not terminate correctly. More precisely, I get the following messages:

[XPS-2720:27441] *** Process received signal ***
[XPS-2720:27441] Signal: Segmentation fault (11)
[XPS-2720:27441] Signal code: Address not mapped (1)
[XPS-2720:27441] Failing at address: 0x7f14077a3b6d
[XPS-2720:27440] *** Process received signal ***
[XPS-2720:27440] Signal: Segmentation fault (11)
[XPS-2720:27440] Signal code: Address not mapped (1)
[XPS-2720:27440] Failing at address: 0x7fb11d07bb6d

mpirun noticed that process rank 1 with PID 27440 on node lagniez-XPS-2720 exited on signal 11 (Segmentation fault).

My slave code is:

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>
#include <sys/types.h>
#include <pthread.h>
#include <cassert>
#define send_data_tag 1664
#define send_kill_tag 666
void *finilizeMPICom(void *intercomm)
{ 
  printf("the finilizeMPICom was calledn");
  
  MPI_Comm parentcomm = * ((MPI_Comm *) intercomm);
  MPI_Status status;
  int res;
  // sleep(10);
  MPI_Recv(&res, 1, MPI_INT, 0, send_kill_tag, parentcomm, &status);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  printf("we receive something %d -- %dn", rank, res);
  
  MPI_Finalize();
  exit(0);
}// finilizeMPICom
int main( int argc, char *argv[])
{ 
  int  numtasks, rank, len, rc; 
  char hostname[MPI_MAX_PROCESSOR_NAME];
  int provided, claimed;
  rc = MPI_Init_thread(0, 0, MPI_THREAD_MULTIPLE, &provided);
  MPI_Query_thread( &claimed );
  
  if (rc != MPI_SUCCESS || provided != 3)
    {
      printf ("Error starting MPI program. Terminating.n");
      MPI_Abort(MPI_COMM_WORLD, rc);
    }
  
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm parentcomm;
  MPI_Comm_get_parent(&parentcomm);
  /* create a second thread to listen when we have to kill the program */
  pthread_t properlyKill;
  if(pthread_create(&properlyKill, NULL, finilizeMPICom, (void *) &parentcomm))
    {     
      fprintf(stderr, "Error creating thread\n");
      return 0;
    }
  
  assert(parentcomm != MPI_COMM_NULL);
  MPI_Status status;
  int root_process, ierr, num_rows_to_receive;
  int mode;
  MPI_Recv( &mode, 1, MPI_INT, 0, send_data_tag, parentcomm, &status);
  printf("c The solver works in the mode %dn", mode);
  printf("I sent a message %dn", rank);
  // if(rank != 1) sleep(100);
  
  int res = 1;
  MPI_Send(&res, 1, MPI_INT, 0, send_data_tag, parentcomm);  
  printf("we want to listen for somethiing %dn", rank);  
  
  int rescc = 1;
  MPI_Recv(&rescc, 1, MPI_INT, 0, send_data_tag, parentcomm, &status);
  printf("I received the message %d %dn", rescc, rank);
  
  if(rescc == 1000)
    {
      printf("~~~~~~~~>>> I print the solution %dn", rank);
      int res3 = 1001;
      MPI_Send(&res3, 1, MPI_INT, 0, send_data_tag, parentcomm);
    }
  else printf("I do not understand %dn", rank);
  printf("I wait the thread to kill the programm %dn", rank);
  pthread_join(properlyKill, (void**)&(res));
  return 0;
}

For the master, I have:

int main(int argc, char **argv)
{  
  Parser *p = new Parser("slave.xml");
  MPI_Init(&argc, &argv);
  if(p->method == "concurrent")
    {
      ConcurrentManager cc(p->instance, p->solvers);
      cc.run();
    }
  else
    {
      cerr << "c The only available methods are: concurrent, eps (Embarrassingly Parallel Search) or tree" << endl;
      exit(1);
    }
  delete(p);
  MPI_Finalize();
  exit(0);
}// main
/**
   Create a concurrent manager (i.e. initialise the data structures needed to
   run the solvers).
   
   @param[in] _instance, the benchmark path
   @param[in] _solvers, the set of solvers that will be ran
 */
ConcurrentManager::ConcurrentManager(string _instance, vector<Solver> &_solvers) :
  instance(_instance), solvers(_solvers)
{
  cout << "cnc Concurrent manager called" << endl;
  
  nbSolvers = _solvers.size();
  np = new int[nbSolvers];
  cmds = new char*[nbSolvers];
  arrayOfArgs = new char **[nbSolvers];
  infos = new MPI_Info[nbSolvers];
  for(int i = 0 ; i<nbSolvers ; i++)
    {
      np[i] = solvers[i].npernode;
      cmds[i] = new char[(solvers[i].executablePath).size() + 1];
      strcpy(cmds[i], (solvers[i].executablePath).c_str());      
      arrayOfArgs[i] = new char *[(solvers[i].options).size() + 1];
      for(unsigned int j = 0 ; j<(solvers[i].options).size() ; j++)
        {
          arrayOfArgs[i][j] = new char[(solvers[i].options[j]).size() + 1];
          strcpy(arrayOfArgs[i][j], (solvers[i].options[j]).c_str());          
        }
      arrayOfArgs[i][(solvers[i].options).size()] = NULL;
      MPI_Info_create(&infos[i]);
      char hostname[solvers[i].hostname.size()];
      strcpy(hostname, solvers[i].hostname.c_str());
      MPI_Info_set(infos[i], "host", hostname);
    }
  sizeComm = 0;
}// constructor
/**
   Wait until at least one process finishes and returns the code
   SOLUTION_FOUND.
   @param[in] intercomm, the communicator
 */
void ConcurrentManager::waitForSolution(MPI_Comm &intercomm)
{
  MPI_Status arrayStatus[sizeComm], status;
  MPI_Request request[sizeComm];
  int val[sizeComm], flag;
  for(int i = 0 ; i<sizeComm ; i++) MPI_Irecv(&val[i], 1, MPI_INT, i, TAG_MSG, intercomm, &request[i]);
  bool solutionFound = false;
  while(!solutionFound)
    {
      for(int i = 0 ; i<sizeComm ; i++)
        {
          MPI_Test(&request[i], &flag, &arrayStatus[i]);
          if(flag) 
            {
              printf("--------------------->    %d reveived %dn", i , val[i]);
              if(val[i] == SOLUTION_FOUND)
                {
                  int msg = PRINT_SOLUTION;
                  MPI_Send(&msg, 1, MPI_INT, i, TAG_MSG, intercomm); // ask to print the solution
                  int msgJobFinished;
                  MPI_Recv(&msgJobFinished, 1, MPI_INT, i, TAG_MSG, intercomm, &status);  // wait the answer
                  assert(msgJobFinished == JOB_FINISHED);
                  cout << "I am going to kill everybody" << endl;
                  
                  int msgKill[sizeComm];
                  for(int j = 0 ; j<sizeComm ; j++)
                    {
                      msgKill[i] = STOP_AT_ONCE;
                      MPI_Send(&msgKill[i], 1, MPI_INT, j, TAG_KILL, intercomm);
                    }
                  solutionFound = true;
                  break;
                } else
                {
                  printf("restart the communication for %dn", i);
                  MPI_Irecv(&val[i], 1, MPI_INT, i, TAG_MSG, intercomm, &request[i]);
                }
            }
        }      
    }
}// waitForSolution
/**
   Run the solver.
 */
void ConcurrentManager::run()
{
  MPI_Comm intercomm;
  int errcodes[solvers.size()];
  MPI_Comm_spawn_multiple(nbSolvers, cmds, arrayOfArgs, np, infos, 0, MPI_COMM_WORLD, &intercomm, errcodes);
  
  MPI_Comm_remote_size(intercomm, &sizeComm);
  cout << "c Solvers are now running: " << sizeComm << endl;
  int msg = CONCU_MODE;
  for(int i = 0 ; i<sizeComm ; i++) MPI_Send(&msg, 1, MPI_INT, i, TAG_MSG, intercomm); // init the working mode
  
  waitForSolution(intercomm);
}// run

I know I have posted a lot of code :(

But I do not know where the problem is.

Please help me :)

Best regards.

The MPI documentation on how MPI interacts with threads requires that the call to MPI_Finalize() be performed by the main thread, that is, the same thread that initialised MPI. In your case, that also happens to be the initial thread of the process.
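
As a quick run-time check (a minimal sketch, not taken from your code), MPI_Is_thread_main() tells a thread whether it is the one that initialised MPI and is therefore allowed to perform the shutdown:

#include <mpi.h>
#include <stdio.h>

/* Minimal sketch: only the thread that called MPI_Init_thread may call
   MPI_Finalize; MPI_Is_thread_main lets any thread check whether it is
   that thread before attempting the shutdown. */
static void finalizeIfMainThread(void)
{
  int isMain = 0;
  MPI_Is_thread_main(&isMain);
  if (isMain)
    MPI_Finalize();   /* we are the initialising thread */
  else
    printf("not the MPI main thread, must not call MPI_Finalize\n");
}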

To satisfy MPI's requirements, you could reorganise your application so that the initial thread is the one that waits for the kill signal and then shuts MPI down. The other work it currently does would then have to be moved to a different thread; a sketch follows below.
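
A minimal sketch of that reorganisation, assuming the slave is still spawned by the master and reusing the tags from your code (workerLoop is a hypothetical name for the function that would take over the MPI_Recv/MPI_Send exchange currently done in main()):

#include "mpi.h"
#include <pthread.h>
#include <stdio.h>

#define send_data_tag 1664
#define send_kill_tag 666

/* Hypothetical worker: the data exchange that main() currently performs
   (receive the mode, send results, ...) would move here. It must not
   call MPI_Finalize. */
void *workerLoop(void *arg)
{
  MPI_Comm parentcomm = *((MPI_Comm *) arg);
  /* ... MPI_Recv / MPI_Send on send_data_tag, as in your current main() ... */
  (void) parentcomm;
  return NULL;
}

int main(int argc, char *argv[])
{
  int provided;
  MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
  if (provided < MPI_THREAD_MULTIPLE)
    MPI_Abort(MPI_COMM_WORLD, 1);

  MPI_Comm parentcomm;
  MPI_Comm_get_parent(&parentcomm);

  /* The solver's normal work now runs in a secondary thread. */
  pthread_t worker;
  pthread_create(&worker, NULL, workerLoop, (void *) &parentcomm);

  /* The initial thread -- the one that called MPI_Init_thread -- waits for
     the kill message and is therefore the thread that finalises MPI. */
  int res;
  MPI_Status status;
  MPI_Recv(&res, 1, MPI_INT, 0, send_kill_tag, parentcomm, &status);

  /* Make sure the worker is done with its MPI calls before finalising. */
  pthread_join(worker, NULL);
  MPI_Finalize();
  return 0;
}

Note that the initial thread must also make sure no other thread is still inside an MPI call when MPI_Finalize runs, hence the pthread_join before the shutdown.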
