C语言 MPI 程序运行良好,但有时会抛出 Segfault11,有时会抛出 aborttrap6



Hy

我编写了一个 MPI 程序,它将矩阵划分为网格,然后将网格分散在 CPU 之间。这是一个矩阵-矩阵-乘法。我的程序运行良好并输出正确的结果,至少有时是这样。

有时我几乎在开始时就收到中止

陷阱 6 错误(在代码中标记),有时我在循环中遇到分段错误 11,我将矩阵从行主顺序重新排列为允许我分散网格的顺序(也在代码中标记)。我还遇到了一些BusError10。错误大部分时间发生在我标记的代码点,但有时也会发生在其他地方。

我真的很绝望,因为它有时会起作用,当它抛出错误时,它甚至不是同一个错误,也不是在代码中的同一点,这是我真的没有得到的。

我还认为,只有当我一个接一个地多次运行程序时,才更有可能发生错误。

你看到我的错误了吗?

这是代码:(很多,但我用长行标记了错误部分)

int main(int argc, char **argv) {
    //Initializing communication....
    MPI_Init(&argc, &argv);
    int size = atoi(argv[1]);
    int delta = 10;
    int world_rank;
    int world_size;
    int root = 0;
    // MPI_Status mystatus;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    // Calculate sqrt of world size
    int root_of_worldsize = sqrt((double)world_size);
    if (world_rank == root) {
        printf("The square-root of the worldsize is %dn", root_of_worldsize);
    }
    // Setup for initializing groups
    int row_rank_a, column_rank_b;
    int **rowranks = malloc(root_of_worldsize*sizeof(int*));
    int **columnranks = malloc(root_of_worldsize*sizeof(int*));
    for (int i = 0; i < root_of_worldsize; i++) {
        rowranks[i] = malloc(root_of_worldsize*sizeof(int));
        columnranks[i] = malloc(root_of_worldsize*sizeof(int));
        for (int j = 0; j < root_of_worldsize ; j++) {
            rowranks[i][j] = (i*root_of_worldsize + j);
            columnranks[i][j] = (j*root_of_worldsize + i);
        }
    }
    //printing rank array
    if (world_rank == root) {
        printf("Colum ranks: ");
        printf("[");
        for (int i = 0; i < root_of_worldsize; i++) {
            printf("[");
            for (int j = 0; j < root_of_worldsize; j++) {
                printf("%d, ", columnranks[i][j]);
            }
            printf("]");
        }
        printf("]n");
    }
    if (world_rank == root) {
        printf("Row ranks: ");
        printf("[");
        for (int i = 0; i < root_of_worldsize; i++) {
            printf("[");
            for (int j = 0; j < root_of_worldsize; j++) {
                printf("%d, ", rowranks[i][j]);
            }
            printf("]");
        }
        printf("]n");
    }
    MPI_Group world_group, rows_groupa, columns_groupb;
    MPI_Comm rowa_comm, columb_comm;

    //Get world group handle...
    MPI_Comm_group(MPI_COMM_WORLD, &world_group);
    //check compatibility of size and number of processors
    assert(size % world_size == 0);
    // Create groups
    for (int i = 0; i < root_of_worldsize; i++) {
        if (i*root_of_worldsize <= world_rank && world_rank < (i+1)*root_of_worldsize) {
            //printf("Rank %d; I am getting assigned to the %d row group.n", world_rank, i+1);
            MPI_Group_incl(world_group, root_of_worldsize, rowranks[i], &rows_groupa);
        }
        if (world_rank % root_of_worldsize == i) {
            //printf("Rank %d; I am getting assigned to the %d column group.n", world_rank, i+1);
            MPI_Group_incl(world_group, root_of_worldsize, columnranks[i], &columns_groupb);
        }
        if (world_rank == root) {
            printf("n");
        }
    }
    // Create new communicators
    MPI_Comm_create(MPI_COMM_WORLD, rows_groupa, &rowa_comm);
    MPI_Comm_create(MPI_COMM_WORLD, columns_groupb, &columb_comm);
    // Get respective group ranks
    MPI_Group_rank(rows_groupa, &row_rank_a);
    MPI_Group_rank(columns_groupb, &column_rank_b);
    printf("worldrank = %d; rowrank = %d; columnrank = %dn", world_rank, row_rank_a, column_rank_b);
    double *matrixA;
    double *matrixB;
    int chunk_size = size / root_of_worldsize;
    if (world_rank == root) {
    printf("Chunk size: %dn",chunk_size);
    printf("Root of worldsize: %dn", root_of_worldsize);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    if (world_rank == root) {
        // Create two matrices
        printf("Creating matrices...n");
        double *matrixA_i = malloc(size*size*sizeof(double));
        double *matrixB_i = malloc(size*size*sizeof(double));
        double **matrixA_2d = malloc(root_of_worldsize*sizeof(double*));
        for (int i = 0; i < size; i++) {
            matrixA_2d[i] = malloc(chunk_size*chunk_size*sizeof(double));
        }
        double **matrixB_2d = malloc(root_of_worldsize*sizeof(double*));
        for (int i = 0; i < size; i++) {
            matrixB_2d[i] = malloc(chunk_size*chunk_size*sizeof(double));
        }
        srand(1234);
        for (int i = 0; i < size; i++) {
            for (int j = 0; j < size; j++) {
                matrixA_i[i*size + j] = rand() % delta + 1;
            }
        }
        srand(2345);
        for (int i = 0; i < size; i++) {
            for (int j = 0; j < size; j++) {
                matrixB_i[i*size + j] = rand() % delta + 1;
            }
        }

-----------------Abort trap 6 is happening around here or also at the every end of programm------------------------------------------------------

        printf("Created matrices.n");
        printf("Matrix B:n");
        print_contiguous_matrix_array(matrixB_i, size);
        printf("Matrix A:n");
        print_contiguous_matrix_array(matrixA_i, size);

-----------------In this for loop is the Seg11 fault------------------------------------------------------
        // Rearrange the matrix to a "major-row-grid"-matrix
        printf("Rearranging matrices for grid scatteringn");
        int k = 0;
        int j = 0;
        int l = 0;
        for (int i = 0; i < (size*size); i++) {
            if (i == 0) {
                //Insert:
                //printf("Counters: k->%d, l->%d, j->%dn",k,l,j);
                matrixA_2d[k+root_of_worldsize*j][(i%chunk_size) + l*chunk_size] = matrixA_i[i];
                //printf("Writing on: [%d][%d]n", k+root_of_worldsize*j, i - ((j*chunk_size*chunk_size*root_of_worldsize)+(l*chunk_size*root_of_worldsize)+(k*chunk_size)));
                matrixB_2d[k+root_of_worldsize*j][(i%chunk_size) + l*chunk_size] = matrixB_i[i];
            } else {
                if (i % chunk_size == 0) {
                    k++;
                    if (k > (root_of_worldsize-1)) {
                        k = 0;
                    }
                    //printf("Raised k, k->%dn", k);
                }
                // Strip counter:
                if (i % (chunk_size*chunk_size*root_of_worldsize) == 0) {
                    j++;
                    //printf("Raised j, j->%dn", j);
                }
                // line counter:
                if (i % (chunk_size*root_of_worldsize) == 0) {
                    l++;
                    if (l > (chunk_size-1)) {
                        l = 0;
                    }
                    //printf("Raised l, l->%dn", l);
                }
                //Insert:
                printf("Counters: k->%d, l->%d, j->%d;  i->%dn",k,l,j,i);
                matrixA_2d[k+root_of_worldsize*j][(i%chunk_size) + l*chunk_size] = matrixA_i[i];
                printf("Writing on: [%d][%d]n", k+root_of_worldsize*j, (i%chunk_size) + l*chunk_size);
                matrixB_2d[k+root_of_worldsize*j][(i%chunk_size) + l*chunk_size] = matrixB_i[i];
            }
        }
        free(matrixA_i);
        free(matrixB_i);
        // 2d to 1d array
        //printf("2d A: ");
        //print_matrix(matrixA_2d, size, size);
        //printf("2d B: ");
        //print_matrix(matrixB_2d, size, size);
        //Two to one dimensional
        printf("converting from to to one dimensionaln");
        int counter = 0;
        matrixB = malloc(size*size*sizeof(double));
        matrixA = malloc(size*size*sizeof(double));
        for (int i = 0; i < world_size; i++) {
            for (int j = 0; j < chunk_size; j++) {
                for (int k = 0; k < chunk_size; k++) {
                    matrixA[counter] = matrixA_2d[i][j*chunk_size + k];
                    matrixB[counter] = matrixB_2d[i][j*chunk_size + k];
                    counter++;
                }
            }
        }
        //free 2d
        for (int q = 0; q < root_of_worldsize; q++) {
            free(matrixA_2d[q]);
            free(matrixB_2d[q]);
        }
        free(matrixB_2d);
        free(matrixA_2d);
        //printf("Rearranged B ");
        //print_contiguous_matrix_array(matrixB, size);
        //printf("Rearranged A ");
        //print_contiguous_matrix_array(matrixA, size);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    //Scatter....
    double *matrixA_chunk = malloc(chunk_size*chunk_size*sizeof(double));
    double *matrixB_chunk = malloc(chunk_size*chunk_size*sizeof(double));
    double *matrixA_tmp_chunk = malloc(chunk_size*chunk_size*sizeof(double));
    double *matrixB_tmp_chunk = malloc(chunk_size*chunk_size*sizeof(double));
    double *result_chunk = calloc(chunk_size*chunk_size, sizeof(double));
    MPI_Scatter(matrixA, chunk_size*chunk_size, MPI_DOUBLE, matrixA_chunk, chunk_size*chunk_size, MPI_DOUBLE, root, MPI_COMM_WORLD);
    MPI_Scatter(matrixB, chunk_size*chunk_size, MPI_DOUBLE, matrixB_chunk, chunk_size*chunk_size, MPI_DOUBLE, root, MPI_COMM_WORLD);
    for (int z = 0; z < root_of_worldsize; z++) {
        if (row_rank_a == z) {
            matrixA_tmp_chunk = matrixA_chunk;
        }
        MPI_Bcast(matrixA_tmp_chunk, chunk_size*chunk_size, MPI_DOUBLE, z, rowa_comm);
        /*if (world_rank == 0) {
            printf("temporary A: ");
            print_contiguous_matrix_array(matrixA_tmp_chunk, chunk_size);
        }*/
        if (column_rank_b == z) {
            matrixB_tmp_chunk = matrixB_chunk;
            MPI_Bcast(matrixB_tmp_chunk, chunk_size*chunk_size, MPI_DOUBLE, z, columb_comm);
        }
        MPI_Bcast(matrixB_tmp_chunk, chunk_size*chunk_size, MPI_DOUBLE, z, columb_comm);
        printf("Iteration: %d; Rank %d; row_rank %d; temporary A matrix: %f, %f, %f, %fn", z, world_rank, row_rank_a, matrixA_tmp_chunk[0], matrixA_tmp_chunk[1], matrixA_tmp_chunk[2], matrixA_tmp_chunk[3]);
        /*if (world_rank == 0) {
            printf("temporary B: ");
            print_contiguous_matrix_array(matrixB_tmp_chunk, chunk_size);
        }*/
        //calculate
        for (int i = 0; i < chunk_size; i++) {
            for (int j = 0; j < chunk_size; j++) {
                for (int k = 0; k < chunk_size; k++) {
                    result_chunk[j*chunk_size + i] += (matrixA_tmp_chunk[j*chunk_size + k] * matrixB_tmp_chunk[k*chunk_size + i]);
                }
            }
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }
    double *final_result;
    double *contiguous_final_result = NULL;
    if (world_rank == root) {
        final_result = malloc(size*size*sizeof(double));
        contiguous_final_result = malloc(size*size*sizeof(double));
    }
    MPI_Gather(result_chunk, chunk_size*chunk_size, MPI_DOUBLE, final_result, chunk_size*chunk_size, MPI_DOUBLE, root, MPI_COMM_WORLD);
    if (world_rank == root) {
        printf("final result major grid: ");
        print_contiguous_matrix_array(final_result, size);
    }
    // Rearrange gridded matrix to row major matrix
    if (world_rank == root) {
        int l2 = 0;
        int k2 = 0;
        int s2 = 0;
        for (int i = 0; i < (size*size); i++) {
            if (i == 0) {
                contiguous_final_result[(i%chunk_size) + l2*size + s2*size*chunk_size + k2*chunk_size] = final_result[i];
                printf("Access values: i->%d; l->%d; s->%d; k->%d; total->%dn", i, l2, s2, k2, (i%chunk_size) + l2*size + s2*size*chunk_size + k2*chunk_size);
            }
            else {
                if (i % chunk_size == 0) {
                    l2++;
                    if (l2 > (chunk_size-1)) {
                        l2 = 0;
                    }
                }
                if (i % (chunk_size*chunk_size*root_of_worldsize) == 0) {
                    s2++;
                }
                if (i % (chunk_size*chunk_size) == 0) {
                    k2++;
                    if (k2 > (root_of_worldsize-1)) {
                        k2 = 0;
                    }
                }
                contiguous_final_result[(i%chunk_size) + l2*size + s2*size*chunk_size + k2*chunk_size] = final_result[i];
                printf("Access values: i->%d; l->%d; s->%d; k->%d; total->%dn", i, l2, s2, k2, (i%chunk_size) + l2*size + s2*size*chunk_size + k2*chunk_size);
            }
        }
    }
    if (world_rank == root) {
        printf("Row major result: ");
        print_contiguous_matrix_array(contiguous_final_result, size);
    }
    //free!!!!!!
    if (world_rank == root) {
        free(matrixA);
        free(matrixB);
        free(final_result);
        free(contiguous_final_result);
    }
    free(matrixA_chunk);
    free(matrixB_chunk);
    free(result_chunk);
    MPI_Finalize();
    return 0;
}

提前非常感谢!

问题很可能是这两个分配:

int **rowranks = malloc(root_of_worldsize*sizeof(int));
int **columnranks = malloc(root_of_worldsize*sizeof(int));

在这里,您将变量声明为基本上是指针数组,但不为指针分配内存。如果 int 的大小小于 int* 的大小(通常在所有现代 64 位系统上都是这样),这将导致未定义的行为

我发现了问题!正是在代码的这一部分:

    double *matrixA_i = malloc(size*size*sizeof(double));
    double *matrixB_i = malloc(size*size*sizeof(double));
    double **matrixA_2d = malloc(root_of_worldsize*sizeof(double*));
    for (int i = 0; i < size; i++) {
        matrixA_2d[i] = malloc(chunk_size*chunk_size*sizeof(double));
    }
    double **matrixB_2d = malloc(root_of_worldsize*sizeof(double*));
    for (int i = 0; i < size; i++) {
        matrixB_2d[i] = malloc(chunk_size*chunk_size*sizeof(double));
    }

我没有为 2d 阵列分配正确的大小,非常感谢 @Joachim Pileborg,您的回答让我走上了正确的道路,寻找什么!!

相关内容

  • 没有找到相关文章

最新更新