c openmp-while循环的并行化



我想用OpenMP在C中并行化while循环。这是一个经典的while直到flag=false。

这是我在没有openMP 的情况下工作的半伪代码

//Something before
found = 0;
while (!found){
  //Pull an element from an array and do some work with it
  if(something) found = 1;
  //Do other work and put an element in the same array
}
//Something later

如果循环中的工作多做几次,对我来说不是问题,只是过度工作不会影响结果。有一种简单正确的方法可以用C?

感谢

根据要求,这是完整的代码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <omp.h>
typedef struct location {int x, y;} location;
typedef struct node {
    int x, y;               // Coordinates of the node on the grid
    int x_prec, y_prec;     // Coordinates of the predecessor
    int walkable;           // Whether this node can be walked through
    int opened;             // It indicates if the node was been estimated
    int closed;             // It indicates if the node was been evaluated
    int inPath;             // If the node is in the shortest path
    int g;                  /* The past-cost function, which is the known distance from the starting
                             * node to the current one. */
    int f;                  /* The estimated-cost function, which is an admissible heuristic estimate
                             * of the distance from the starting node to the goal passing through the
                             * current node. */
    int h;                  // The estimated cost of the path between the current node and the goal
} node;
typedef struct list {
    location loc;
    int isValid;
} list;
location start,end;
node **grid;
int cols, rows;
double time1, time2, time3, time4, time5;
double time_aStar_s, time_aStar_e;
double time_getNeighbor_s, time_getNeighbor_e;
double time_while_s, time_while_e;
double time_for_s, time_for_e;
double time_for = 0;
double time_pull_s, time_pull_e;
double time_pull = 0;
int count_getNeighbor = 0;
int count_current = 0;
void setGrid(char [6]);
int checkLocation(location);
void aStar(int);
int isListEmpty(list *);
void constructPath(location);
void getNeighbor(location, location *);
int heuristic(location, location);
void printStatistics();
void saveStatistics(char *);
int main (int argc, char *argv[]){
    char input_map[20];
    int input_algo = -1;
    char input_stat[20];
    printf("Which map you prefer?n");
    scanf("%s", input_map);
    setGrid(input_map);
    printf("Enter a valid start point.n");
    scanf("%i", &start.x);
    scanf("%i", &start.y);
    printf("Enter a valid end point.n");
    scanf("%i", &end.x);
    scanf("%i", &end.y);
    if (checkLocation(start) || checkLocation(end)) printf("Invalid start and/or end points.n");
    printf("Dijkstra or A*?(press <0> or <1> respectively)n");
    scanf("%i", &input_algo);
    // Save when aStar is called
    time_aStar_s = omp_get_wtime();
    if(input_algo == 0) aStar(0);           // 0 for Dijkstra
    else if (input_algo == 1) aStar(1);         // 1 for A*
    // Save when aStar finishes
    time_aStar_e = omp_get_wtime();
    printf("End of the program. n");
    printStatistics();
    printf("Would you like to save the statistics?(Enter <y> or <n> respectively)n");
    scanf("%s", input_stat);
    if(input_stat[0] == 'y'){
        printf("Enter file name.n");
        scanf("%s", input_stat);
        saveStatistics(input_stat);
    }
    return(0);
}
void setGrid(char mapName[6]) {
    char temp[1024];
    char fileName[20];
    int i,j;
    FILE *file;
    // Try to open the file
    strcpy(fileName, "src/maps/");
    strcat(fileName, mapName);
    if((file = fopen(fileName, "r")) == NULL){
        printf("ERROR: No such file.n");
        exit(1);
    }
    // Save dimensions of the map
    rows = 0;
    while(42){
        if(fscanf(file, "%s", temp) == EOF){
            printf("EOFn");
            printf("columns: t%i nrows: tt%in", cols, rows);
            break;
        }
        printf("%sn", temp);
        cols = strlen(temp);
        rows++;
    }
    // Reset the file position indicator
    rewind(file);
    // Set dimensions of grid matrix
    grid = (node **)malloc(rows * sizeof(node*));
    for(i = 0; i < rows; i++) grid[i] = (node *)malloc(cols * sizeof(node));
    i=0;
    while(42){
        if(fscanf(file, "%s", temp) == EOF) break;
        for (j = 0; j < cols; j++) {
            grid[i][j].x = i;
            grid[i][j].y = j;
            grid[i][j].x_prec = -1;
            grid[i][j].y_prec = -1;
            if(temp[j] == '#') {
                grid[i][j].walkable = 0;
            } else if(temp[j] == '-') {
                grid[i][j].walkable = 1;
            }
            grid[i][j].opened = 0;
            grid[i][j].closed = 0;
            grid[i][j].inPath = 0;
            grid[i][j].g = -1;
            grid[i][j].f = -1;
        }
        i++;
    }
    fclose(file);
}
void printGrid(int option) {
    int i,j;
    switch(option){
    case 0:
        // It prints grid with start point, end point and optimal path
        for(i = 0; i < rows; i++) {
            for(j = 0; j < cols; j++) {
                if(i == start.x && j == start.y) printf("S");
                else if(i == end.x && j == end.y) printf("E");
                else if (grid[i][j].walkable){
                    if (grid[i][j].inPath) printf("+");
                    else printf(" ");
                } else printf("#");
            }
            printf("n");
        }
        printf("n");
        break;
    case 1:
        // It prints evaluated cost g
        for(i = 0; i < rows; i++) {
            for(j = 0; j < cols; j++) {
                if (grid[i][j].walkable){
                    if(grid[i][j].closed == 1) printf("%3d ", grid[i][j].g);
                    else printf("    ");
                } else printf("### ");
            }
            printf("n");
        }
        printf("n");
        break;
    case 2:
        // It prints estimated cost g
        for(i = 0; i < rows; i++) {
            for(j = 0; j < cols; j++) {
                if (grid[i][j].walkable){
                    if(grid[i][j].closed == 1) printf("%3d ", grid[i][j].g);
                    else printf("    ");
                } else printf("### ");
            }
            printf("n");
        }
        printf("n");
        break;
    default:
        printf("ERROR: Bad option %i for function printGrid(). Please check the code.n", option);
        break;
    }
}
int checkLocation(location l) {
    if(grid[l.x][l.y].walkable) return(0);
    else return (1);
}
void aStar(int opt_dijkstra) {
    list openList[10000];
    location current;
    location neighbors[4];
    int empty;
    int found = 0;
    int i,j;    // Counters
    int exit;   // control variable
    int f_min;
    int pos;
    int x,y;
    int ng;
    // Set g and f values of the start node to be 0
    grid[start.x][start.y].g = 0;
    grid[start.x][start.y].f = 0;
    // Initialization of the open list
    for (i = 0; i < sizeof(openList)/sizeof(openList[0]); i++) {
        openList[i].isValid = 0;
    }
    // Push the start node into the open list
    grid[start.x][start.y].opened = 1;
    openList[0].loc.x = start.x;
    openList[0].loc.y = start.y;
    openList[0].isValid = 1;
    // Save when the "while is not empty" begins
    time1 = time_while_s = omp_get_wtime();
    // While the open list is not empty
    empty = isListEmpty(openList);
    while (!empty && !found){
        // Save time to pull a node
        time_pull_s = omp_get_wtime();
        // pull the position of the node which has the minimum f value
        f_min = -1;
#pragma omp parallel for default(none) shared(openList, f_min, current, pos, grid)
        for(i = 0; i < sizeof(openList)/sizeof(openList[0]); i++) {
            if (openList[i].isValid == 1 && (f_min == -1 || (grid[openList[i].loc.x][openList[i].loc.y].f < f_min))){
#pragma omp critical(pullopenlist)
                {
                f_min = grid[openList[i].loc.x][openList[i].loc.y].f;
                current.x = openList[i].loc.x;
                current.y = openList[i].loc.y;
                pos = i;
                }
            }
        }
        openList[pos].isValid = 0;
        grid[current.x][current.y].closed = 1;
        //Save time to pull a node
        time_pull_e = omp_get_wtime();
        time_pull += time_pull_e - time_pull_s;
        // Update the count of evaluated points
        count_current++;
        // If the end position is reached, construct the path and return it
        if (current.x == end.x && current.y == end.y){
            printf("Reached the end position.n");
            constructPath(end);     // To be defined
            found = 1;
        }
        // Save when enter in getNeighbor
        time_getNeighbor_s = omp_get_wtime();
        // Get neighbors
        getNeighbor(current, neighbors);
        // Save when exit from getNeigbor
        time_getNeighbor_e = omp_get_wtime();
        // Get the distance between current node and the neighbor and calculate the next g score
        ng = grid[current.x][current.y].g + 1;
        // Save when started the "for all neighbors"
        time2 = time_for_s = omp_get_wtime();
        // Evaluate neighbors
        /* Seems that is not convenient to parallelize the loop.
         * Probably this happens because of critical section
         */
#pragma omp parallel for default(none) private(x, y, j) shared(exit, openList, neighbors, ng, grid, opt_dijkstra, end, current)
        for (i = 0; i < 4; i++) {
            x = neighbors[i].x;
            y = neighbors[i].y;
            if (x != -1 || y != -1){
                // Check if the neighbor has not been inspected yet, or if it
                // can be reached with smaller cost from the current node
                if (!grid[x][y].opened || ng < grid[x][y].g) {
                    grid[x][y].g = ng;
                    if(opt_dijkstra == 0) grid[x][y].h = 0;     // Dijkstra case with heuristic cost = 0;
                    else grid[x][y].h = heuristic(neighbors[i], end);
                    grid[x][y].f = grid[x][y].g + grid[x][y].h;
                    grid[x][y].x_prec = current.x;
                    grid[x][y].y_prec = current.y;
                }
                // If the neighbor is not in open list push it into it
#pragma omp critical (pushopenList)
                {
                    if(!grid[x][y].opened) {
                        exit = 0;
                        for(j = 0; exit == 0; j++) {
                            if(openList[j].isValid == 0) {
                                openList[j].loc.x = x;
                                openList[j].loc.y = y;
                                openList[j].isValid = 1;
                                exit = 1;
                            }
                        }
                    }
                    grid[x][y].opened = 1;
                }
            }
        }
        // Save when finish the "for all neighbors"
        time_for_e = omp_get_wtime();
        time_for += time_for_e - time_for_s;
    } // End while the open list is not empty until end point is found.
    // Save when finish the "while is not empty"
    time_while_e = omp_get_wtime();
}
int isListEmpty(list l[]){
    // It should check if the list is empty. It checks if there is at least one element that is valid.
    int i;
    int empty = 0;
    for (i = 0; i < sizeof(l)/sizeof(l[0]); i++){
        if (l[i].isValid){
            empty = 1;
            i = sizeof(l)/sizeof(l[0]);
        }
    }
    return (empty);
}
void constructPath(location n){
    /* The function reconstructs the path starting from the given point setting .inPath
     */
    int i;
    location temp;
    location temp_prec;
    temp.x = grid[n.x][n.y].x;
    temp.y = grid[n.x][n.y].y;
    grid[temp.x][temp.y].inPath = 1;
    for(i = 0; grid[temp.x][temp.y].x_prec != -1; i++) {
        temp_prec.x = grid[temp.x][temp.y].x_prec;
        temp_prec.y = grid[temp.x][temp.y].y_prec;
        temp.x = temp_prec.x;
        temp.y = temp_prec.y;
        grid[temp.x][temp.y].inPath = 1;
    }
}
void getNeighbor(location current, location neighbors[4]){
    /*
     * Get the neighbors of the given node.
     *
     *    offsets
     * +---+---+---+
     * |   | 0 |   |
     * +---+---+---+
     * | 3 |   | 1 |
     * +---+---+---+
     * |   | 2 |   |
     * +---+---+---+
     */
    int i;
    int x = current.x;
    int y = current.y;
    // Update count of getNeighbor executions
    count_getNeighbor++;
    for (i = 0; i < 4; i++){
        switch(i) {
        // ↑
        case 0:
            if(x >= 0 && y - 1 >= 0 && x < rows && y - 1 < cols && grid[x][y - 1].walkable){
                neighbors[i].x = x;
                neighbors[i].y = y - 1;
            } else{
                neighbors[i].x = -1;
                neighbors[i].y = -1;
            }
            break;
            // →
        case 1:
            if(x + 1 >= 0 && y >= 0 && x + 1 < rows && y < cols && grid[x +1][y].walkable){
                neighbors[i].x = x + 1;
                neighbors[i].y = y;
            } else{
                neighbors[i].x = -1;
                neighbors[i].y = -1;
            }
            break;
            // ↓
        case 2:
            if(x >= 0 && y + 1 >= 0 && x < rows && y + 1 < cols && grid[x][y + 1].walkable) {
                neighbors[i].x = x;
                neighbors[i].y = y + 1;
            } else{
                neighbors[i].x = -1;
                neighbors[i].y = -1;
            }           break;
            // ←
        case 3:
            if(x - 1 >= 0 && y >= 0 && x - 1 < rows && y < cols && grid[x - 1][y].walkable) {
                neighbors[i].x = x - 1;
                neighbors[i].y = y;
            } else{
                neighbors[i].x = -1;
                neighbors[i].y = -1;
            }
            break;
        }
    }
}
int heuristic(location from, location to){
    int h;
    // Manhattan distance from the two points
    h = abs(from.x - to.x) + abs(from.y - to.y);
    return(h);
}
void printStatistics(){
    // Print some useful statistics about the parallel execution of the program
    printf("nStatistics of aStar:n");
    printf("time to execute aStar: tttt%f tmsn", 1000*(time_aStar_e - time_aStar_s));
    printf("time at first check point: ttt%f tmsn", 1000*(time1 - time_aStar_s));
    printf("time at second check point: ttt%f tmsn", 1000*(time2 - time_aStar_s));
    printf("nStatistic of "while until is empty":n");
    printf("number of iterations: tttt%in", count_current);
    printf("mean time to do an iteration: ttt%f tmsn", 1000*(time_while_e - time_while_s)/count_current);
    printf("time to do all iterations: ttt%f tmsn", 1000*(time_while_e - time_while_s));
    printf("nStatistic of pull a node into openList:n");
    printf("mean time to perform a pull operation: tt%f tmsn", 1000*time_pull/count_current);
    printf("total time spent to perform pulls operations: t%f tmsn", 1000*time_pull);
    printf("nStatistic of "for each neighbor":n");
    printf("total number of iterations: ttt%in", 4*count_current);
    printf("mean time to do all four iterations: tt%f tmsn", 1000*time_for/count_current);
    printf("time to do the last four iterations: tt%f tmsn", 1000*(time_for_e - time_for_s));
    printf("time to do all the iterations: ttt%f tmsn", 1000*time_for);
    printf("nStatistic of getNeighbor:n");
    // time_getNeighbor is updated at each execution, so we have only the value relative to the last execution
    printf("time to execute getNeighbor (last execution): t%f tmsn", 1000*(time_getNeighbor_e - time_getNeighbor_s));
    printf("number of executions: tttt%in", count_getNeighbor);
    // Just an indicative time, it is NOT the time to do all executions
    printf("estimated time to do all executions: tt%f tmsn", 1000*count_getNeighbor*(time_getNeighbor_e - time_getNeighbor_s));
}
void saveStatistics(char *string_input){
    FILE *fileOutput;
    char fileOutputName[30];
    strcpy(fileOutputName, "src/stats/");
    strcat(fileOutputName, string_input);
    if((fileOutput = fopen(fileOutputName, "w")) == NULL){
        printf("ERROR: Error in opening the output file.n");
        exit(1);
    }

    // Print some useful statistics about the parallel execution of the program
    fprintf(fileOutput, "nStatistics of aStar:n");
    fprintf(fileOutput, "time to execute aStar: tttt%f tmsn", 1000*(time_aStar_e - time_aStar_s));
    fprintf(fileOutput, "time at first check point: ttt%f tmsn", 1000*(time1 - time_aStar_s));
    fprintf(fileOutput, "time at second check point: ttt%f tmsn", 1000*(time2 - time_aStar_s));
    fprintf(fileOutput, "nStatistic of "while until is empty":n");
    fprintf(fileOutput, "number of iterations: tttt%in", count_current);
    fprintf(fileOutput, "mean time to do an iteration: ttt%f tmsn", 1000*(time_while_e - time_while_s)/count_current);
    fprintf(fileOutput, "time to do all iterations: ttt%f tmsn", 1000*(time_while_e - time_while_s));
    fprintf(fileOutput, "nStatistic of pull a node into openList:n");
    fprintf(fileOutput, "mean time to perform a pull operation: tt%f tmsn", 1000*time_pull/count_current);
    fprintf(fileOutput, "total time spent to perform pulls operations: t%f tmsn", 1000*time_pull);
    fprintf(fileOutput, "nStatistic of "for each neighbor":n");
    fprintf(fileOutput, "total number of iterations: ttt%in", 4*count_current);
    fprintf(fileOutput, "mean time to do all four iterations: tt%f tmsn", 1000*time_for/count_current);
    fprintf(fileOutput, "time to do the last four iterations: tt%f tmsn", 1000*(time_for_e - time_for_s));
    fprintf(fileOutput, "time to do all the iterations: ttt%f tmsn", 1000*time_for);
    fprintf(fileOutput, "nStatistic of getNeighbor:n");
    // time_getNeighbor is updated at each execution, so we have only the value relative to the last execution
    fprintf(fileOutput, "time to execute getNeighbor (last execution): t%f tmsn", 1000*(time_getNeighbor_e - time_getNeighbor_s));
    fprintf(fileOutput, "number of executions: tttt%in", count_getNeighbor);
    // Just an indicative time, it is NOT the time to do all executions
    fprintf(fileOutput, "estimated time to do all executions: tt%f tmsn", 1000*count_getNeighbor*(time_getNeighbor_e - time_getNeighbor_s));
    fclose(fileOutput);
    printf("Saved all the stats");
}

如果您对代码的其他部分有建议,欢迎您。

您当然可以用OpenMP做这样的事情,但这并不像将#pragma omp parallel放在for循环中那么简单。对于该结构,编译器需要在进入循环时知道将进行多少次迭代,以便跨线程分解迭代;当你发现东西后离开时,你肯定没有这些信息。

你可以做这样的工作——如果你需要执行的测试非常占用CPU,这会非常有用(这里,我有一个蛮力素性测试的虚构例子),这样你就可以在几个核心之间分解工作,你只关心找到一个的结果(或者根本没有)。但请注意,您肯定不能保证并行执行此操作将返回第一个结果。

在下面的示例中,我们有一个标志found,当线程找到一个项时(使用omp atomic capture构造)设置该标志。如果它是第一个设置标志的,它会存储值和位置。一旦线程(最终)看到设置了标志,它们都会从while循环返回。

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include "omp.h"
/* brute force prime-testing algorithm */
int isprime(int x) {
    int prime = 1;
    for (int i=2; i<=floor(sqrt(1.0*x)); i++) {
        if (x % i == 0) {
            prime = 0;
            break;
        }
    }
    return prime;
}
int main(int argc, char **argv) {
    const int MAXN=128;
    int start=200;
    if (argc > 1)
        start = atoi(argv[1]);
    int candidates[MAXN];
    for (int i=0; i<MAXN; i++) {
        candidates[i] = start+i;
    }
    int found=0;
    int value=-1;
    int location=-1;
#pragma omp parallel shared(found, candidates, value, location) default(none)
    {
        int tid=omp_get_thread_num();
        int nthreads=omp_get_num_threads();
        int item=tid;
        while (!found && item < MAXN) {
            int prime=isprime(candidates[item]);
            if (prime) {
                int alreadyfound=0;
#pragma omp atomic capture 
                { alreadyfound = found; found++; }
                if (!alreadyfound) {
                    location = item;
                    value = candidates[item];
                }
            }
            item += nthreads;
        }
    }
    if (!found)
        printf("No primes foundn");
    else 
        printf("Found prime: %d (%d) in location %d (%d)n", value, isprime(value), location, candidates[location]);
    return 0;
}

运行提供

$ gcc -o stopwhenfound stopwhenfound.c  -std=c99 -lm -fopenmp
$ ./stopwhenfound 370262
Found prime: 370373 (1) in location 111 (370373)

我不同意Jonathan的观点,我认为这在OpenMP中很容易做到。您只需要刷新done变量(这样它的值在所有缓存中都是一致的):

//Something before
found = 0;
**#pragma omp flush(done)**
while (!found){
  //Pull an element from an array and do some work with it
  if(something){
     found = 1;
     **#pragma omp flush(done)**
     }
  //Do other work and put an element in the same array
}
//Something later

最新更新