编译openMP和数学库的问题



我正在尝试用openMP编译一个程序:

gcc -c fopenmp -lm prog.c -o prog

prog.c包含。但是,当我运行。/prog时,错误是:

bash: ./prog: Permission denied

我再试一次,没有-c标志:

gcc -o prog -fopenmp -lm prog.c

但这次它似乎没有看到数学库中的函数,如cos, sqrt(未定义引用)。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <sys/time.h>
#include <omp.h>
#define REAL float
#define NX (64)
#ifndef M_PI
#define M_PI (3.1415926535897932384626)
#endif

void init(REAL *buff, const int nx, const int ny, const int nz,
          const REAL kx, const REAL ky, const REAL kz,
          const REAL dx, const REAL dy, const REAL dz,
          const REAL kappa, const REAL time) {
  REAL ax, ay, az;
  int jz, jy, jx;
  ax = exp(-kappa*time*(kx*kx));
  ay = exp(-kappa*time*(ky*ky));
  az = exp(-kappa*time*(kz*kz));
  for (jz = 0; jz < nz; jz++) {
    for (jy = 0; jy < ny; jy++) {
      for (jx = 0; jx < nx; jx++) {
        int j = jz*nx*ny + jy*nx + jx;
        REAL x = dx*((REAL)(jx + 0.5));
        REAL y = dy*((REAL)(jy + 0.5));
        REAL z = dz*((REAL)(jz + 0.5));
        REAL f0 = (REAL)0.125
          *(1.0 - ax*cos(kx*x))
          *(1.0 - ay*cos(ky*y))
          *(1.0 - az*cos(kz*z));
        buff[j] = f0;
      }
    }
  }
}
REAL accuracy(const REAL *b1, REAL *b2, const int len) {
  REAL err = 0.0;
  int i;
  for (i = 0; i < len; i++) {
    err += (b1[i] - b2[i]) * (b1[i] - b2[i]);
  }
  return (REAL)sqrt(err/len);
}
typedef void (*diffusion_loop_t)(REAL *f1, REAL *f2, int nx, int ny, int nz,
                                 REAL ce, REAL cw, REAL cn, REAL cs, REAL ct,
                                 REAL cb, REAL cc, REAL dt,
                                 REAL **f_ret, REAL *time_ret, int *count_ret);
static void
diffusion_baseline(REAL *f1, REAL *f2, int nx, int ny, int nz,
                   REAL ce, REAL cw, REAL cn, REAL cs, REAL ct,
                   REAL cb, REAL cc, REAL dt,
                   REAL **f_ret, REAL *time_ret, int *count_ret) {
  REAL time = 0.0;
  int count = 0;
  do {
    int z;
    for (z = 0; z < nz; z++) {
      int y;
      for (y = 0; y < ny; y++) {
        int x;
        for (x = 0; x < nx; x++) {
          int c, w, e, n, s, b, t;
          c =  x + y * nx + z * nx * ny;
          w = (x == 0)    ? c : c - 1;
          e = (x == nx-1) ? c : c + 1;
          n = (y == 0)    ? c : c - nx;
          s = (y == ny-1) ? c : c + nx;
          b = (z == 0)    ? c : c - nx * ny;
          t = (z == nz-1) ? c : c + nx * ny;
          f2[c] = cc * f1[c] + cw * f1[w] + ce * f1[e]
              + cs * f1[s] + cn * f1[n] + cb * f1[b] + ct * f1[t];
        }
      }
    }
    REAL *t = f1;
    f1 = f2;
    f2 = t;
    time += dt;
    count++;
  } while (time + 0.5*dt < 0.1);
  *time_ret = time;
  *f_ret = f1;
  *count_ret = count;
  return;
}
static void
diffusion_openmp(REAL *f1, REAL *f2, int nx, int ny, int nz,
                   REAL ce, REAL cw, REAL cn, REAL cs, REAL ct,
                   REAL cb, REAL cc, REAL dt,
                   REAL **f_ret, REAL *time_ret, int *count_ret) {

#pragma omp parallel
  {
    REAL time = 0.0;
    int count = 0;
    REAL *f1_t = f1;
    REAL *f2_t = f2;
#pragma omp master
    printf("%d threads runningn", omp_get_num_threads());
    do {
      int z;
#pragma omp for
      for (z = 0; z < nz; z++) {
        int y;
        for (y = 0; y < ny; y++) {
          int x;
          for (x = 0; x < nx; x++) {
            int c, w, e, n, s, b, t;
            c =  x + y * nx + z * nx * ny;
            w = (x == 0)    ? c : c - 1;
            e = (x == nx-1) ? c : c + 1;
            n = (y == 0)    ? c : c - nx;
            s = (y == ny-1) ? c : c + nx;
            b = (z == 0)    ? c : c - nx * ny;
            t = (z == nz-1) ? c : c + nx * ny;
            f2_t[c] = cc * f1_t[c] + cw * f1_t[w] + ce * f1_t[e]
                + cs * f1_t[s] + cn * f1_t[n] + cb * f1_t[b] + ct * f1_t[t];
          }
        }
      }
      REAL *t = f1_t;
      f1_t = f2_t;
      f2_t = t;
      time += dt;
      count++;
    } while (time + 0.5*dt < 0.1);
#pragma omp master
    {
      *f_ret = f1_t;
      *time_ret = time;      
      *count_ret = count;        
    }
  }
  return;
}

int main(int argc, char *argv[]) 
{
  struct timeval time_begin, time_end;
  int    nx    = NX;
  int    ny    = NX;
  int    nz    = NX;
  REAL *f1 = (REAL *)malloc(sizeof(REAL)*NX*NX*NX);
  REAL *f2 = (REAL *)malloc(sizeof(REAL)*NX*NX*NX);  
  REAL   time  = 0.0;
  int    count = 0;  
  REAL l, dx, dy, dz, kx, ky, kz, kappa, dt;
  REAL ce, cw, cn, cs, ct, cb, cc;
  l = 1.0;
  kappa = 0.1;
  dx = dy = dz = l / nx;
  kx = ky = kz = 2.0 * M_PI;
  dt = 0.1*dx*dx / kappa;
  init(f1, nx, ny, nz, kx, ky, kz, dx, dy, dz, kappa, time);
  ce = cw = kappa*dt/(dx*dx);
  cn = cs = kappa*dt/(dy*dy);
  ct = cb = kappa*dt/(dz*dz);
  cc = 1.0 - (ce + cw + cn + cs + ct + cb);
  diffusion_loop_t diffusion_loop = diffusion_baseline;
  if (argc == 2) {
    if (strcmp(argv[1], "openmp") == 0) {
      diffusion_loop = diffusion_openmp;
    }
  }
  gettimeofday(&time_begin, NULL);
  diffusion_loop(f1, f2, nx, ny, nz, ce, cw, cn, cs, ct, cb, cc, dt,
                 &f1, &time, &count);
  gettimeofday(&time_end, NULL);
  REAL *answer = (REAL *)malloc(sizeof(REAL) * nx*ny*nz);
  init(answer, nx, ny, nz, kx, ky, kz, dx, dy, dz, kappa, time);
  REAL err = accuracy(f1, answer, nx*ny*nz);
  double elapsed_time = (time_end.tv_sec - time_begin.tv_sec)
      + (time_end.tv_usec - time_begin.tv_usec)*1.0e-6;
  REAL mflops = (nx*ny*nz)*13.0*count/elapsed_time * 1.0e-06;
  double thput = (nx * ny * nz) * sizeof(REAL) * 2.0 * count
      / elapsed_time / (1 << 30);
  fprintf(stderr, "elapsed time : %.3f (s)n", elapsed_time);
  fprintf(stderr, "flops        : %.3f (MFlops)n", mflops);
  fprintf(stderr, "throughput   : %.3f (GB/s)n", thput);  
  fprintf(stderr, "accuracy     : %en", err);
  free(answer);
  free(f1);
  free(f2);
  return 0;
}

在第一种情况下,-c标志仅编译为对象文件,而不是实际的可执行文件。由于目标文件通常没有可执行位设置(因为它们不是直接可执行的),因此您会得到Permission denied错误。

在第二种情况下,这是因为您指定-l库的顺序很重要。您需要将-lm移动到prog.c文件之后,如下所示:

gcc -o prog -fopenmp prog.c -lm

最新更新