我需要并行计算一个大矩阵的特征值。为此,我使用cuSolver。为了测试它是如何工作的,我从文档中获取了代码:
#include <stdlib.h>
#include <assert.h>
#include <cuda_runtime.h>
#include <cusolverDn.h>
void printMatrix(int m, int n, const double*A, int lda, const char* name)
{
for(int row = 0 ; row < m ; row++){
for(int col = 0 ; col < n ; col++){
double Areg = A[row + col*lda];
printf("%s(%d,%d) = %fn", name, row+1, col+1, Areg);
}
}
}
int main(int argc, char*argv[])
{
cusolverDnHandle_t cusolverH = NULL;
cusolverStatus_t cusolver_status = CUSOLVER_STATUS_SUCCESS;
cudaError_t cudaStat1 = cudaSuccess;
cudaError_t cudaStat2 = cudaSuccess;
cudaError_t cudaStat3 = cudaSuccess;
const int m = 3;
const int lda = m;
/* | 3.5 0.5 0 |
* A = | 0.5 3.5 0 |
* | 0 0 2 |
*
*/
double A[lda*m] = { 3.5, 0.5, 0, 0.5, 3.5, 0, 0, 0, 2.0};
double lambda[m] = { 2.0, 3.0, 4.0};
double V[lda*m]; // eigenvectors
double W[m]; // eigenvalues
double *d_A = NULL;
double *d_W = NULL;
int *devInfo = NULL;
double *d_work = NULL;
int lwork = 0;
int info_gpu = 0;
printf("A = (matlab base-1)n");
printMatrix(m, m, A, lda, "A");
printf("=====n");
// step 1: create cusolver/cublas handle
cusolver_status = cusolverDnCreate(&cusolverH);
assert(CUSOLVER_STATUS_SUCCESS == cusolver_status);
// step 2: copy A and B to device
cudaStat1 = cudaMalloc ((void**)&d_A, sizeof(double) * lda * m);
cudaStat2 = cudaMalloc ((void**)&d_W, sizeof(double) * m);
cudaStat3 = cudaMalloc ((void**)&devInfo, sizeof(int));
assert(cudaSuccess == cudaStat1);
assert(cudaSuccess == cudaStat2);
assert(cudaSuccess == cudaStat3);
cudaStat1 = cudaMemcpy(d_A, A, sizeof(double) * lda * m, cudaMemcpyHostToDevice);
assert(cudaSuccess == cudaStat1);
// step 3: query working space of syevd
cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR; // compute eigenvalues and eigenvectors.
cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER;
cusolver_status = cusolverDnDsyevd_bufferSize(
cusolverH,
jobz,
uplo,
m,
d_A,
lda,
d_W,
&lwork);
assert (cusolver_status == CUSOLVER_STATUS_SUCCESS);
cudaStat1 = cudaMalloc((void**)&d_work, sizeof(double)*lwork);
assert(cudaSuccess == cudaStat1);
// step 4: compute spectrum
cusolver_status = cusolverDnDsyevd(
cusolverH,
jobz,
uplo,
m,
d_A,
lda,
d_W,
d_work,
lwork,
devInfo);
cudaStat1 = cudaDeviceSynchronize();
assert(CUSOLVER_STATUS_SUCCESS == cusolver_status);
assert(cudaSuccess == cudaStat1);
cudaStat1 = cudaMemcpy(W, d_W, sizeof(double)*m, cudaMemcpyDeviceToHost);
cudaStat2 = cudaMemcpy(V, d_A, sizeof(double)*lda*m, cudaMemcpyDeviceToHost);
cudaStat3 = cudaMemcpy(&info_gpu, devInfo, sizeof(int), cudaMemcpyDeviceToHost);
assert(cudaSuccess == cudaStat1);
assert(cudaSuccess == cudaStat2);
assert(cudaSuccess == cudaStat3);
printf("after syevd: info_gpu = %dn", info_gpu);
assert(0 == info_gpu);
printf("eigenvalue = (matlab base-1), ascending ordern");
for(int i = 0 ; i < m ; i++){
printf("W[%d] = %En", i+1, W[i]);
}
printf("V = (matlab base-1)n");
printMatrix(m, m, V, lda, "V");
printf("=====n");
// step 4: check eigenvalues
double lambda_sup = 0;
for(int i = 0 ; i < m ; i++){
double error = fabs( lambda[i] - W[i]);
lambda_sup = (lambda_sup > error)? lambda_sup : error;
}
printf("|lambda - W| = %En", lambda_sup);
// free resources
if (d_A ) cudaFree(d_A);
if (d_W ) cudaFree(d_W);
if (devInfo) cudaFree(devInfo);
if (d_work ) cudaFree(d_work);
if (cusolverH) cusolverDnDestroy(cusolverH);
cudaDeviceReset();
return 0;
}
但编译后,我得到了下一个错误undefined reference to `cusolverDnCreate'
、undefined reference to cusolverDnDsyevd_bufferSize
、undefined reference to cusolverDnDsyevd
和undefined reference to `cusolverDnDestroy
,但所有这些函数都存在于<cusolverDen.h>。
最奇怪的是,当我试图按照上面所说的编译这个文件时:对cusolverDn的未定义引用。相反,我在上面列出了两个第一个错误(其他错误相同(,我得到了新的错误:identifier "cusolverEigMode_t" is undefined
和identifier "CUSOLVER_EIG_MODE_VECTOR" is undefined
。
我试着解决了几天,如果你能帮我,我将非常感激!
您必须在Makefile 中链接-lculsolver