我有一些困难设置2 GPU,以使对等通信。我正在使用CUDA 4.0和FORTRAN编程。PGI编译器
我写了一个程序,该程序确认我的节点上有4个GPU。
我决定使用其中两个,但有以下错误: 0:DealLocate:无效设备指针。
subroutine directTransfer()
use cudafor
implicit none
integer, parameter :: N = 4*1024*1024
real, pinned, allocatable :: a(:), b(:)
real, device, allocatable :: a_d(:), b_d(:)
!these hold free and total memory before and after
!allocation, used to verify allocation happening on proper devices
integer (int_ptr_kind()),allocatable ::
& freeBefore(:), totalBefore(:),
& freeAfter(:), totalAfter(:)
integer :: istat, nDevices, i, accessPeer, timingDev
type(cudaDeviceProp)::prop
type(cudaEvent)::startEvent,stopEvent
real :: time
!allocate host arrays
allocate(a(N), b(N))
allocate(freeBefore(0:nDevices -1),
& totalBefore(0:nDevices -1))
allocate(freeAfter(0:nDevices -1),
& totalAfter(0:nDevices -1))
write(*,*) 'Start!'
!get devices ionfo (including total and free memory)
!before allocation
istat = cudaGetDeviceCount(nDevices)
if(nDevices < 2) then
write(*,*) 'Need at least two CUDA capable devices'
stop
end if
write(*,"('Number of CUDA-capable devices: ',
& i0, /)"),nDevices
do i = 0, nDevices - 1
istat = cudaGetDeviceProperties(prop, i)
istat = cudaSetDevice(i)
istat = cudaMemGetInfo(freeBefore(i), totalBefore(i))
end do
!!!Here is the trouble zone!!!!
istat = cudaSetDevice(0)
allocate(a_d(N))
istat = cudaSetDevice(1)
allocate(b_d(N))
deallocate(freeBefore, totalBefore,freeAfter,totalAfter)
deallocate(a,b,a_d,b_d)
end subroutine directTransfer
在以下我没有错误的情况下:
istat = cudaSetDevice(0)
allocate(a_d(N))
!istat = cudaSetDevice(1)
!allocate(b_d(N))
这样,也没有错误:
!istat = cudaSetDevice(0)
!allocate(a_d(N))
istat = cudaSetDevice(1)
allocate(b_d(N))
但是此返回错误
istat = cudaSetDevice(0)
allocate(a_d(N))
istat = cudaSetDevice(1)
allocate(b_d(N))
所以看来我无法设置2GPU来启动我的程序。您能帮我理解为什么无法设置2GPU并提示解决这个问题?
谢谢jackolantern !!
这是诀窍。我更改了代码如下,它可以完美地工作
!clean up
deallocate(freeBefore, totalBefore,freeAfter,totalAfter)
istat = cudaSetDevice(0)
deallocate(a_d)
istat = cudaSetDevice(1)
deallocate(b_d)
deallocate(a,b)
这是我的问题答案。希望它能帮助他人。