我也是jCUDA和java的新手。我正在尝试在Redhat Linux上使用eclipse从NVIDIA示例中编译一个矢量加法程序。
步骤如下:1. 输入:nvcc -ptx JCudaVectorAddKernel。cu ->生成JCudaVectorAddKernel.ptx文件2. 执行以下程序:JCudaVectorAdd.java:
包JCudaVectorAdd;
import static jcuda.driver.JCudaDriver.*;
import java.io.*;
import jcuda.*;
import jcuda.driver.*;
public class JCudaVectorAdd
{
public static void main(String[] args) throws IOException
{
// Enable exceptions and omit all subsequent error checks
JCudaDriver.setExceptionsEnabled(true);
// Create the PTX file by calling the NVCC
String ptxFileName = preparePtxFile("JCudaVectorAddKernel.cu");
// Initialize the driver and create a context for the first device.
cuInit(0);
CUdevice device = new CUdevice();
cuDeviceGet(device, 0);
CUcontext context = new CUcontext();
cuCtxCreate(context, 0, device);
// Load the ptx file.
CUmodule module = new CUmodule();
cuModuleLoad(module, ptxFileName);
// Obtain a function pointer to the "add" function.
CUfunction function = new CUfunction();
cuModuleGetFunction(function, module, "add");
int numElements = 100000;
// Allocate and fill the host input data
float hostInputA[] = new float[numElements];
float hostInputB[] = new float[numElements];
for(int i = 0; i < numElements; i++)
{
hostInputA[i] = (float)i;
hostInputB[i] = (float)i;
}
// Allocate the device input data, and copy the
// host input data to the device
CUdeviceptr deviceInputA = new CUdeviceptr();
cuMemAlloc(deviceInputA, numElements * Sizeof.FLOAT);
cuMemcpyHtoD(deviceInputA, Pointer.to(hostInputA),
numElements * Sizeof.FLOAT);
CUdeviceptr deviceInputB = new CUdeviceptr();
cuMemAlloc(deviceInputB, numElements * Sizeof.FLOAT);
cuMemcpyHtoD(deviceInputB, Pointer.to(hostInputB),
numElements * Sizeof.FLOAT);
// Allocate device output memory
CUdeviceptr deviceOutput = new CUdeviceptr();
cuMemAlloc(deviceOutput, numElements * Sizeof.FLOAT);
// Set up the kernel parameters: A pointer to an array
// of pointers which point to the actual values.
Pointer kernelParameters = Pointer.to(
Pointer.to(new int[]{numElements}),
Pointer.to(deviceInputA),
Pointer.to(deviceInputB),
Pointer.to(deviceOutput)
);
// Call the kernel function.
int blockSizeX = 256;
int gridSizeX = (int)Math.ceil((double)numElements / blockSizeX);
cuLaunchKernel(function,
gridSizeX, 1, 1, // Grid dimension
blockSizeX, 1, 1, // Block dimension
0, null, // Shared memory size and stream
kernelParameters, null // Kernel- and extra parameters
);
cuCtxSynchronize();
// Allocate host output memory and copy the device output
// to the host.
float hostOutput[] = new float[numElements];
cuMemcpyDtoH(Pointer.to(hostOutput), deviceOutput,
numElements * Sizeof.FLOAT);
// Verify the result
boolean passed = true;
for(int i = 0; i < numElements; i++)
{
float expected = i+i;
if (Math.abs(hostOutput[i] - expected) > 1e-5)
{
System.out.println(
"At index "+i+ " found "+hostOutput[i]+
" but expected "+expected);
passed = false;
break;
}
}
System.out.println("Test "+(passed?"PASSED":"FAILED"));
// Clean up.
cuMemFree(deviceInputA);
cuMemFree(deviceInputB);
cuMemFree(deviceOutput);
}
private static String preparePtxFile(String cuFileName) throws IOException
{
int endIndex = cuFileName.lastIndexOf('.');
if (endIndex == -1)
{
endIndex = cuFileName.length()-1;
}
String ptxFileName = cuFileName.substring(0, endIndex+1)+"ptx";
File ptxFile = new File(ptxFileName);
if (ptxFile.exists())
{
return ptxFileName;
}
File cuFile = new File(cuFileName);
if (!cuFile.exists())
{
throw new IOException("Input file not found: "+cuFileName);
}
String modelString = "-m"+System.getProperty("sun.arch.data.model");
String command =
"nvcc " + modelString + " -ptx "+
cuFile.getPath()+" -o "+ptxFileName;
System.out.println("Executingn"+command);
Process process = Runtime.getRuntime().exec(command);
String errorMessage = new String(toByteArray(process.getErrorStream()));
String outputMessage= new String(toByteArray(process.getInputStream()));
int exitValue = 0;
try
{
exitValue = process.waitFor();
}
catch (InterruptedException e)
{
Thread.currentThread().interrupt();
throw new IOException(
"Interrupted while waiting for nvcc output", e);
}
if (exitValue != 0)
{
System.out.println("nvcc process exitValue "+exitValue);
System.out.println("errorMessage:n"+errorMessage);
System.out.println("outputMessage:n"+outputMessage);
throw new IOException(
"Could not create .ptx file: "+errorMessage);
}
System.out.println("Finished creating PTX file");
return ptxFileName;
}
private static byte[] toByteArray(InputStream inputStream) throws IOException
{
ByteArrayOutputStream baos = new ByteArrayOutputStream();
byte buffer[] = new byte[8192];
while (true)
{
int read = inputStream.read(buffer);
if (read == -1)
{
break;
}
baos.write(buffer, 0, read);
}
return baos.toByteArray();
}
}
JCudaVectorAddKernel.cu:
extern "C"
__global__ void add(int n, float *a, float *b, float *sum)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i<n)
{
sum[i] = a[i] + b[i];
}
}
"JCudaVectorAddKernel 。cu'和'JCudaVectorAddKernel.java'在同一路径上:/home/sandeep/workspace1/jCuda/jCudaVectorAdd/src/jCudaVectorAdd
当我在eclipse中执行程序时,它给了我以下错误:
Exception in thread "main" java.io.IOException: Input file not found: JCudaVectorAddKernel.cu
at JCudaVectorAdd.JCudaVectorAdd.preparePtxFile(JCudaVectorAdd.java:128)
at JCudaVectorAdd.JCudaVectorAdd.main(JCudaVectorAdd.java:20)
有什么要做的有关编译命令?或ptx/。文件路径?如果我走错方向,请给我指路。
示例试图在运行时编译PTX文件,并打印它试图执行的命令。当您手动编译PTX文件时,这实际上可能是不必要的,并且您可以更改
String ptxFileName = preparePtxFile("JCudaVectorAddKernel.cu");
String ptxFileName = "JCudaVectorAddKernel.ptx";
在任何情况下,您都可以打印文件名
File ptxFile = new File(ptxFileName);
System.out.println(ptxFile.getCanonicalPath());
和
File cuFile = new File(cuFileName);
System.out.println(cuFile.getCanonicalPath());
查看它们是否与期望的目录匹配。但是PTX文件(和CU文件)应该位于
中。/home/sandeep/workspace1/jCuda/jCudaVectorAdd/
(即在项目的"根"目录下)