代码之家  ›  专栏  ›  技术社区  ›  achinda99

简单的opencl程序编译并运行,但输出不正确

  •  1
  • achinda99  · 技术社区  · 14 年前

    我写了一个简单的基于SDK的OpenCL程序,它编译并运行,但是输出是错误的。我做错什么了吗?

    对于学习调试C和Opencl的任何建议都非常感谢。我对月台很陌生。

    代码如下。

    数组C中的输出都是零。

    谢谢。

    试验品

    #ifndef _TEST_OPENCL_H_
    #define _TEST_OPENCL_H_
    
    int main( int argc, const char** argv);
    int runTest( int argc, const char** argv);
    
    #endif
    

    测试线程

    // simple test of adding a[i] to b[i] to get c[i]
    __kernel void add_array(__global float *a, __global float *b, __global float *c)
    {
        int xid = get_global_id(0);
        c[xid] = a[xid] + b[xid];
    }
    

    测试开放

    // standard utility and system includes
    #include <oclUtils.h>
    #include "test_opencl.h"
    
    // OpenCL error catcher
    cl_int err = 0;
    
    // Main Program
    // *********************************************************************
    int main( int argc, const char** argv) 
    {    
        // set logfile name and start logs
        shrSetLogFileName ("test_opencl.txt");
        shrLog(LOGBOTH, 0, "%s Starting...\n\n", argv[0]); 
    
        // run the main test
        int result = runTest(argc, argv);
        shrCheckError(result, 0);
    
        // finish
        shrEXIT(argc, argv);
    }
    
    //! Run a simple test for OPENCL
    // *********************************************************************
    int runTest( int argc, const char** argv) 
    {
        cl_context gpu_context;
        cl_command_queue cmd_queue;
        cl_program program;
        cl_kernel test_kernel;
    
        const size_t szGlobalWorkSize = 10;
        const size_t szLocalWorkSize = 10;
    
        // size of memory required to store the array
        const unsigned int mem_size = sizeof(int) * 10;
    
        // create the OpenCL context on a GPU device
        gpu_context = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &err);
        shrCheckError(err, CL_SUCCESS);
    
        // get devices
        cl_device_id device;
        if( shrCheckCmdLineFlag(argc, argv, "device") ) {
          int device_nr = 0;
          shrGetCmdLineArgumenti(argc, argv, "device", &device_nr);
          device = oclGetDev(gpu_context, device_nr);
        } else {
          device = oclGetMaxFlopsDev(gpu_context);
        }
    
        // create a command-queue
        cmd_queue = clCreateCommandQueue(gpu_context, device, 0, &err);
        shrCheckError(err, CL_SUCCESS);
    
        // allocate and initalize host memory
        int a[10], b[10], c[10];
        for (int i = 0; i < 10; i++) {
            a[i] = i;
            b[i] = i * i;
        }
    
        // create buffers on device
        cl_mem vol_a = clCreateBuffer(gpu_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, a, &err);
        shrCheckError(err, CL_SUCCESS);
    
        cl_mem vol_b = clCreateBuffer(gpu_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, b, &err);
        shrCheckError(err, CL_SUCCESS);
    
        cl_mem vol_c = clCreateBuffer(gpu_context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, c, &err);
        shrCheckError(err, CL_SUCCESS);
    
        // copy data from host to device
        err = clEnqueueWriteBuffer(cmd_queue, vol_a, CL_TRUE, 0, mem_size, a, 0, NULL, NULL);
        err |= clEnqueueWriteBuffer(cmd_queue, vol_b, CL_TRUE, 0, mem_size, b, 0, NULL, NULL);
        shrCheckError(err, CL_SUCCESS);
    
        // Program Setup
        size_t program_length;
        char* source_path = shrFindFilePath("test_opencl.cl", argv[0]);
        shrCheckError(source_path != NULL, shrTRUE);
        char *source = oclLoadProgSource(source_path, "", &program_length);
        shrCheckError(source != NULL, shrTRUE);
    
        // create the program
        program = clCreateProgramWithSource(gpu_context, 1, (const char **)&source, &program_length, &err);
        shrCheckError(err, CL_SUCCESS);
    
        // build the program
        err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            // write out standard error, Build Log and PTX, then return error
            shrLog(LOGBOTH | ERRORMSG, err, STDERROR);
            return(EXIT_FAILURE); 
        }
    
        clFinish(cmd_queue);
    
        shrLog(LOGBOTH, 0, "%s Starting kernel operation...\n\n", argv[0]); 
    
        // create the test kernel
        test_kernel = clCreateKernel(program, "add_array", &err);
        shrCheckError(err, CL_SUCCESS);
    
        // set the args values for the kernel
        err  = clSetKernelArg(test_kernel, 0, sizeof(cl_mem), (void *) &vol_a);
        err |= clSetKernelArg(test_kernel, 1, sizeof(cl_mem), (void *) &vol_b);
        err |= clSetKernelArg(test_kernel, 2, sizeof(cl_mem), (void *) &vol_c);
        shrCheckError(err, CL_SUCCESS);
    
        err = clEnqueueNDRangeKernel(cmd_queue, test_kernel, 1, NULL, &szGlobalWorkSize, NULL, 0, NULL, NULL);
        shrCheckError(err, CL_SUCCESS);
    
        clFinish(cmd_queue);
    
        // copy result from device to host
        err = clEnqueueReadBuffer(cmd_queue, vol_c, CL_TRUE, 0, mem_size, c, 0, NULL, NULL);
        shrCheckError(err, CL_SUCCESS);
    
        int d[10];
        err = clEnqueueReadBuffer(cmd_queue, vol_a, CL_TRUE, 0, mem_size, d, 0, NULL, NULL);
        shrCheckError(err, CL_SUCCESS);
    
        clFinish(cmd_queue);
    
        shrLog(LOGBOTH, 0, "%s Finished kernel operation...\n\n", argv[0]); 
    
        bool passed = true;
    
        for (int i = 0; i < 10; i++) {
            if (c[i] != i + i * i)
                passed = false;
                shrLog(LOGBOTH, 0, "c = %d    d = %d\n", c[i], d[i]); 
        }
    
        if (passed)
            shrLog(LOGBOTH, 0, "%s Test Passed\n\n", argv[0]); 
        else
            shrLog(LOGBOTH, 0, "%s Test Failed\n\n", argv[0]); 
    
        // cleanup OpenCL
        clReleaseMemObject(vol_a);
        clReleaseMemObject(vol_b);
        clReleaseMemObject(vol_c);
    
        clReleaseKernel(test_kernel);
        clReleaseProgram(program);
        clReleaseCommandQueue(cmd_queue);
        clReleaseContext(gpu_context);
    
        return 0;
    }
    
    1 回复  |  直到 11 年前
        1
  •  1
  •   achinda99    14 年前

    可以找到代码中的问题和解决方案 here .