Sign Up

Sign Up to our social questions and Answers Engine to ask questions, answer people’s questions, and connect with other people.

Have an account? Sign In

Have an account? Sign In Now

Sign In

Log in to our social questions & answers engine to ask questions, answer people’s questions, and connect with other people.

Sign Up Here

Forgot Password?

Don't have account, Sign Up Here

Forgot Password

Lost your password? Please enter your email address. You will receive a link by email that lets you create a new password.

Have an account? Sign In Now

You must login to ask a question.

Forgot Password?

Need An Account, Sign Up Here

Please briefly explain why you feel this question should be reported.

Please briefly explain why you feel this answer should be reported.

Please briefly explain why you feel this user should be reported.

Sign InSign Up

The Archive Base

The Archive Base Logo The Archive Base Logo

The Archive Base Navigation

  • SEARCH
  • Home
  • About Us
  • Blog
  • Contact Us
Search
Ask A Question

Mobile menu

Close
Ask a Question
  • Home
  • Add group
  • Groups page
  • Feed
  • User Profile
  • Communities
  • Questions
    • New Questions
    • Trending Questions
    • Must read Questions
    • Hot Questions
  • Polls
  • Tags
  • Badges
  • Buy Points
  • Users
  • Help
  • Buy Theme
  • SEARCH
Home/ Questions/Q 6624245
In Process

The Archive Base Latest Questions

Editorial Team
  • 0
Editorial Team
Asked: May 25, 2026 (2026-05-25T21:37:44+00:00)

I have a problem with OpenCL, which is that it executes the entire command

  • 0

I have a problem with OpenCL: it executes the entire command queue, but it reads only 1/4 of the input and writes only 1/4 of the result.
No matter how many iterations I use, it is always 1/4.

It also sometimes crashes at random. Debugging gives me no information about where it crashes, since there are no debug symbols (0x4c4783f6 in ????, etc.).

Source code:

#include <cassert>
#include <cstring>
#include <iostream>
#include <vector>

#include <cl/cl.h>

// OpenCL C source handed to clCreateProgramWithSource as a one-element array.
// The pieces below are adjacent string literals, which the compiler joins into
// a single string. Only the #pragma line ends with a real newline; the rest of
// the kernel forms one long line of OpenCL C.
const char *progsrc[] = {
    // Enables the Intel printf extension used inside the kernel.
    "#pragma OPENCL EXTENSION cl_intel_printf : enable\n"
    "__kernel void add(__global const int *a, __global const int *b, __global int *out) "
    "{ "
    "    int tid = get_global_id(0);"
    // Currently stores the work-item id; the a[tid]+b[tid] sum is commented out.
    "    out[tid] = tid/*a[tid]+b[tid]*/;"
    "    printf(\"krnl: %d = %d + %d \\n\", out[tid], a[tid], b[tid]);"
    "}"
};

// Number of ints in each host/device array and number of work-items launched.
const int iterations = 20;

// CLCheck(a): checks an OpenCL status value.
//
// `a` is evaluated exactly once and stored in a local, so it is safe to pass a
// function call (e.g. CLCheck(clFinish(q))) without the call being repeated
// when the error message is built. If the value differs from CL_SUCCESS, the
// numeric code and the current source line are written to std::cerr and the
// ENCLOSING function returns -1, so the macro may only be used inside a
// function whose return type accepts -1.
//
// The do { ... } while(0) wrapper makes the macro behave like one statement.
#define CLCheck(a) \
do\
{\
    const cl_int clcheck_status_ = (a);\
    if(clcheck_status_ != CL_SUCCESS)\
    {\
        std::cerr << "OpenCL Error(" << clcheck_status_ << ") at " << __LINE__ << std::endl;\
        return -1;\
    }\
} while(0)

int main()
{
    cl_int err = CL_SUCCESS;

    int *aH = NULL;
    int *bH = NULL;
    int *outH = NULL;

    cl_uint platnum, devnum;
    cl_device_id dev;
    cl_platform_id plat;
    err = clGetPlatformIDs(0, 0, &platnum);
    CLCheck(err);
    cl_platform_id pfids[platnum];
    err = clGetPlatformIDs(platnum, pfids, &platnum);
    CLCheck(err);

    if(!platnum)
    {
        std::cerr << "No platform found." << std::endl;
        return -1;
    }
    else
        std::cout << platnum << " OpenCL platform(s) found.\n" << std::endl;

    for(unsigned int i = 0; i != platnum; i++)
    {
        char buf[4096];

        err = clGetDeviceIDs(pfids[i], CL_DEVICE_TYPE_ALL, 0, 0, &devnum);
        CLCheck(err);
        cl_device_id devids[devnum];
        err = clGetDeviceIDs(pfids[i], CL_DEVICE_TYPE_ALL, devnum, devids, &devnum);
        CLCheck(err);
        if(!devnum)
        {
            std::cerr << "No device found." << std::endl;
            return -1;
        }
        else
            std::cout << " " << devnum << " OpenCL device(s) found.\n" << std::endl;

        for(unsigned int i2 = 0; i2 != devnum; i2++)
        {
            char buf[1024];
            std::cout << ": \n\tName: " << buf;
            err = clGetDeviceInfo(devids[i2], CL_DEVICE_VENDOR, 1024, buf, NULL);
            CLCheck(err);
            if(!strncmp(buf, "Intel", 5))
            {
                dev = devids[0];
                plat = pfids[i];
                std::cout << "\n\tFound Intel(R) OpenCL device.";
            }
        }
    }
    cl_context_properties ctxprop[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)plat, 0};
    cl_context ctx = clCreateContext(ctxprop, 1, &dev, NULL, NULL, &err);
    CLCheck(err);

    cl_program program = clCreateProgramWithSource(ctx, 1, progsrc, NULL, &err);
    CLCheck(err);
    err = clBuildProgram(program, 1, &dev, "", NULL, NULL);
    if(err != CL_SUCCESS)
    {
        size_t bufsz;
        err = clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, 0, 0, &bufsz);
        char buf[bufsz];
        err = clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, bufsz, buf, &bufsz);
        std::cerr << "OpenCL program building failed: " << buf << std::endl;
        return -1;
    }
    err = clUnloadCompiler();
    CLCheck(err);

    aH = new int[iterations];
    bH = new int[iterations];
    outH = new int[iterations];
    memset(outH, 0, iterations*sizeof(int));
    for(int i = 0; i != iterations; i++)
    {
        aH[i] = i;
        bH[i] = i*2;
    }

    cl_mem aCL = clCreateBuffer(ctx, CL_MEM_READ_ONLY, iterations, NULL, &err);
    cl_mem bCL = clCreateBuffer(ctx, CL_MEM_READ_ONLY, iterations, NULL, &err);
    CLCheck(err);
    cl_mem outCL = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, iterations, NULL, &err);
    CLCheck(err);

    cl_kernel krnl = clCreateKernel(program, "add", &err);
    CLCheck(err);

    err = clSetKernelArg(krnl, 0, sizeof(aCL), &aCL);
    CLCheck(err);
    err = clSetKernelArg(krnl, 1, sizeof(bCL), &bCL);
    CLCheck(err);
    err = clSetKernelArg(krnl, 2, sizeof(outCL), &outCL);
    CLCheck(err);

    cl_command_queue cmdqueue = clCreateCommandQueue(ctx, dev, 0, &err);
    cl_event evt;
    size_t global_work_size[1] = { iterations };
    err = clEnqueueWriteBuffer(cmdqueue, aCL, CL_TRUE, 0, iterations, aH, 0, NULL, NULL);
    err = clEnqueueWriteBuffer(cmdqueue, bCL, CL_TRUE, 0, iterations, bH, 0, NULL, NULL);
    err = clEnqueueNDRangeKernel(cmdqueue, krnl, 1, NULL, global_work_size, NULL, 0, NULL, &evt);
    err = clWaitForEvents(1, &evt);
    err = clEnqueueReadBuffer(cmdqueue, outCL, CL_TRUE, 0, iterations, outH, 0, NULL, &evt);

    for(int i = 0; i != iterations; i++)
    {
        std::cout << outH[i] << std::endl;
    }

    err = clReleaseEvent(evt);
    err = clReleaseCommandQueue(cmdqueue);
    err = clReleaseKernel(krnl);
    err = clReleaseMemObject(outCL);
    err = clReleaseMemObject(bCL);
    err = clReleaseMemObject(aCL);
    err = clReleaseProgram(program);
    err = clReleaseContext(ctx);

    if(aH)
        delete aH;
    if(bH)
        delete bH;
    if(outH)
        delete outH;
    return 0;
}

output:

2 OpenCL platform(s) found.

Platform 0 :
        Name: NVIDIA CUDA
        Vendor: NVIDIA Corporation
        Profile: FULL_PROFILE
        Version: OpenCL 1.1 CUDA 4.0.1
        Extensions: cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing c
l_nv_d3d9_sharing cl_nv_d3d10_sharing cl_khr_d3d10_sharing cl_nv_d3d11_sharing c
l_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll

 1 OpenCL device(s) found.

  Device 0:
        Name: GeForce GT 425M
        Vendor: NVIDIA Corporation
        Profile: FULL_PROFILE
        Driver version: 280.26
        OpenCL version: OpenCL C 1.1
        Version: OpenCL 1.1 CUDA
        Extensions: cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing c
l_nv_d3d9_sharing cl_nv_d3d10_sharing cl_khr_d3d10_sharing cl_nv_d3d11_sharing c
l_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll  cl_khr_g
lobal_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32
_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64

Platform 1 :
        Name: Intel(R) OpenCL
        Vendor: Intel(R) Corporation
        Profile: FULL_PROFILE
        Version: OpenCL 1.1
        Extensions: cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_i
nt32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extende
d_atomics cl_khr_byte_addressable_store cl_intel_printf cl_ext_device_fission cl
_intel_immediate_execution cl_khr_gl_sharing cl_khr_icd

 1 OpenCL device(s) found.

  Device 0:
        Name: Intel(R) Core(TM) i3 CPU       M 370  @ 2.40GHz
        Found Intel(R) OpenCL device.
        Vendor: Intel(R) Corporation
        Profile: FULL_PROFILE
        Driver version: 1.1
        OpenCL version: OpenCL C 1.1
        Version: OpenCL 1.1 (Build 15293.6650)
        Extensions: cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_i
nt32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extende
d_atomics cl_khr_byte_addressable_store cl_intel_printf cl_ext_device_fission cl
_intel_immediate_execution cl_khr_gl_sharing

krnl: 0 = 0 + 0
krnl: 1 = 1 + 2
krnl: 2 = 2 + 4
krnl: 3 = 3 + 6
krnl: 4 = 4 + 8
krnl: 5 = 0 + 0
krnl: 6 = 0 + 0
krnl: 7 = 0 + 0
krnl: 16 = 0 + 492859489
krnl: 17 = 0 + -1042621749
krnl: 18 = 0 + 1310105771
krnl: 19 = 0 + 134230852
krnl: 8 = 0 + 0
krnl: 9 = 0 + 0
krnl: 10 = 0 + -1094462526
krnl: 11 = 0 + -1094462526
krnl: 12 = 0 + -1230120245
krnl: 13 = 0 + 500723958
krnl: 14 = 0 + 530164160
krnl: 15 = 0 + 492859489
0
1
2
3
4
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

Thanks 🙂

  • 1 1 Answer
  • 0 Views
  • 0 Followers
  • 0
Share
  • Facebook
  • Report

Leave an answer
Cancel reply

You must login to add an answer.

Forgot Password?

Need An Account, Sign Up Here

1 Answer

  • Voted
  • Oldest
  • Recent
  • Random
  1. Editorial Team
    Editorial Team
    2026-05-25T21:37:45+00:00Added an answer on May 25, 2026 at 9:37 pm

    I’m not familiar with OpenCL, but I think you’re missing a few `sizeof`s here:

    err = clEnqueueWriteBuffer(cmdqueue, aCL, CL_TRUE, 0, iterations, aH, 0, NULL, NULL);
    

    should probably be:

    err = clEnqueueWriteBuffer(cmdqueue, aCL, CL_TRUE, 0, iterations * sizeof(int), aH, 0, NULL, NULL);
    

    The same applies to the similar calls that follow it (the second clEnqueueWriteBuffer and the clEnqueueReadBuffer).

    EDIT:

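    And here’s another place where you may have missed a few `sizeof`s. The size argument of clCreateBuffer is also given in bytes, so each `iterations` below should be `iterations * sizeof(int)`: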

    cl_mem aCL = clCreateBuffer(ctx, CL_MEM_READ_ONLY, iterations, NULL, &err);
    cl_mem bCL = clCreateBuffer(ctx, CL_MEM_READ_ONLY, iterations, NULL, &err);
    CLCheck(err);
    cl_mem outCL = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, iterations, NULL, &err);
    CLCheck(err);
    
    • 0
    • Reply
    • Share
      Share
      • Share on Facebook
      • Share on Twitter
      • Share on LinkedIn
      • Share on WhatsApp
      • Report

Sidebar

Related Questions

I have written a game that uses GLUT, OpenGL and FMOD. The problem is
I have problem in some JavaScript that I am writing where the Switch statement
I have a program which simulates a physical system that changes over time. I
I have been developing some computer vision tools with openCV, but every time that
I am working on an image manipulation problem. I have an overhead projector that
I have a problem that I'm trying to optimize. I'm reproducing OpenGL functions and
I have a small problem, which I think it will be easy for you
I have a problem with texturing – it loads the image correctly, but renders
I have a problem with incorrect alpha blending results with openGL ES on iPhone.
I have problem with return statment >.< I want to store all magazine names

Explore

  • Home
  • Add group
  • Groups page
  • Communities
  • Questions
    • New Questions
    • Trending Questions
    • Must read Questions
    • Hot Questions
  • Polls
  • Tags
  • Badges
  • Users
  • Help
  • SEARCH

Footer

© 2021 The Archive Base. All Rights Reserved
With Love by The Archive Base

Insert/edit link

Enter the destination URL

Or link to existing content

    No search term specified. Showing recent items. Search or use up and down arrow keys to select an item.