Sign Up

Sign Up to our social questions and Answers Engine to ask questions, answer people’s questions, and connect with other people.

Have an account? Sign In

Have an account? Sign In Now

Sign In

Login to our social questions & Answers Engine to ask questions answer people’s questions & connect with other people.

Sign Up Here

Forgot Password?

Don't have account, Sign Up Here

Forgot Password

Lost your password? Please enter your email address. You will receive a link and will create a new password via email.

Have an account? Sign In Now

You must login to ask a question.

Forgot Password?

Need An Account, Sign Up Here

Please briefly explain why you feel this question should be reported.

Please briefly explain why you feel this answer should be reported.

Please briefly explain why you feel this user should be reported.

Sign InSign Up

The Archive Base

The Archive Base Logo The Archive Base Logo

The Archive Base Navigation

  • SEARCH
  • Home
  • About Us
  • Blog
  • Contact Us
Search
Ask A Question

Mobile menu

Close
Ask a Question
  • Home
  • Add group
  • Groups page
  • Feed
  • User Profile
  • Communities
  • Questions
    • New Questions
    • Trending Questions
    • Must read Questions
    • Hot Questions
  • Polls
  • Tags
  • Badges
  • Buy Points
  • Users
  • Help
  • Buy Theme
  • SEARCH
Home/ Questions/Q 7769383
In Process

The Archive Base Latest Questions

Editorial Team
  • 0
Editorial Team
Asked: June 1, 20262026-06-01T16:08:46+00:00 2026-06-01T16:08:46+00:00

I’m having a problem when I try to run the reduction program from the

  • 0

I’m having a problem when I try to run the reduction program from the OpenCL in Action’s sources.

Im using Visual Studio 2008. This is the error:

Unhandled exception in 0x013526a7 in Reduction.exe: 0xC00000FD: Stack
overflow.

And in the asm file the cursor is to

test dword ptr [eax],eax ; probe page.

I tried to debug it, but when I put a breakpoint in the main function, the debugging starts, but the program does not keep running.

I don’t know what is the really problem.

These are the source files:
reduction.cpp

#define _CRT_SECURE_NO_WARNINGS
#define PROGRAM_FILE "reduction_complete.cl"

#define ARRAY_SIZE 1048576
#define KERNEL_1 "reduction_vector"
#define KERNEL_2 "reduction_complete"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#ifdef MAC
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

/* Find a GPU or CPU associated with the first available platform */
cl_device_id create_device() {

   cl_platform_id platform;
   cl_device_id dev;
   int err;

   /* Identify a platform */
   err = clGetPlatformIDs(1, &platform, NULL);
   if(err < 0) {
      perror("Couldn't identify a platform");
      exit(1);
   } 

   /* Access a device */
   err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &dev, NULL);
   if(err == CL_DEVICE_NOT_FOUND) {
      err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &dev, NULL);
   }
   if(err < 0) {
      perror("Couldn't access any devices");
      exit(1);   
   }

   return dev;
}

/* Create program from a file and compile it */
cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) {

   cl_program program;
   FILE *program_handle;
   char *program_buffer, *program_log;
   size_t program_size, log_size;
   int err;

   /* Read program file and place content into buffer */
   program_handle = fopen(filename, "r");
   if(program_handle == NULL) {
      perror("Couldn't find the program file");
      exit(1);
   }
   fseek(program_handle, 0, SEEK_END);
   program_size = ftell(program_handle);
   rewind(program_handle);
   program_buffer = (char*)malloc(program_size + 1);
   program_buffer[program_size] = '\0';
   fread(program_buffer, sizeof(char), program_size, program_handle);
   fclose(program_handle);

   /* Create program from file */
   program = clCreateProgramWithSource(ctx, 1, 
      (const char**)&program_buffer, &program_size, &err);
   if(err < 0) {
      perror("Couldn't create the program");
      exit(1);
   }
   free(program_buffer);

   /* Build program */
   err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
   if(err < 0) {

      /* Find size of log and print to std output */
      clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, 
            0, NULL, &log_size);
      program_log = (char*) malloc(log_size + 1);
      program_log[log_size] = '\0';
      clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, 
            log_size + 1, program_log, NULL);
      printf("%s\n", program_log);
      free(program_log);
      exit(1);
   }

   return program;
}

int main() {

   /* OpenCL structures */
   cl_device_id device;
   cl_context context;
   cl_program program;
   cl_kernel vector_kernel, complete_kernel;
   cl_command_queue queue;
   cl_event start_event, end_event;
   cl_int i, err;
   size_t local_size, global_size;

   /* Data and buffers */
   float data[ARRAY_SIZE];
   float sum, actual_sum;
   cl_mem data_buffer, sum_buffer;
   cl_ulong time_start, time_end, total_time;

   /* Initialize data */
   for(i=0; i<ARRAY_SIZE; i++) {
      data[i] = 1.0f*i;
   }

   /* Create device and determine local size */
   device = create_device();
   err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,     
         sizeof(local_size), &local_size, NULL);    
   if(err < 0) {
      perror("Couldn't obtain device information");
      exit(1);   
   }

   /* Create a context */
   context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
   if(err < 0) {
      perror("Couldn't create a context");
      exit(1);   
   }

   /* Build program */
   program = build_program(context, device, PROGRAM_FILE);

   /* Create data buffer */
   data_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE |
         CL_MEM_USE_HOST_PTR, ARRAY_SIZE * sizeof(float), data, &err);
   sum_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
         sizeof(float), NULL, &err);
   if(err < 0) {
      perror("Couldn't create a buffer");
      exit(1);   
   };

   /* Create a command queue */
   queue = clCreateCommandQueue(context, device, 
         CL_QUEUE_PROFILING_ENABLE, &err);
   if(err < 0) {
      perror("Couldn't create a command queue");
      exit(1);   
   };

   /* Create kernels */
   vector_kernel = clCreateKernel(program, KERNEL_1, &err);
   complete_kernel = clCreateKernel(program, KERNEL_2, &err);
   if(err < 0) {
      perror("Couldn't create a kernel");
      exit(1);
   };

   /* Set arguments for vector kernel */
   err = clSetKernelArg(vector_kernel, 0, sizeof(cl_mem), &data_buffer);
   err |= clSetKernelArg(vector_kernel, 1, local_size * 4 * sizeof(float), NULL);

   /* Set arguments for complete kernel */
   err = clSetKernelArg(complete_kernel, 0, sizeof(cl_mem), &data_buffer);
   err |= clSetKernelArg(complete_kernel, 1, local_size * 4 * sizeof(float), NULL);
   err |= clSetKernelArg(complete_kernel, 2, sizeof(cl_mem), &sum_buffer);
   if(err < 0) {
      perror("Couldn't create a kernel argument");
      exit(1);   
   }

   /* Enqueue kernels */
   global_size = ARRAY_SIZE/4;
   err = clEnqueueNDRangeKernel(queue, vector_kernel, 1, NULL, &global_size, 
         &local_size, 0, NULL, &start_event);
   if(err < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);   
   }
   printf("Global size = %zu\n", global_size);

   /* Perform successive stages of the reduction */
   while(global_size/local_size > local_size) {
      global_size = global_size/local_size;
      err = clEnqueueNDRangeKernel(queue, vector_kernel, 1, NULL, &global_size, 
            &local_size, 0, NULL, NULL);
      printf("Global size = %zu\n", global_size);
      if(err < 0) {
         perror("Couldn't enqueue the kernel");
         exit(1);   
      }
   }
   global_size = global_size/local_size;
   err = clEnqueueNDRangeKernel(queue, complete_kernel, 1, NULL, &global_size, 
         NULL, 0, NULL, &end_event);
   printf("Global size = %zu\n", global_size);

   /* Finish processing the queue and get profiling information */
   clFinish(queue);
   clGetEventProfilingInfo(start_event, CL_PROFILING_COMMAND_START,
         sizeof(time_start), &time_start, NULL);
   clGetEventProfilingInfo(end_event, CL_PROFILING_COMMAND_END,
         sizeof(time_end), &time_end, NULL);
   total_time = time_end - time_start;

   /* Read the result */
   err = clEnqueueReadBuffer(queue, sum_buffer, CL_TRUE, 0, 
      sizeof(float), &sum, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't read the buffer");
      exit(1);   
   }

   /* Check result */
   actual_sum = 1.0f * (ARRAY_SIZE/2)*(ARRAY_SIZE-1);
   if(fabs(sum - actual_sum) > 0.01*fabs(sum))
      printf("Check failed.\n");
   else
      printf("Check passed.\n");
   printf("Total time = %lu\n", total_time);

   /* Deallocate resources */
   clReleaseEvent(start_event);
   clReleaseEvent(end_event);
   clReleaseMemObject(sum_buffer);
   clReleaseMemObject(data_buffer);
   clReleaseKernel(vector_kernel);
   clReleaseKernel(complete_kernel);
   clReleaseCommandQueue(queue);
   clReleaseProgram(program);
   clReleaseContext(context);
   return 0;
}

reduction_complete.cl

__kernel void reduction_vector(__global float4* data, 
      __local float4* partial_sums) {

   int lid = get_local_id(0);
   int group_size = get_local_size(0);

   partial_sums[lid] = data[get_global_id(0)];
   barrier(CLK_LOCAL_MEM_FENCE);

   for(int i = group_size/2; i>0; i >>= 1) {
      if(lid < i) {
         partial_sums[lid] += partial_sums[lid + i];
      }
      barrier(CLK_LOCAL_MEM_FENCE);
   }

   if(lid == 0) {
      data[get_group_id(0)] = partial_sums[0];
   }
}

__kernel void reduction_complete(__global float4* data, 
      __local float4* partial_sums, __global float* sum) {

   int lid = get_local_id(0);
   int group_size = get_local_size(0);

   partial_sums[lid] = data[get_local_id(0)];
   barrier(CLK_LOCAL_MEM_FENCE);

   for(int i = group_size/2; i>0; i >>= 1) {
      if(lid < i) {
         partial_sums[lid] += partial_sums[lid + i];
      }
      barrier(CLK_LOCAL_MEM_FENCE);
   }

   if(lid == 0) {
      *sum = partial_sums[0].s0 + partial_sums[0].s1 +
             partial_sums[0].s2 + partial_sums[0].s3;
   }
}

I dont know what causes the stackoverflow…

  • 1 1 Answer
  • 0 Views
  • 0 Followers
  • 0
Share
  • Facebook
  • Report

Leave an answer
Cancel reply

You must login to add an answer.

Forgot Password?

Need An Account, Sign Up Here

1 Answer

  • Voted
  • Oldest
  • Recent
  • Random
  1. Editorial Team
    Editorial Team
    2026-06-01T16:08:47+00:00Added an answer on June 1, 2026 at 4:08 pm

    I don’t see any recursion so my guess is the float data[ARRAY_SIZE]; where #define ARRAY_SIZE 1048576 is putting 4MB on the stack which is pretty large. Try changing that to a dynamic allocation.

    • 0
    • Reply
    • Share
      Share
      • Share on Facebook
      • Share on Twitter
      • Share on LinkedIn
      • Share on WhatsApp
      • Report

Sidebar

Related Questions

link Im having trouble converting the html entites into html characters, (&# 8217;) i
We're building an app, our first using Rails 3, and we're having to build
That's pretty much it. I'm using Nokogiri to scrape a web page what has
For some reason, after submitting a string like this Jack’s Spindle from a text
I have a string like this: La Torre Eiffel paragonata all&#8217;Everest What PHP function
I am reading a book about Javascript and jQuery and using one of the
I'm using v2.0 of ClassTextile.php, with the following call: $testimonial_text = $textile->TextileRestricted($_POST['testimonial']); ... and
I would like to run a str_replace or preg_replace which looks for certain words
I'm parsing an RSS feed that has an &#8217; in it. SimpleXML turns this
We are using XSLT to translate a RIXML file to XML. Our RIXML contains

Explore

  • Home
  • Add group
  • Groups page
  • Communities
  • Questions
    • New Questions
    • Trending Questions
    • Must read Questions
    • Hot Questions
  • Polls
  • Tags
  • Badges
  • Users
  • Help
  • SEARCH

Footer

© 2021 The Archive Base. All Rights Reserved
With Love by The Archive Base

Insert/edit link

Enter the destination URL

Or link to existing content

    No search term specified. Showing recent items. Search or use up and down arrow keys to select an item.