I’m working on a project where i need to load and process gigabytes of sequences from a file. Since i’m dealing with a lot of data, i can’t save it on RAM. So i’m using one thread to load data from this file and save in a queue and one thread to, once detect that there is something on queue, unload it to some temporary file.
I’m having some trouble doing this. Looks like there is a racing condition. Sometimes it works, sometimes it return a segmentation fault.
I wrote a minimal code example with the bug.
This is my queue code:
//####################
// STRUCTS
//####################
struct queue_item{
char *seq;
struct queue_item *prox;//Next element
};
typedef struct queue_item QueueItem;
struct Queue{
QueueItem *first;//First element on queue
QueueItem *end;//Last element on queue
int size;//Queue size
};
typedef struct Queue Queue;
//####################
Queue* create_queue(){
Queue *f;
f = (Queue*)malloc(sizeof(Queue));
f->size = 0;
return f;
}
QueueItem* new_queue_item(char *seq){
QueueItem* new;
int n;
n = strlen(seq);
new = (QueueItem*)malloc(sizeof(QueueItem));
new->seq = (char*)malloc((n+1)*sizeof(char));
strcpy(new->seq,seq);
return new;
}
void enqueue(Queue *f,char *seq){
QueueItem* new;
new = new_queue_item(seq);
switch(f->size){
case 0:
f->first = new;
break;
case 1:
f->end = new;
f->first->prox = f->end;
break;
default:
f->end->prox = new;
f->end = new;
break;
}
f->size = f->size + 1;
return;
}
char* dequeue(Queue *f){
QueueItem *hold;
char *seq;
if(f->size > 0){
hold = f->first;
seq = f->first->seq;
f->first = f->first->prox;
free(hold);
f->size--;
}
return seq;
}
int queue_size(Queue *f){
return f->size;
}
void seq_to_file(char *seq,FILE *f){
if(seq != NULL){
fputs(seq,f);
free(seq);
}
return;
}
This is my main code:
Queue *f;
int i;
int end_flag;
char *seq;
f = create_queue();
end_flag = 0;
#pragma omp parallel shared(f) shared(end_flag)
{
#pragma omp sections
{
#pragma omp section
{
FILE *tmp;
tmp = fopen("tmp","w");
while(!end_flag){
if(queue_size(f) > 0)
seq_to_file(dequeue(f),tmp);
}
fclose(tmp);
}
#pragma omp section
{
seq = (char*)malloc(21*sizeof(char));
strcpy(seq,"ABCDEFGHIJKLMNOPQRST");
for(i=0;i < NSEQS;i++)
enqueue(f,seq);
end_flag = 1;
}
}
}
Some errors that i detected:
1 – malloc error on new_queue_item() line:
new->seq = (char*)malloc((n+1)*sizeof(char));
* glibc detected * /home/pedro/Dropbox/Programação/C/Queue/fila_teste: double free or corruption (out): 0x00000000006f3bd0 *
glibc detected /home/pedro/Dropbox/Programação/C/Queue/fila_teste: malloc(): memory corruption (fast): 0x00000000006f3b70 *
2 – malloc error on new_queu_item() line:
new = (QueueItem*)malloc(sizeof(QueueItem));
3 – free error on seq_to_file() line:
free(seq);
* glibc detected * /home/pedro/Dropbox/Programação/C/Queue/fila_teste: double free or corruption (out): 0x0000000000cdd3f0 *
Checking with gdb i have:
(gdb) print *f
$16 = {first = 0x0, end = 0x611180, size = 426}
This third error make me think that this really is a race condition situation.
I tried to simulate a semaphore with “end_flag”, but don’t think it is sufficient. Also,don’t think “critical” and “atomic” clauses will help here since they only protect access on code areas, not memory.
Any idea how to solve this problem?
If you are not planning to reuse this code, you may use the following:
#pragma omp critical(queueLock)This directive will act as a “named” mutex, “queueLock”, in the above example. In you case, you must use it in enqueue, dequeue and queue_size functions, since they all use shared data.
If you plan to reuse this code, you should learn about the OpenMP locks (
omp_lock_t).