(Note: I saw this post, tell me if that’s the same problem: C: performance of pthread, low than single thrad)
I’m learning the pthread library. I wrote two versions of the same C program. The program takes a list of large BAM files and counts the number of records in each file using the samtools library.
Here is the single-threaded program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "bam.h"
/**
 * Maximum number of threads.
 * NOTE(review): nothing in this single-threaded program reads this constant;
 * it looks like a leftover from the multi-threaded variant.
 */
static const int MAX_COUNT_THREADS=4;
/** Description of one scanning job, handed to scan_bam(). */
struct Param {
    /* Path of the BAM file to scan; main() points it at an argv entry. */
    char *filename;
};
/**
 * Print one result line to stdout: the file name, a TAB, the record count
 * and a newline.
 *
 * @param filename  text printed in the first column
 * @param count     number of records counted for that file
 */
static void printCount(const char *filename, unsigned long count)
{
    /* %lu, not %ld: count is unsigned long, and a mismatched conversion
       specifier is undefined behaviour (huge counts could print negative). */
    fprintf(stdout, "%s\t%lu\n", filename, count);
}
/**
 * Count the records of one BAM file and print the result with printCount().
 *
 * A timestamped message is written to stderr when the scan starts and when
 * it ends. The process exits with EXIT_FAILURE if the file cannot be opened,
 * its header cannot be read, or the record buffer cannot be allocated.
 *
 * @param ptr  heap-allocated struct Param; this function frees it before
 *             returning, so the caller must not touch it afterwards
 * @return always NULL
 */
static void* scan_bam(void* ptr)
{
    struct Param *params = ptr;
    unsigned long count = 0;
    bam_header_t *header = NULL;
    bam1_t *b = NULL;
    bamFile in;
    time_t rawtime;
    int ret;

    in = bam_open(params->filename, "r");
    time(&rawtime);
    /* ctime() already ends its string with '\n'. */
    fprintf(stderr, "STARTING : %s %s", params->filename, ctime(&rawtime));
    if (in == 0) {
        fprintf(stderr, "Cannot read %s.\n", params->filename);
        exit(EXIT_FAILURE);
    }

    /* The header must be consumed before any record can be read. */
    header = bam_header_read(in);
    if (header == NULL) {
        fprintf(stderr, "Cannot read BAM header of %s.\n", params->filename);
        exit(EXIT_FAILURE);
    }

    b = bam_init1();
    if (b == NULL) {
        fprintf(stderr, "Out of memory.\n");
        exit(EXIT_FAILURE);
    }

    while ((ret = bam_read1(in, b)) > 0) {
        ++count;
    }
    /* NOTE(review): in samtools 0.1.x, bam_read1() is understood to return -1
       at a normal end of file and a smaller value for a truncated or corrupt
       file - confirm against the bam.h in use. Report that case rather than
       silently printing a partial count. */
    if (ret < -1) {
        fprintf(stderr, "Warning: %s looks truncated or corrupt (bam_read1 returned %d); count may be incomplete.\n",
                params->filename, ret);
    }

    bam_destroy1(b);
    bam_header_destroy(header);
    bam_close(in);

    printCount(params->filename, count);
    time(&rawtime);
    fprintf(stderr, "end for %s %s", params->filename, ctime(&rawtime));

    free(params);
    return NULL;
}
/**
 * Entry point: scan every BAM file named on the command line, one after
 * the other, in argument order.
 *
 * Each file gets its own heap-allocated struct Param, which scan_bam()
 * frees when it is done.
 */
int main(int argc,char** argv)
{
    int i;

    for (i = 1; i < argc; ++i) {
        struct Param *job = malloc(sizeof *job);

        if (job == NULL) {
            fprintf(stderr,"Out of memory.\n");
            exit(EXIT_FAILURE);
        }
        job->filename = argv[i];
        scan_bam(job);
    }
    return EXIT_SUCCESS;
}
And here is the multi-threaded program. It is meant to run up to 5 worker threads at a time, and it uses a condition variable (with its mutex) to count the running threads and to tell the main thread when it may start a new one.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <time.h>
#include "bam.h"
/**
 * Abort the program when a call that should return 0 did not.
 *
 * On a non-zero value, prints the source file, line and the value to stderr,
 * then calls exit(EXIT_FAILURE).
 *
 * The argument is evaluated exactly once and is parenthesised, so an
 * expression such as `flags & 2` or a direct function call is safe to pass.
 * The local has a deliberately unusual name so that it cannot shadow a
 * caller's variable such as `ret`.
 */
#define VERIFY_ZERO(a) do { \
    const int verify_zero_value_ = (a); \
    if (verify_zero_value_ != 0) { \
        fprintf(stderr,"Test failed at %s line %d (ret=%d).\n",__FILE__,__LINE__,verify_zero_value_); \
        exit(EXIT_FAILURE); \
    } \
} while(0)
/**
 * Maximum number of threads.
 * main() compares globals.number_of_threads against this value before it
 * starts another worker thread.
 */
static const int MAX_COUNT_THREADS=5;
/** Description of one scanning job, handed to a scan_bam() worker thread. */
struct Param {
    /* Thread handle slot for the worker scanning this file. */
    pthread_t thread;
    /* Path of the BAM file to scan; main() points it at an argv entry. */
    char *filename;
};
/** Synchronisation state shared by main() and the worker threads. */
struct GLOBALS {
    pthread_mutex_t mutex_print;            /* held while printCount() writes to stdout */
    pthread_cond_t accept_new_thread;       /* signalled by a worker when it finishes */
    pthread_mutex_t accept_new_thread_lock; /* guards number_of_threads; used with the condition */
    int number_of_threads;                  /* number of running threads */
};

/* Single shared instance, statically initialised with no thread running. */
static struct GLOBALS globals = {
    PTHREAD_MUTEX_INITIALIZER, /* mutex_print */
    PTHREAD_COND_INITIALIZER,  /* accept_new_thread */
    PTHREAD_MUTEX_INITIALIZER, /* accept_new_thread_lock */
    0                          /* number_of_threads */
};
/**
 * Print one result line to stdout: the file name, a TAB, the record count
 * and a newline.
 *
 * The write is done while holding globals.mutex_print. A failing lock or
 * unlock aborts the program through VERIFY_ZERO.
 *
 * @param filename  text printed in the first column
 * @param count     number of records counted for that file
 */
static void printCount(const char* filename,unsigned long count)
{
    int ret=pthread_mutex_lock(&globals.mutex_print);
    VERIFY_ZERO(ret);
    /* %lu, not %ld: count is unsigned long, and a mismatched conversion
       specifier is undefined behaviour (huge counts could print negative). */
    fprintf(stdout,"%s\t%lu\n",filename,count);
    ret=pthread_mutex_unlock(&globals.mutex_print);
    VERIFY_ZERO(ret);
}
/**
 * Write "<label> <filename> <current time>" to stderr.
 *
 * Uses ctime_r() with a caller-owned buffer: plain ctime() returns a pointer
 * to a shared static buffer, so concurrent calls from several worker threads
 * could corrupt each other's timestamps.
 */
static void log_timestamped(const char *label, const char *filename)
{
    char stamp[32]; /* ctime_r() needs at least 26 bytes */
    time_t now;

    time(&now);
    if (ctime_r(&now, stamp) == NULL) {
        strcpy(stamp, "(unknown time)\n");
    }
    /* The timestamp string already ends with '\n'. */
    fprintf(stderr, "%s %s %s", label, filename, stamp);
}

/**
 * Thread start routine: count the records of one BAM file, print the result
 * with printCount(), then decrement globals.number_of_threads and signal
 * globals.accept_new_thread so that main() can proceed.
 *
 * Timestamped progress messages go to stderr. The whole process exits with
 * EXIT_FAILURE if the file cannot be opened, its header cannot be read, the
 * record buffer cannot be allocated, or a pthread call fails.
 *
 * @param ptr  heap-allocated struct Param; this function frees it before
 *             returning, so the creator must not touch it once the thread
 *             has been started
 * @return always NULL
 */
static void* scan_bam(void* ptr)
{
    struct Param *params = ptr;
    unsigned long count = 0;
    bam_header_t *header = NULL;
    bam1_t *b = NULL;
    bamFile in;
    int ret;

    in = bam_open(params->filename, "r");
    log_timestamped("STARTING :", params->filename);
    if (in == 0) {
        fprintf(stderr, "Cannot read %s.\n", params->filename);
        exit(EXIT_FAILURE);
    }

    /* The header must be consumed before any record can be read. */
    header = bam_header_read(in);
    if (header == NULL) {
        fprintf(stderr, "Cannot read BAM header of %s.\n", params->filename);
        exit(EXIT_FAILURE);
    }

    b = bam_init1();
    if (b == NULL) {
        fprintf(stderr, "Out of memory.\n");
        exit(EXIT_FAILURE);
    }

    while ((ret = bam_read1(in, b)) > 0) {
        ++count;
    }
    /* NOTE(review): in samtools 0.1.x, bam_read1() is understood to return -1
       at a normal end of file and a smaller value for a truncated or corrupt
       file - confirm against the bam.h in use. Report that case rather than
       silently printing a partial count. */
    if (ret < -1) {
        fprintf(stderr, "Warning: %s looks truncated or corrupt (bam_read1 returned %d); count may be incomplete.\n",
                params->filename, ret);
    }

    bam_destroy1(b);
    bam_header_destroy(header);
    bam_close(in);

    printCount(params->filename, count);
    log_timestamped("end1 for", params->filename);

    /* Give the slot back and wake main(), which waits on this condition both
       to start the next file and to detect that all workers are done. */
    ret = pthread_mutex_lock(&globals.accept_new_thread_lock);
    VERIFY_ZERO(ret);
    globals.number_of_threads--;
    ret = pthread_cond_signal(&globals.accept_new_thread);
    VERIFY_ZERO(ret);
    ret = pthread_mutex_unlock(&globals.accept_new_thread_lock);
    VERIFY_ZERO(ret);

    /* NOTE(review): once the counter reaches zero main() may return, so for
       the last worker this final message can race with process exit. */
    log_timestamped("end2 for", params->filename);
    free(params);
    return NULL;
}
/**
 * Entry point: scan every BAM file named on the command line, running at
 * most MAX_COUNT_THREADS scan_bam() worker threads at the same time.
 *
 * For each file, main() waits on globals.accept_new_thread until a slot is
 * free, reserves the slot by incrementing globals.number_of_threads, and
 * starts a detached worker. After the last file it waits until the counter
 * is back to zero. Any failing pthread call aborts the program.
 *
 * @return EXIT_SUCCESS once every worker has reported completion
 */
int main(int argc,char** argv)
{
    int optind=1;
    int ret;

    while(optind<argc)
    {
        pthread_t tid;
        struct Param* params=malloc(sizeof *params);
        if(params==NULL)
        {
            fprintf(stderr,"Out of memory.\n");
            exit(EXIT_FAILURE);
        }
        params->filename=argv[optind++];

        ret=pthread_mutex_lock(&globals.accept_new_thread_lock);
        VERIFY_ZERO(ret);
        /* >= rather than >: with '>' a sixth thread was started while five
           were already running, exceeding MAX_COUNT_THREADS by one. */
        while (globals.number_of_threads >= MAX_COUNT_THREADS)
        {
            ret=pthread_cond_wait(&globals.accept_new_thread, &globals.accept_new_thread_lock);
            VERIFY_ZERO(ret);
        }
        globals.number_of_threads++;
        ret=pthread_mutex_unlock(&globals.accept_new_thread_lock);
        VERIFY_ZERO(ret);

        fprintf(stderr,"creating %s\n",params->filename);

        /* The thread id goes into a local, not params->thread: the worker
           owns params from here on and may free it before pthread_create()
           returns, so params must not be read or written again.
           params->thread is intentionally left unset. */
        ret=pthread_create(&tid, NULL, scan_bam, params);
        /* A failed create would leave the counter incremented for ever and
           hang the final wait below, so treat it as fatal. */
        VERIFY_ZERO(ret);
        ret=pthread_detach(tid);
        VERIFY_ZERO(ret);
    }

    /* Wait until every worker has decremented the counter. */
    ret=pthread_mutex_lock(&globals.accept_new_thread_lock);
    VERIFY_ZERO(ret);
    while (globals.number_of_threads > 0)
    {
        ret=pthread_cond_wait(&globals.accept_new_thread, &globals.accept_new_thread_lock);
        VERIFY_ZERO(ret);
    }
    ret=pthread_mutex_unlock(&globals.accept_new_thread_lock);
    VERIFY_ZERO(ret);

    ret=pthread_cond_destroy(&globals.accept_new_thread);
    VERIFY_ZERO(ret);
    return EXIT_SUCCESS;
}
Compile and run the multi-threaded program
gcc -O3 -Wall jeter.c -pthread -I/usr/local/package/samtools-0.1.18 -L/usr/local/package/samtools-0.1.18/ -lbam -lz
$ time (find .// -name "*recal.bam" | grep Item1[0-9] | xargs ./a.out )
creating ./Item10/recal.bam
creating ./Item11/recal.bam
creating ./Item12/recal.bam
creating ./Item13/recal.bam
creating ./Item14/recal.bam
creating ./Item15/recal.bam
STARTING : ./Item10/recal.bam Tue Dec 18 15:12:48 2012
STARTING : ./Item11/recal.bam Tue Dec 18 15:12:48 2012
STARTING : ./Item12/recal.bam Tue Dec 18 15:12:48 2012
STARTING : ./Item14/recal.bam Tue Dec 18 15:12:48 2012
STARTING : ./Item13/recal.bam Tue Dec 18 15:12:48 2012
STARTING : ./Item15/recal.bam Tue Dec 18 15:12:48 2012
./Item10/recal.bam 185784310
end1 for ./Item10/recal.bam Tue Dec 18 15:38:16 2012
end2 for ./Item10/recal.bam Tue Dec 18 15:38:16 2012
creating ./Item16/recal.bam
STARTING : ./Item16/recal.bam Tue Dec 18 15:38:16 2012
./Item11/recal.bam 204408906
end1 for ./Item11/recal.bam Tue Dec 18 15:41:52 2012
end2 for ./Item11/recal.bam Tue Dec 18 15:41:52 2012
creating ./Item17/recal.bam
STARTING : ./Item17/recal.bam Tue Dec 18 15:41:52 2012
./Item12/recal.bam 207766317
end1 for ./Item12/recal.bam Tue Dec 18 15:42:17 2012
end2 for ./Item12/recal.bam Tue Dec 18 15:42:17 2012
creating ./Item18/recal.bam
STARTING : ./Item18/recal.bam Tue Dec 18 15:42:17 2012
./Item15/recal.bam 224957522
end1 for ./Item15/recal.bam Tue Dec 18 15:44:54 2012
end2 for ./Item15/recal.bam Tue Dec 18 15:44:54 2012
creating ./Item19/recal.bam
STARTING : ./Item19/recal.bam Tue Dec 18 15:44:54 2012
./Item13/recal.bam 224548326
end1 for ./Item13/recal.bam Tue Dec 18 15:45:32 2012
end2 for ./Item13/recal.bam Tue Dec 18 15:45:32 2012
./Item14/recal.bam 241267346
end1 for ./Item14/recal.bam Tue Dec 18 15:48:28 2012
end2 for ./Item14/recal.bam Tue Dec 18 15:48:28 2012
./Item16/recal.bam 227446579
end1 for ./Item16/recal.bam Tue Dec 18 16:12:15 2012
end2 for ./Item16/recal.bam Tue Dec 18 16:12:15 2012
./Item17/recal.bam 215307379
end1 for ./Item17/recal.bam Tue Dec 18 16:13:05 2012
end2 for ./Item17/recal.bam Tue Dec 18 16:13:05 2012
./Item18/recal.bam 225914723
end1 for ./Item18/recal.bam Tue Dec 18 16:13:48 2012
end2 for ./Item18/recal.bam Tue Dec 18 16:13:48 2012
./Item19/recal.bam 225509630
end1 for ./Item19/recal.bam Tue Dec 18 16:14:06 2012
end2 for ./Item19/recal.bam Tue Dec 18 16:14:06 2012
.
real 61m17.560s
user 66m0.476s
sys 4m5.980s
Compile and run the single-threaded program
$ gcc -O3 -Wall jeter2.c -I/usr/local/package/samtools-0.1.18 -L/usr/local/package/samtools-0.1.18/ -lbam -lz
time (find .// -name "*recal.bam" | grep Item1[0-9] | xargs ./a.out )
STARTING : ./Item10/recal.bam Tue Dec 18 16:15:25 2012
./Item10/recal.bam 185784310
end for ./Item10/recal.bam Tue Dec 18 16:20:43 2012
STARTING : ./Item11/recal.bam Tue Dec 18 16:20:43 2012
./Item11/recal.bam 204408906
end for ./Item11/recal.bam Tue Dec 18 16:26:20 2012
STARTING : ./Item12/recal.bam Tue Dec 18 16:26:20 2012
./Item12/recal.bam 207766317
end for ./Item12/recal.bam Tue Dec 18 16:31:56 2012
STARTING : ./Item13/recal.bam Tue Dec 18 16:31:56 2012
./Item13/recal.bam 224548326
end for ./Item13/recal.bam Tue Dec 18 16:38:05 2012
STARTING : ./Item14/recal.bam Tue Dec 18 16:38:05 2012
./Item14/recal.bam 241267346
end for ./Item14/recal.bam Tue Dec 18 16:44:59 2012
STARTING : ./Item15/recal.bam Tue Dec 18 16:44:59 2012
./Item15/recal.bam 224957522
end for ./Item15/recal.bam Tue Dec 18 16:50:56 2012
STARTING : ./Item16/recal.bam Tue Dec 18 16:50:56 2012
./Item16/recal.bam 227446579
end for ./Item16/recal.bam Tue Dec 18 16:58:07 2012
STARTING : ./Item17/recal.bam Tue Dec 18 16:58:07 2012
./Item17/recal.bam 215307379
end for ./Item17/recal.bam Tue Dec 18 17:04:58 2012
STARTING : ./Item18/recal.bam Tue Dec 18 17:04:58 2012
./Item18/recal.bam 225914723
end for ./Item18/recal.bam Tue Dec 18 17:11:31 2012
STARTING : ./Item19/recal.bam Tue Dec 18 17:11:31 2012
./Item19/recal.bam 225509630
end for ./Item19/recal.bam Tue Dec 18 17:18:19 2012
.
real 62m54.503s
user 53m39.529s
sys 3m44.580s
Both programs ran for about one hour, so the multi-threaded program appears to be no faster than (or even slower than) the single-threaded one. Why? Is it possible to speed up that code?
It looks like I/O operations (reading from files) dominate in your program, so it’s quite likely you won’t get much benefit from threading, no matter how well it is done.
Also note that the multithreaded variant is in fact a little bit faster; you need to compare real time. The user time is bigger due to multithreading, as it sums up the time spent in user mode by all threads. Same for kernel time.