I’m doing a benchmark with two versions of a function to filter a linked list, one that receives a predicate function as an argument, and one in which the predicate is built into the function using a “macro template”. I would expect the first to run more slowly, because it is making a function call on every iteration, but on my box their speeds are about the same. Any clue about why is this happening? Any help is appreciated.
Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
struct list_t {
int val;
struct list_t *next;
};
typedef struct list_t list_t;
// Removes elements not matching the predicate.
// NOTE: This is not freeing the removed elements.
list_t *keep(int (*predfun)(int,int), int predarg, list_t *list) {
list_t *this, *prev;
for (this=list, prev=NULL; this; prev=this, this=this->next) {
if (!predfun(this->val, predarg)) {
if (!prev) {
list = this->next;
}
else {
prev->next = this->next;
}
}
}
return list;
}
int less(int a, int b) {
return a<b;
}
// A "template" macro for the keep function.
// The idea is to embed the predicate into the function, so that we
// don't have to make a function call on each iteration.
#define build_keep(test) { \
list_t *this, *prev; \
\
for (this=list, prev=NULL; this; prev=this, this=this->next) { \
if (!(test)) { \
if (!prev) { \
list = this->next; \
} \
else { \
prev->next = this->next; \
} \
} \
} \
return list; \
}
list_t *keep_less(int arg, list_t *list) {
build_keep(this->val < arg);
}
#define LEN 1000000
#define MOD 1024
// Creates a new list.
list_t *buildlist() {
int i;
list_t *list, *last, *t;
list=NULL, last=NULL;
srand(0); // Using always the same seed for the benchmark.
for (i=0; i<LEN; i++) {
if (!last) {
last = malloc(sizeof(list_t));
list = last;
}
else {
last->next = malloc(sizeof(list_t));
last = last->next;
}
last->val = rand() % MOD;
}
last->next = NULL;
return list;
}
int main() {
struct timeval t0, t1;
list_t *list, *t;
// With macro.
list = buildlist();
//for (t=list; t; t=t->next) printf("%d ", t->val); printf("\n");
gettimeofday(&t0, NULL);
keep_less(500, list);
gettimeofday(&t1, NULL);
printf("keep_less: %lf\n", (1000000 * (t1.tv_sec - t0.tv_sec) + (t1.tv_usec - t0.tv_usec))/1000000.0);
//for (t=list; t; t=t->next) printf("%d ", t->val); printf("\n");
printf("\n");
// Without macro.
list = buildlist();
//for (t=list; t; t=t->next) printf("%d ", t->val); printf("\n");
gettimeofday(&t0, NULL);
keep(less, 500, list);
gettimeofday(&t1, NULL);
printf("keep: %lf\n", (1000000 * (t1.tv_sec - t0.tv_sec) + (t1.tv_usec - t0.tv_usec))/1000000.0);
//for (t=list; t; t=t->next) printf("%d ", t->val); printf("\n");
return 0;
}
The output here is:
keep_less: 0.181019
keep: 0.185590
I wouldn’t say the results are about the same – you do see a 4 milisecond (~2%) difference in favor of the immediate version.
This is actually relatively substantial – it’s possible to achieve such a high saving with saving just a function call only because your test functions did very little to start with. If you expected more out of this optimization, you might have stumbled upon an important lesson… (that personally, I have to re-learn once every few weeks :] )