Imagine the following situation:
// Argument bundle passed by value to thing(); it carries a single boolean flag.
struct Args
{
bool arg; // flag that thing() tests to choose which message to print
};
// Writes "arg.arg is true\n" to cout when arg.arg is set, otherwise writes
// "arg.arg is false\n". The function is declared without static or inline.
void thing(Args arg)
{
if(arg.arg)
cout<<"arg.arg is true\n";
else
cout<<"arg.arg is false\n";
}
// Builds an Args with the flag cleared and calls thing() once, so in this
// snippet only the "false" message is ever printed.
int main()
{
Args a;
a.arg=false; // the flag is a literal false at the only call in this snippet
thing(a);
}
Is the compiler smart enough to remove switch, if, and else branches that will obviously never be executed over the course of the program? Must the variable that controls these statements be const? And lastly, is the right thing to do not to use variables at all but to use the preprocessor (I’m scared by the thought of that code)?
Just to clarify, the real situation is that I’m writing a class where the programmer can choose whether to enable a certain feature. Disabling the feature can save a lot of processing time on the server and some bandwidth between the class and the server. I’m trying to figure out whether I should use a variable as a constructor argument, a preprocessor directive, or some other solution. When the feature is disabled, I don’t want its logic branch to even be considered. I know that the preprocessor solution will do this, but I want to avoid the mass use of #ifdef and #elif, and I want to be able to reuse one compiled shared object. The source being open to the programmer is no problem, as this is going to be open source.
EDIT: I tested the compile line below and looked at the assembly. I can’t make full sense of it, but I did see a conditional jump instruction (jne). Here is the assembly if anyone can make it out:
# Assembler source for blah.cpp (GNU as directives).
.file "blah.cpp"
# Section holding the two message string literals defined below.
.section .rodata.str1.1,"aMS",@progbits,1
# .LC0: the message printed when the flag is true (16 characters).
.LC0:
.string "arg.arg is true\n"
# .LC1: the message printed when the flag is false (17 characters).
.LC1:
.string "arg.arg is false\n"
# _Z5thing4Args -- exported (.globl) stand-alone copy of the function whose
# mangled name reads thing(Args). 32-bit x86, AT&T operand order (source, dest).
# It tests one byte of its stack argument and then calls the ostream insert
# helper with either the .LC1 ("false") or the .LC0 ("true") string.
.text
.p2align 4,,15
.globl _Z5thing4Args
.type _Z5thing4Args, @function
_Z5thing4Args:
.LFB1003:
.cfi_startproc
# Set up a frame pointer and reserve 24 bytes for outgoing call arguments.
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
subl $24, %esp
# Compare the byte at 8(%ebp) with 0 -- presumably arg.arg, the first stack
# argument above the saved %ebp and return address -- and jump to .L5 when it
# is non-zero.
cmpb $0, 8(%ebp)
jne .L5
# Fall-through path (byte was zero): call the insert helper with _ZSt4cout,
# the .LC1 string and its length 17.
movl $17, 8(%esp)
movl $.LC1, 4(%esp)
movl $_ZSt4cout, (%esp)
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i
leave
.cfi_remember_state
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.p2align 4,,7
.p2align 3
# Jump target (byte was non-zero): same call, but with the .LC0 string and
# its length 16.
.L5:
.cfi_restore_state
movl $16, 8(%esp)
movl $.LC0, 4(%esp)
movl $_ZSt4cout, (%esp)
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i
leave
.cfi_def_cfa 4, 4
.cfi_restore 5
ret
.cfi_endproc
.LFE1003:
.size _Z5thing4Args, .-_Z5thing4Args
# main -- note what is absent: there is no call to _Z5thing4Args and no
# compare or conditional jump. The body passes _ZSt4cout, the .LC1 ("false")
# string and its length 17 directly to the ostream insert helper, so only the
# "false" message path exists in this function.
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB1004:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
# Round %esp down to a multiple of 16, then reserve 16 bytes for call arguments.
andl $-16, %esp
subl $16, %esp
# Unconditional call with the "false" message.
movl $17, 8(%esp)
movl $.LC1, 4(%esp)
movl $_ZSt4cout, (%esp)
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i
# Zero %eax before returning (main's return value is 0).
xorl %eax, %eax
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE1004:
.size main, .-main
# _GLOBAL__I__Z5thing4Args -- file-local routine (it has no .globl directive).
# It calls _ZNSt8ios_base4InitC1Ev (mangled std::ios_base::Init constructor)
# with the address of _ZStL8__ioinit, then passes _ZNSt8ios_base4InitD1Ev (the
# matching destructor), that same object address and __dso_handle to
# __cxa_atexit. It has nothing to do with the if/else in thing().
.p2align 4,,15
.type _GLOBAL__I__Z5thing4Args, @function
_GLOBAL__I__Z5thing4Args:
.LFB1009:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
subl $24, %esp
# Construct the object at _ZStL8__ioinit.
movl $_ZStL8__ioinit, (%esp)
call _ZNSt8ios_base4InitC1Ev
# Register the destructor call for that object via __cxa_atexit.
movl $__dso_handle, 8(%esp)
movl $_ZStL8__ioinit, 4(%esp)
movl $_ZNSt8ios_base4InitD1Ev, (%esp)
call __cxa_atexit
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE1009:
.size _GLOBAL__I__Z5thing4Args, .-_GLOBAL__I__Z5thing4Args
# Boilerplate emitted after the functions; none of it is executable code.
# The .ctors section receives a pointer to _GLOBAL__I__Z5thing4Args.
.section .ctors,"aw",@progbits
.align 4
.long _GLOBAL__I__Z5thing4Args
# One-byte, file-local common symbol for _ZStL8__ioinit.
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
# Weak references: each __gthrw_* alias on the left refers to the pthread or
# sched function named on the right.
.weakref _ZL20__gthrw_pthread_oncePiPFvvE,pthread_once
.weakref _ZL27__gthrw_pthread_getspecificj,pthread_getspecific
.weakref _ZL27__gthrw_pthread_setspecificjPKv,pthread_setspecific
.weakref _ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_,pthread_create
.weakref _ZL20__gthrw_pthread_joinmPPv,pthread_join
.weakref _ZL21__gthrw_pthread_equalmm,pthread_equal
.weakref _ZL20__gthrw_pthread_selfv,pthread_self
.weakref _ZL22__gthrw_pthread_detachm,pthread_detach
.weakref _ZL22__gthrw_pthread_cancelm,pthread_cancel
.weakref _ZL19__gthrw_sched_yieldv,sched_yield
.weakref _ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t,pthread_mutex_lock
.weakref _ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t,pthread_mutex_trylock
.weakref _ZL31__gthrw_pthread_mutex_timedlockP15pthread_mutex_tPK8timespec,pthread_mutex_timedlock
.weakref _ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t,pthread_mutex_unlock
.weakref _ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t,pthread_mutex_init
.weakref _ZL29__gthrw_pthread_mutex_destroyP15pthread_mutex_t,pthread_mutex_destroy
.weakref _ZL30__gthrw_pthread_cond_broadcastP14pthread_cond_t,pthread_cond_broadcast
.weakref _ZL27__gthrw_pthread_cond_signalP14pthread_cond_t,pthread_cond_signal
.weakref _ZL25__gthrw_pthread_cond_waitP14pthread_cond_tP15pthread_mutex_t,pthread_cond_wait
.weakref _ZL30__gthrw_pthread_cond_timedwaitP14pthread_cond_tP15pthread_mutex_tPK8timespec,pthread_cond_timedwait
.weakref _ZL28__gthrw_pthread_cond_destroyP14pthread_cond_t,pthread_cond_destroy
.weakref _ZL26__gthrw_pthread_key_createPjPFvPvE,pthread_key_create
.weakref _ZL26__gthrw_pthread_key_deletej,pthread_key_delete
.weakref _ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t,pthread_mutexattr_init
.weakref _ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti,pthread_mutexattr_settype
.weakref _ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t,pthread_mutexattr_destroy
# Identification string of the compiler that produced this listing.
.ident "GCC: (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2"
# Empty .note.GNU-stack section (by GNU toolchain convention this presumably
# marks the object as not needing an executable stack -- confirm in the docs).
.section .note.GNU-stack,"",@progbits
EDIT: I looked at the assembly more after adding asm("#aksdjfh") as suggested below, and I have found that the compiler does not get rid of it. So are #ifdefs the only option? Or is the jne instruction something that I can effectively ignore for performance?
Try it out yourself:
-O3 turns optimizations on, -S tells the compiler to stop after generating assembly code, and -o selects where to place the output. Then you can inspect the “test.s” file and see if it optimized it or not. Obviously this requires some knowledge of assembly. You may also want -masm=intel if you, like me, find AT&T syntax unreadable and prefer Intel syntax. It might help to add lines like
asm("# this is something") to the code. These will appear as comments in the generated assembly, which may make it easier to identify the portions you’re interested in. On my machine, a snapshot of GCC 4.8 does not seem to optimize that dead code away. I added one of those asm comments to each branch to recognize them and it generated this:
If you look for those comments, you’ll find them both followed by a call to some
std::cout member function. This happens because, as is, the function is visible to other translation units: if you now make a
nasty.cpp file with a declaration void thing(Args arg); and a call with the value true, the code must exist. So I experimented a bit further. If I mark the function as
static, meaning it is internal to that translation unit, GCC does indeed optimize the dead code away: You won’t find “This is the true branch” in that code. Also, note how the false branch was moved into the
main function and the thing function doesn’t exist anymore. GCC simply inlined the function’s code and didn’t bother generating it because it’s not going to be used anywhere else now that I’ve added static. If I mark it as
inline, it will still be visible outside, but apparently that’s enough of a hint for GCC to optimize it too. However, if you do this, you’ll have to make sure other translation units see the same definition, so that code can be generated as necessary for each of them.