I’ve found that != and == are not the fastest ways for testing for

Question

0

Asked: June 5, 20262026-06-05T05:02:37+00:00 2026-06-05T05:02:37+00:00

I’ve found that != and == are not the fastest ways for testing for

0

I’ve found that != and == are not the fastest ways for testing for zero or non-zero.

bool nonZero1 = integer != 0;
xor eax, eax
test ecx, ecx
setne al

bool nonZero2 = integer < 0 || integer > 0;
test ecx, ecx
setne al

bool zero1 = integer == 0;
xor eax, eax
test ecx, ecx
sete al

bool zero2 = !(integer < 0 || integer > 0);
test ecx, ecx
sete al

Compiler: VC++ 11
Optimization flags: /O2 /GL /LTCG

This is the assembly output for x86-32. The second versions of both comparisons were ~12% faster on both x86-32 and x86-64. However, on x86-64 the instructions were identical (first versions looked exactly like the second versions), but the second versions were still faster.

Why doesn’t the compiler generate the faster version on x86-32?
Why are the second versions still faster on x86-64 when the assembly output is identical?

EDIT: I’ve added benchmarking code. ZERO: 1544ms, 1358ms NON_ZERO: 1544ms, 1358ms
http://pastebin.com/m7ZSUrcP
or
http://anonymouse.org/cgi-bin/anon-www.cgi/http://pastebin.com/m7ZSUrcP

Note: It’s probably inconvenient to locate these functions when compiled in a single source file, because main.asm goes quite big. I had zero1, zero2, nonZero1, nonZero2 in a separate source file.

EDIT2: Could someone with both VC++11 and VC++2010 installed run the benchmarking code and post the timings? It might indeed be a bug in VC++11.

Report

Leave an answer
Cancel reply

You must login to add an answer.

Need An Account,

1 Answer

Editorial Team · Answer 1 · 2026-06-05T05:02:38+00:00

EDIT: Saw OP’s assembly listing for my code. I doubt this is even a general bug with VS2011 now. This may simply be a special case bug for OP’s code. I ran OP’s code as-is with clang 3.2, gcc 4.6.2 and VS2010 and in all cases the max differences were at ~1%.

Just compiled the sources with suitable modifications to my ne.c file and the /O2 and /GL flags. Here’s the source

int ne1(int n) {
 return n != 0;
 }

 int ne2(int n) {
 return n < 0 || n > 0;
 }

 int ne3(int n) {
 return !(n == 0);
 }

int main() { int p = ne1(rand()), q = ne2(rand()), r = ne3(rand());}

and the corresponding assembly:

    ; Listing generated by Microsoft (R) Optimizing Compiler Version 16.00.30319.01 

    TITLE   D:\llvm_workspace\tests\ne.c
    .686P
    .XMM
    include listing.inc
    .model  flat

INCLUDELIB OLDNAMES

EXTRN   @__security_check_cookie@4:PROC
EXTRN   _rand:PROC
PUBLIC  _ne3
; Function compile flags: /Ogtpy
;   COMDAT _ne3
_TEXT   SEGMENT
_n$ = 8                         ; size = 4
_ne3    PROC                        ; COMDAT
; File d:\llvm_workspace\tests\ne.c
; Line 11
    xor eax, eax
    cmp DWORD PTR _n$[esp-4], eax
    setne   al
; Line 12
    ret 0
_ne3    ENDP
_TEXT   ENDS
PUBLIC  _ne2
; Function compile flags: /Ogtpy
;   COMDAT _ne2
_TEXT   SEGMENT
_n$ = 8                         ; size = 4
_ne2    PROC                        ; COMDAT
; Line 7
    xor eax, eax
    cmp eax, DWORD PTR _n$[esp-4]
    sbb eax, eax
    neg eax
; Line 8
    ret 0
_ne2    ENDP
_TEXT   ENDS
PUBLIC  _ne1
; Function compile flags: /Ogtpy
;   COMDAT _ne1
_TEXT   SEGMENT
_n$ = 8                         ; size = 4
_ne1    PROC                        ; COMDAT
; Line 3
    xor eax, eax
    cmp DWORD PTR _n$[esp-4], eax
    setne   al
; Line 4
    ret 0
_ne1    ENDP
_TEXT   ENDS
PUBLIC  _main
; Function compile flags: /Ogtpy
;   COMDAT _main
_TEXT   SEGMENT
_main   PROC                        ; COMDAT
; Line 14
    call    _rand
    call    _rand
    call    _rand
    xor eax, eax
    ret 0
_main   ENDP
_TEXT   ENDS
END

ne2() which used the <, > and || operators is clearly more expensive. ne1() and ne3() which use the == and != operators respectively, are terser and equivalent.

Visual Studio 2011 is in beta. I would consider this as a bug. My tests with two other compilers namely gcc 4.6.2 and clang 3.2, with the O2 optimization switch yielded the exact same assembly for all three tests (that I had) on my Windows 7 box. Here’s a summary:

$ cat ne.c

#include <stdbool.h>
bool ne1(int n) {
    return n != 0;
}

bool ne2(int n) {
    return n < 0 || n > 0;
}

bool ne3(int n) {
    return !(n != 0);
}

int main() {}

yields with gcc:

_ne1:
LFB0:
    .cfi_startproc
    movl    4(%esp), %eax
    testl   %eax, %eax
    setne   %al
    ret
    .cfi_endproc
LFE0:
    .p2align 2,,3
    .globl  _ne2
    .def    _ne2;   .scl    2;  .type   32; .endef
_ne2:
LFB1:
    .cfi_startproc
    movl    4(%esp), %edx
    testl   %edx, %edx
    setne   %al
    ret
    .cfi_endproc
LFE1:
    .p2align 2,,3
    .globl  _ne3
    .def    _ne3;   .scl    2;  .type   32; .endef
_ne3:
LFB2:
    .cfi_startproc
    movl    4(%esp), %ecx
    testl   %ecx, %ecx
    sete    %al
    ret
    .cfi_endproc
LFE2:
    .def    ___main;    .scl    2;  .type   32; .endef
    .section    .text.startup,"x"
    .p2align 2,,3
    .globl  _main
    .def    _main;  .scl    2;  .type   32; .endef
_main:
LFB3:
    .cfi_startproc
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register 5
    andl    $-16, %esp
    call    ___main
    xorl    %eax, %eax
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
LFE3:

and with clang:

    .def     _ne1;
    .scl    2;
    .type   32;
    .endef
    .text
    .globl  _ne1
    .align  16, 0x90
_ne1:
    cmpl    $0, 4(%esp)
    setne   %al
    movzbl  %al, %eax
    ret

    .def     _ne2;
    .scl    2;
    .type   32;
    .endef
    .globl  _ne2
    .align  16, 0x90
_ne2:
    cmpl    $0, 4(%esp)
    setne   %al
    movzbl  %al, %eax
    ret

    .def     _ne3;
    .scl    2;
    .type   32;
    .endef
    .globl  _ne3
    .align  16, 0x90
_ne3:
    cmpl    $0, 4(%esp)
    sete    %al
    movzbl  %al, %eax
    ret

    .def     _main;
    .scl    2;
    .type   32;
    .endef
    .globl  _main
    .align  16, 0x90
_main:
    pushl   %ebp
    movl    %esp, %ebp
    calll   ___main
    xorl    %eax, %eax
    popl    %ebp
    ret

My suggestion would be to file this as a bug with Microsoft Connect.

Note: I compiled them as C source since I don’t think using the corresponding C++ compiler would make any significant change here.

Sign Up

Sign In

Forgot Password

The Archive Base Latest Questions

I’ve found that != and == are not the fastest ways for testing for

Leave an answerCancel reply

1 Answer

Leave an answer
Cancel reply