; Two back-to-back x87 comparisons of the same pair of 64-bit operands.
; The sequences are identical except for the SETcc condition used
; (seta for "above", setae for "above or equal").

        ; --- Test 1: "above" ---
        fld     QWORD PTR [esp+32]      ; push the 64-bit value at [esp+32] onto the x87 stack
        fld     QWORD PTR [esp+40]      ; push the 64-bit value at [esp+40]; it becomes ST(0)
        fucomip st, st(1)               ; Compare ST(0) and ST(1), and set CF, PF, ZF in EFLAGS
        fstp    st(0)                   ; pop the operand still left on the x87 stack
        seta    al                      ; Set al if above (CF=0 and ZF=0).
        test    al, al
        je      .L2                     ; al == 0 -> skip the guarded code
        ; Do something 1
.L2:

        ; --- Test 2: "above or equal" ---
        fld     QWORD PTR [esp+32]
        fld     QWORD PTR [esp+40]
        fucomip st, st(1)               ; (same thing as above)
        fstp    st(0)
        setae   al                      ; Set al if above or equal (CF=0).
        test    al, al
        je      .L5                     ; al == 0 -> skip the guarded code
        ; Do something 2
.L5:
        leave                           ; release the current stack frame
        ret
Comparison     Subtraction  Carry Bit  Zero Bit
----------     -----------  ---------  --------
A < B      --> A - B < 0    0          0
A = B      --> A - B = 0    1          1
A > B      --> A - B > 0    1          0
因此,实现 A < B 的分支可以用一条指令完成,因为只有在这种情况下进位位才为 0(被清除),即:
;; Implementation of "if (A < B) goto address;"
;; A single conditional branch is enough: per the flag table in the
;; surrounding text, Carry is 0 only in the A < B case.
;; NOTE(review): the mnemonics look like illustrative pseudo-assembly
;; rather than a specific real ISA -- confirm.
cmp  A, B       ;; compare A to B
bcz  address    ;; Branch if Carry is Zero to the new address
但是,如果我们想要进行小于或等于的比较,我们需要对零标志进行额外的检查以捕获相等的情况。
;; Implementation of "if (A <= B) goto address;"
;; Two conditional branches to the same target: one for the "less than"
;; case and a second one that catches the "equal" case.
;; NOTE(review): the mnemonics look like illustrative pseudo-assembly
;; rather than a specific real ISA -- confirm.
cmp  A, B       ;; compare A to B
bcz  address    ;; branch if A < B
bzs  address    ;; also, Branch if the Zero bit is Set
// Illustrative fragment (body elided in the original): an inclusive (<=)
// loop bound on an unsigned counter.
void foo(unsigned size) {
    unsigned upper_bound = size - 1;  // or any calculation that could produce UINT_MAX
                                      // (size == 0 wraps around to UINT_MAX here)
    // When upper_bound == UINT_MAX, `i <= upper_bound` is true for every
    // unsigned i, so the condition by itself never ends the loop.
    for(unsigned i=0 ; i <= upper_bound ; i++)
        ...
# clang7.0 -O3 closed-form
# The loop has been replaced by a closed-form computation, with a separate
# early-out path for n == UINT_MAX.
        cmp     edi, -1             # n passed in EDI: x86-64 System V calling convention
        je      .LBB1_1             # if (n == UINT_MAX) return 0;  // C++ loop runs 0 times
        # else fall through into the closed-form calc
        mov     ecx, edi            # zero-extend n into RCX
        lea     eax, [rdi - 1]      # n-1
        imul    rax, rcx            # n * (n-1)             # 64-bit
        shr     rax                 # n * (n-1) / 2
        add     eax, edi            # n + (stuff / 2) = n * (n+1) / 2   # truncated to 32-bit
        ret                         # computed without possible overflow of the product before right shifting
.LBB1_1:
        xor     eax, eax            # result = 0 for the n == UINT_MAX case
        ret
但是对于朴素(naive)的版本,clang 只生成了一个笨拙的循环。
/*
 * Naive summation of 0 + 1 + ... + n using an inclusive (<=) loop bound.
 *
 * All arithmetic is unsigned, so the running sum wraps around on overflow
 * instead of being undefined.
 *
 * Note: for n == UINT_MAX the condition `k <= n` holds for every unsigned
 * value of k, so the loop condition never becomes false for that input.
 */
unsigned sum_1_to_n_naive(unsigned n)
{
    unsigned acc = 0;
    unsigned k = 0;

    while (k <= n) {
        acc += k;
        ++k;
    }

    return acc;
}