-
Notifications
You must be signed in to change notification settings - Fork 14.9k
Open
Description
https://godbolt.org/z/a1f4hrPzT
/* Outcome of comparing two floats. UNORDERED is the value compare() returns
 * when none of <, == or > holds between its operands. */
enum Ordering {
LESS = -1,
EQUAL = 0,
GREATER = 1,
UNORDERED = 2
};
/*
 * Four-way comparison of a against b.
 *
 * Returns LESS when a < b, EQUAL when a == b, GREATER when a > b, and
 * UNORDERED when all three tests fail (for floats this happens when at
 * least one operand is NaN).
 */
enum Ordering compare(float a, float b) {
if (a < b) {
return LESS;
} else if (a == b) {
return EQUAL;
} else if (a > b) {
return GREATER;
} else {
/* None of <, ==, > held. */
return UNORDERED;
}
}
/*
 * Second reproducer: two comparisons of the same operands in the same order.
 *
 * Returns 1 when a < b, 2 when a <= b holds but a < b did not (i.e. a == b),
 * and 3 in every remaining case (a > b, or the operands do not compare,
 * e.g. one of them is NaN).
 */
int another_example(float a, float b) {
if (a < b) {
return 1;
} else if (a <= b) {
return 2;
} else {
return 3;
}
}
# Compiler output for compare(); a is in xmm0, b is in xmm1, result in eax.
# Note that ucomiss is issued twice, with the operands swapped.
compare:
# Preload the LESS result.
mov eax, -1
# First compare: b against a. "Above" (CF=0 and ZF=0) means b > a.
ucomiss xmm1, xmm0
# b > a, i.e. a < b: return -1.
ja .LBB0_3
# Preload the EQUAL result; xor also overwrites the flags of the first compare.
xor eax, eax
# Second compare: a against b.
ucomiss xmm0, xmm1
# ZF=0: a and b are ordered and differ.
jne .LBB0_2
# PF=1: the operands are unordered (ucomiss sets ZF as well in that case).
jp .LBB0_2
.LBB0_3:
ret
.LBB0_2:
# Flags still come from the second compare. a < b was handled above, so here
# either a > b (CF=0, ZF=0 -> al=0) or unordered (CF=1, ZF=1 -> al=1).
setbe al
movzx eax, al
# 0 -> 1 (GREATER), 1 -> 2 (UNORDERED).
inc eax
ret
# Compiler output for another_example(); a is in xmm0, b is in xmm1, result in eax.
# Both ucomiss instructions have identical operands.
another_example:
xor ecx, ecx
# Compare b against a: CF=1 when b < a or the operands are unordered.
ucomiss xmm1, xmm0
# ecx = 2 + CF: 3 when b < a or unordered, otherwise 2. adc rewrites the flags.
adc ecx, 2
# Same compare again to get the flags back for the cmov below.
ucomiss xmm1, xmm0
# Default result for a < b; mov does not touch the flags.
mov eax, 1
# "Below or equal" (CF=1 or ZF=1): b <= a or unordered, so take ecx (2 or 3).
# Otherwise b > a, i.e. a < b, and eax stays 1.
cmovbe eax, ecx
ret
I see no good reason why the partial ordering implementation cannot emit ucomiss
just once, e.g.
# Proposed single-compare version of compare().
# In:  xmm0 = a, xmm1 = b.  Out: eax = -1 (LESS), 0 (EQUAL), 1 (GREATER), 2 (UNORDERED).
# Every instruction between ucomiss and ret leaves the flags untouched, so one
# compare feeds all three tests.
compare:
ucomiss xmm0, xmm1      # compare a with b: sets CF/ZF/PF
mov eax, 2              # UNORDERED
jp .exit                # PF=1: operands are unordered
mov eax, -1             # LESS
jb .exit                # CF=1: a < b
seta al                 # al = 1 if a > b, 0 if a == b (writes the low byte only)
movzx eax, al           # clear the 0xFFFFFF upper bits left over from "mov eax, -1"
.exit:
ret
`another_example` looks even more straightforward, with the operands not even swapped.
`ucomiss` is not exactly slow, but it's not fast either, so surely this is a missed optimization?