Skip to content

Commit 052d861

Browse files
authored
Merge pull request #5184 from sjrd/rt-long-branchless-add-sub-neg
Opt: Branchless addition, subtraction and negation for `RuntimeLong`.
2 parents 7e9dbfc + 83cdafc commit 052d861

File tree

4 files changed

+179
-77
lines changed

4 files changed

+179
-77
lines changed

linker-private-library/src/main/scala/org/scalajs/linker/runtime/RuntimeLong.scala

Lines changed: 29 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -158,10 +158,6 @@ object RuntimeLong {
158158

159159
// Bitwise operations
160160

161-
@inline
162-
def not(a: RuntimeLong): RuntimeLong =
163-
new RuntimeLong(~a.lo, ~a.hi)
164-
165161
@inline
166162
def or(a: RuntimeLong, b: RuntimeLong): RuntimeLong =
167163
new RuntimeLong(a.lo | b.lo, a.hi | b.hi)
@@ -272,31 +268,31 @@ object RuntimeLong {
272268

273269
// Arithmetic operations
274270

275-
@inline
276-
def neg(a: RuntimeLong): RuntimeLong = {
277-
val lo = a.lo
278-
val hi = a.hi
279-
new RuntimeLong(inline_lo_unary_-(lo), inline_hi_unary_-(lo, hi))
280-
}
281-
282271
@inline
283272
def add(a: RuntimeLong, b: RuntimeLong): RuntimeLong = {
273+
// Hacker's Delight, Section 2-16
284274
val alo = a.lo
285-
val ahi = a.hi
286-
val bhi = b.hi
287-
val lo = alo + b.lo
275+
val blo = b.lo
276+
val lo = alo + blo
288277
new RuntimeLong(lo,
289-
if (inlineUnsignedInt_<(lo, alo)) ahi + bhi + 1 else ahi + bhi)
278+
a.hi + b.hi + (((alo & blo) | ((alo | blo) & ~lo)) >>> 31))
290279
}
291280

292281
@inline
293282
def sub(a: RuntimeLong, b: RuntimeLong): RuntimeLong = {
283+
/* Hacker's Delight, Section 2-16
284+
*
285+
* We deviate a bit from the original algorithm. Hacker's Delight uses
286+
* `- (... >>> 31)`. Instead, we use `+ (... >> 31)`. These are equivalent,
287+
* since `(x >> 31) == -(x >>> 31)` for all x. The variant with `+` folds
288+
* better when `a.hi` and `b.hi` are both known to be 0. This happens in
289+
* practice when `a` and `b` are 0-extended from `Int` values.
290+
*/
294291
val alo = a.lo
295-
val ahi = a.hi
296-
val bhi = b.hi
297-
val lo = alo - b.lo
292+
val blo = b.lo
293+
val lo = alo - blo
298294
new RuntimeLong(lo,
299-
if (inlineUnsignedInt_>(lo, alo)) ahi - bhi - 1 else ahi - bhi)
295+
a.hi - b.hi + (((~alo & blo) | (~(alo ^ blo) & lo)) >> 31))
300296
}
301297

302298
@inline
@@ -548,7 +544,8 @@ object RuntimeLong {
548544
if (isInt32(lo, hi)) {
549545
lo.toString()
550546
} else if (hi < 0) {
551-
"-" + toUnsignedString(inline_lo_unary_-(lo), inline_hi_unary_-(lo, hi))
547+
val neg = inline_negate(lo, hi)
548+
"-" + toUnsignedString(neg.lo, neg.hi)
552549
} else {
553550
toUnsignedString(lo, hi)
554551
}
@@ -656,8 +653,8 @@ object RuntimeLong {
656653
private def toDouble(lo: Int, hi: Int): Double = {
657654
if (hi < 0) {
658655
// We do asUint() on the hi part specifically for MinValue
659-
-(asUint(inline_hi_unary_-(lo, hi)) * TwoPow32 +
660-
asUint(inline_lo_unary_-(lo)))
656+
val neg = inline_negate(lo, hi)
657+
-(asUint(neg.hi) * TwoPow32 + asUint(neg.lo))
661658
} else {
662659
hi * TwoPow32 + asUint(lo)
663660
}
@@ -900,7 +897,7 @@ object RuntimeLong {
900897
val bAbs = inline_abs(blo, bhi)
901898
val absRLo = unsigned_/(aAbs.lo, aAbs.hi, bAbs.lo, bAbs.hi)
902899
if ((ahi ^ bhi) >= 0) absRLo // a and b have the same sign bit
903-
else inline_hiReturn_unary_-(absRLo, hiReturn)
900+
else inline_negate_hiReturn(absRLo, hiReturn)
904901
}
905902
}
906903

@@ -993,7 +990,7 @@ object RuntimeLong {
993990
val aAbs = inline_abs(alo, ahi)
994991
val bAbs = inline_abs(blo, bhi)
995992
val absRLo = unsigned_%(aAbs.lo, aAbs.hi, bAbs.lo, bAbs.hi)
996-
if (ahi < 0) inline_hiReturn_unary_-(absRLo, hiReturn)
993+
if (ahi < 0) inline_negate_hiReturn(absRLo, hiReturn)
997994
else absRLo
998995
}
999996
}
@@ -1124,12 +1121,6 @@ object RuntimeLong {
11241121
}
11251122
}
11261123

1127-
@inline
1128-
private def inline_hiReturn_unary_-(lo: Int, hi: Int): Int = {
1129-
hiReturn = inline_hi_unary_-(lo, hi)
1130-
inline_lo_unary_-(lo)
1131-
}
1132-
11331124
@inline
11341125
private def substring(s: String, start: Int): String = {
11351126
import scala.scalajs.js.JSStringOps.enableJSStringOps
@@ -1222,17 +1213,20 @@ object RuntimeLong {
12221213
(a ^ 0x80000000) >= (b ^ 0x80000000)
12231214

12241215
@inline
1225-
def inline_lo_unary_-(lo: Int): Int =
1226-
-lo
1216+
def inline_negate(lo: Int, hi: Int): RuntimeLong =
1217+
sub(new RuntimeLong(0, 0), new RuntimeLong(lo, hi))
12271218

12281219
@inline
1229-
def inline_hi_unary_-(lo: Int, hi: Int): Int =
1230-
if (lo != 0) ~hi else -hi
1220+
def inline_negate_hiReturn(lo: Int, hi: Int): Int = {
1221+
val n = inline_negate(lo, hi)
1222+
hiReturn = n.hi
1223+
n.lo
1224+
}
12311225

12321226
@inline
12331227
def inline_abs(lo: Int, hi: Int): RuntimeLong = {
12341228
if (hi < 0)
1235-
new RuntimeLong(inline_lo_unary_-(lo), inline_hi_unary_-(lo, hi))
1229+
inline_negate(lo, hi)
12361230
else
12371231
new RuntimeLong(lo, hi)
12381232
}

linker/shared/src/main/scala/org/scalajs/linker/backend/emitter/FunctionEmitter.scala

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2679,17 +2679,20 @@ private[emitter] class FunctionEmitter(sjsGen: SJSGen) {
26792679
else
26802680
genLongApplyStatic(LongImpl.add, newLhs, newRhs)
26812681
case Long_- =>
2682-
lhs match {
2683-
case LongLiteral(0L) =>
2684-
if (useBigIntForLongs)
2682+
if (useBigIntForLongs) {
2683+
lhs match {
2684+
case LongLiteral(0L) =>
26852685
wrapBigInt64(js.UnaryOp(JSUnaryOp.-, newRhs))
2686-
else
2687-
genLongApplyStatic(LongImpl.neg, newRhs)
2688-
case _ =>
2689-
if (useBigIntForLongs)
2686+
case _ =>
26902687
wrapBigInt64(js.BinaryOp(JSBinaryOp.-, newLhs, newRhs))
2691-
else
2692-
genLongApplyStatic(LongImpl.sub, newLhs, newRhs)
2688+
}
2689+
} else {
2690+
/* RuntimeLong does not have a dedicated method for 0L - b.
2691+
* The regular expansion done by the optimizer for the binary
2692+
* form is already optimal.
2693+
* So we don't special-case it here either.
2694+
*/
2695+
genLongApplyStatic(LongImpl.sub, newLhs, newRhs)
26932696
}
26942697
case Long_* =>
26952698
if (useBigIntForLongs)
@@ -2730,17 +2733,20 @@ private[emitter] class FunctionEmitter(sjsGen: SJSGen) {
27302733
else
27312734
genLongApplyStatic(LongImpl.and, newLhs, newRhs)
27322735
case Long_^ =>
2733-
lhs match {
2734-
case LongLiteral(-1L) =>
2735-
if (useBigIntForLongs)
2736+
if (useBigIntForLongs) {
2737+
lhs match {
2738+
case LongLiteral(-1L) =>
27362739
wrapBigInt64(js.UnaryOp(JSUnaryOp.~, newRhs))
2737-
else
2738-
genLongApplyStatic(LongImpl.not, newRhs)
2739-
case _ =>
2740-
if (useBigIntForLongs)
2740+
case _ =>
27412741
wrapBigInt64(js.BinaryOp(JSBinaryOp.^, newLhs, newRhs))
2742-
else
2743-
genLongApplyStatic(LongImpl.xor, newLhs, newRhs)
2742+
}
2743+
} else {
2744+
/* RuntimeLong does not have a dedicated method for -1L ^ b.
2745+
* The regular expansion done by the optimizer for the binary
2746+
* form is already optimal.
2747+
* So we don't special-case it here either.
2748+
*/
2749+
genLongApplyStatic(LongImpl.xor, newLhs, newRhs)
27442750
}
27452751
case Long_<< =>
27462752
if (useBigIntForLongs)

linker/shared/src/main/scala/org/scalajs/linker/backend/emitter/LongImpl.scala

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,6 @@ private[linker] object LongImpl {
5858

5959
// Operator methods
6060

61-
final val neg = unaryOp("neg")
62-
final val not = unaryOp("not")
63-
6461
final val add = binaryOp("add")
6562
final val sub = binaryOp("sub")
6663
final val mul = binaryOp("mul")
@@ -96,7 +93,6 @@ private[linker] object LongImpl {
9693
final val fromDoubleBits = MethodName("fromDoubleBits", List(DoubleRef, ObjectRef), RTLongRef)
9794

9895
val OperatorMethods = Set(
99-
neg, not,
10096
add, sub, mul,
10197
divide, remainder, divideUnsigned, remainderUnsigned,
10298
or, and, xor, shl, shr, sar,

0 commit comments

Comments
 (0)