Skip to content

Commit 65fd9b0

Browse files
committed
Optimize the worst case of RuntimeLong division and remainder.
TODO: Write down the proof.
1 parent 849495b commit 65fd9b0

File tree

4 files changed

+147
-86
lines changed

4 files changed

+147
-86
lines changed

linker-private-library/src/main/scala/org/scalajs/linker/runtime/RuntimeLong.scala

Lines changed: 90 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,9 @@ final class RuntimeLong(val lo: Int, val hi: Int) {
7676

7777
// A few operator-friendly methods used by the division algorithms
7878

79-
@inline private def <<(b: Int): RuntimeLong = RuntimeLong.shl(this, b)
80-
@inline private def >>>(b: Int): RuntimeLong = RuntimeLong.shr(this, b)
8179
@inline private def +(b: RuntimeLong): RuntimeLong = RuntimeLong.add(this, b)
8280
@inline private def -(b: RuntimeLong): RuntimeLong = RuntimeLong.sub(this, b)
81+
@inline private def *(b: RuntimeLong): RuntimeLong = RuntimeLong.mul(this, b)
8382
}
8483

8584
object RuntimeLong {
@@ -910,7 +909,7 @@ object RuntimeLong {
910909
}
911910

912911
def divideImpl(alo: Int, ahi: Int, blo: Int, bhi: Int): Int = {
913-
if (isZero(blo, bhi))
912+
if (bothZero(blo, bhi))
914913
throw new ArithmeticException("/ by zero")
915914

916915
if (isInt32(alo, ahi)) {
@@ -950,7 +949,7 @@ object RuntimeLong {
950949
}
951950

952951
def divideUnsignedImpl(alo: Int, ahi: Int, blo: Int, bhi: Int): Int = {
953-
if (isZero(blo, bhi))
952+
if (bothZero(blo, bhi))
954953
throw new ArithmeticException("/ by zero")
955954

956955
if (isUInt32(ahi)) {
@@ -968,7 +967,7 @@ object RuntimeLong {
968967
}
969968

970969
private def unsigned_/(alo: Int, ahi: Int, blo: Int, bhi: Int): Int = {
971-
// This method is not called if isInt32(alo, ahi) nor if isZero(blo, bhi)
970+
// This method is not called if isInt32(alo, ahi) nor if bothZero(blo, bhi)
972971
if (isUnsignedSafeDouble(ahi)) {
973972
if (isUnsignedSafeDouble(bhi)) {
974973
val aDouble = asSafeDouble(alo, ahi)
@@ -1003,7 +1002,7 @@ object RuntimeLong {
10031002
}
10041003

10051004
def remainderImpl(alo: Int, ahi: Int, blo: Int, bhi: Int): Int = {
1006-
if (isZero(blo, bhi))
1005+
if (bothZero(blo, bhi))
10071006
throw new ArithmeticException("/ by zero")
10081007

10091008
if (isInt32(alo, ahi)) {
@@ -1038,7 +1037,7 @@ object RuntimeLong {
10381037
}
10391038

10401039
def remainderUnsignedImpl(alo: Int, ahi: Int, blo: Int, bhi: Int): Int = {
1041-
if (isZero(blo, bhi))
1040+
if (bothZero(blo, bhi))
10421041
throw new ArithmeticException("/ by zero")
10431042

10441043
if (isUInt32(ahi)) {
@@ -1056,7 +1055,7 @@ object RuntimeLong {
10561055
}
10571056

10581057
private def unsigned_%(alo: Int, ahi: Int, blo: Int, bhi: Int): Int = {
1059-
// This method is not called if isInt32(alo, ahi) nor if isZero(blo, bhi)
1058+
// This method is not called if isInt32(alo, ahi) nor if bothZero(blo, bhi)
10601059
if (isUnsignedSafeDouble(ahi)) {
10611060
if (isUnsignedSafeDouble(bhi)) {
10621061
val aDouble = asSafeDouble(alo, ahi)
@@ -1088,71 +1087,97 @@ object RuntimeLong {
10881087
* remainder. Stores the hi word of the result in `hiReturn`, and returns
10891088
* the lo word.
10901089
*/
1090+
@inline // inlined twice; specializes for askQuotient
10911091
private def unsignedDivModHelper(alo: Int, ahi: Int, blo: Int, bhi: Int,
10921092
askQuotient: Boolean): Int = {
10931093

1094-
var shift =
1095-
inlineNumberOfLeadingZeros(blo, bhi) - inlineNumberOfLeadingZeros(alo, ahi)
1096-
val initialBShift = new RuntimeLong(blo, bhi) << shift
1097-
var bShiftLo = initialBShift.lo
1098-
var bShiftHi = initialBShift.hi
1099-
var remLo = alo
1100-
var remHi = ahi
1101-
var quotLo = 0
1102-
var quotHi = 0
1103-
1104-
/* Invariants:
1105-
* bShift == b << shift == b * 2^shift
1106-
* quot >= 0
1107-
* 0 <= rem < 2 * bShift
1108-
* quot * b + rem == a
1109-
*
1110-
* The loop condition should be
1111-
* while (shift >= 0 && !isUnsignedSafeDouble(remHi))
1112-
* but we manually inline isUnsignedSafeDouble because remHi is a var. If
1113-
* we let the optimizer inline it, it will first store remHi in a temporary
1114-
* val, which will explose the while condition as a while(true) + if +
1115-
* break, and we don't want that.
1116-
*/
1117-
while (shift >= 0 && (remHi & SafeDoubleHiMask) != 0) {
1118-
if (inlineUnsigned_>=(remLo, remHi, bShiftLo, bShiftHi)) {
1119-
val newRem =
1120-
new RuntimeLong(remLo, remHi) - new RuntimeLong(bShiftLo, bShiftHi)
1121-
remLo = newRem.lo
1122-
remHi = newRem.hi
1123-
if (shift < 32)
1124-
quotLo |= (1 << shift)
1125-
else
1126-
quotHi |= (1 << shift) // == (1 << (shift - 32))
1094+
// scalastyle:off return
1095+
1096+
val result = if (bothZero(bhi, blo & 0xffe00000)) {
1097+
// b < 2^21
1098+
1099+
val quotHi = Integer.divideUnsigned(ahi, blo) // takes care of the division by zero check
1100+
val k = ahi - quotHi * blo // remainder of the above division; k < blo
1101+
// (alo, k) is exact because it uses at most 32 + 21 = 53 bits
1102+
val remainingNum = asSafeDouble(alo, k)
1103+
if (askQuotient)
1104+
new RuntimeLong(rawToInt(remainingNum / blo.toDouble), quotHi)
1105+
else
1106+
new RuntimeLong(rawToInt(remainingNum % blo.toDouble), 0)
1107+
} else if ((bhi & 0xc0000000) == 0) {
1108+
// 2^21 <= b < 2^62
1109+
1110+
val longNum = new RuntimeLong(alo, ahi)
1111+
val longDivisor = new RuntimeLong(blo, bhi)
1112+
val approxDivisor = unsignedToDoubleApprox(blo, bhi)
1113+
val approxNum = unsignedToDoubleApprox(alo, ahi)
1114+
val approxQuot = fromUnsignedSafeDouble(approxNum / approxDivisor)
1115+
val approxRem = longNum - longDivisor * approxQuot
1116+
1117+
if (approxRem.hi < 0) {
1118+
if (askQuotient) approxQuot - new RuntimeLong(1, 0)
1119+
else approxRem + longDivisor
1120+
} else if (geu(approxRem, longDivisor)) {
1121+
if (askQuotient) approxQuot + new RuntimeLong(1, 0)
1122+
else approxRem - longDivisor
1123+
} else {
1124+
if (askQuotient) approxQuot
1125+
else approxRem
11271126
}
1128-
shift -= 1
1129-
val newBShift = new RuntimeLong(bShiftLo, bShiftHi) >>> 1
1130-
bShiftLo = newBShift.lo
1131-
bShiftHi = newBShift.hi
1127+
} else {
1128+
// 2^62 <= b
1129+
return unsignedDivModHugeDivisor(alo, ahi, blo, bhi, askQuotient)
11321130
}
11331131

1134-
// Now rem < 2^53, we can finish with a double division
1135-
if (inlineUnsigned_>=(remLo, remHi, blo, bhi)) {
1136-
val remDouble = asSafeDouble(remLo, remHi)
1137-
val bDouble = asSafeDouble(blo, bhi)
1132+
hiReturn = result.hi
1133+
result.lo
1134+
1135+
// scalastyle:on return
1136+
}
1137+
1138+
private def unsignedDivModHugeDivisor(alo: Int, ahi: Int, blo: Int, bhi: Int,
1139+
askQuotient: Boolean): Int = {
1140+
1141+
/* Called for b >= 2^62.
1142+
* Such huge divisors are practically useless, but they defeat the
1143+
* correction code of the fast algorithm above.
1144+
* Since there are only 4 possible outcomes for the quotient, we perform
1145+
* a "binary search" among those.
1146+
*/
1147+
1148+
val a = new RuntimeLong(alo, ahi)
1149+
val b = new RuntimeLong(blo, bhi)
11381150

1139-
if (askQuotient) {
1140-
val rem_div_bDouble = fromUnsignedSafeDouble(remDouble / bDouble)
1141-
val newQuot = new RuntimeLong(quotLo, quotHi) + rem_div_bDouble
1142-
hiReturn = newQuot.hi
1143-
newQuot.lo
1151+
var quot = 0
1152+
1153+
// Since b >= 2^62, we know that rem < 4*b (mathematically)
1154+
1155+
val rem1 = if (bhi >= 0) {
1156+
// Bit 63 is 0: 2*b does not overflow, and we need a 4-way search
1157+
val b2 = shl(b, 1)
1158+
if (geu(a, b2)) {
1159+
quot = 2
1160+
a - b2
11441161
} else {
1145-
val rem_mod_bDouble = remDouble % bDouble
1146-
hiReturn = unsignedSafeDoubleHi(rem_mod_bDouble)
1147-
unsignedSafeDoubleLo(rem_mod_bDouble)
1162+
a
11481163
}
11491164
} else {
1150-
if (askQuotient) {
1151-
hiReturn = quotHi
1152-
quotLo
1165+
a
1166+
}
1167+
1168+
// Now rem < 2*b (mathematically)
1169+
val rem1LTUb = ltu(rem1, b) // compute once for code size
1170+
if (askQuotient) {
1171+
hiReturn = 0
1172+
if (rem1LTUb) quot else quot + 1
1173+
} else {
1174+
if (rem1LTUb) {
1175+
hiReturn = rem1.hi
1176+
rem1.lo
11531177
} else {
1154-
hiReturn = remHi
1155-
remLo
1178+
val rem2 = rem1 - b
1179+
hiReturn = rem2.hi
1180+
rem2.lo
11561181
}
11571182
}
11581183
}
@@ -1163,9 +1188,9 @@ object RuntimeLong {
11631188
s.jsSubstring(start)
11641189
}
11651190

1166-
/** Tests whether the long (lo, hi) is 0. */
1167-
@inline def isZero(lo: Int, hi: Int): Boolean =
1168-
(lo | hi) == 0
1191+
/** Tests whether `a == 0 && b == 0` with a single comparison. */
1192+
@inline def bothZero(a: Int, b: Int): Boolean =
1193+
(a | b) == 0
11691194

11701195
/** Tests whether the long (lo, hi)'s mathematical value fits in a signed Int. */
11711196
@inline def isInt32(lo: Int, hi: Int): Boolean =
@@ -1285,17 +1310,6 @@ object RuntimeLong {
12851310
@inline def log2OfPowerOfTwo(i: Int): Int =
12861311
31 - Integer.numberOfLeadingZeros(i)
12871312

1288-
/** Returns the number of leading zeros in the given long (lo, hi). */
1289-
@inline def inlineNumberOfLeadingZeros(lo: Int, hi: Int): Int =
1290-
if (hi != 0) Integer.numberOfLeadingZeros(hi)
1291-
else Integer.numberOfLeadingZeros(lo) + 32
1292-
1293-
/** Tests whether the unsigned long (alo, ahi) is >= (blo, bhi). */
1294-
@inline
1295-
def inlineUnsigned_>=(alo: Int, ahi: Int, blo: Int, bhi: Int): Boolean =
1296-
if (ahi == bhi) inlineUnsignedInt_>=(alo, blo)
1297-
else inlineUnsignedInt_>=(ahi, bhi)
1298-
12991313
@inline
13001314
def inlineUnsignedInt_<(a: Int, b: Int): Boolean =
13011315
(a ^ 0x80000000) < (b ^ 0x80000000)

linker/shared/src/test/scala/org/scalajs/linker/LibrarySizeTest.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,9 @@ class LibrarySizeTest {
7070
)
7171

7272
testLinkedSizes(
73-
expectedFastLinkSize = 147395,
74-
expectedFullLinkSizeWithoutClosure = 87201,
75-
expectedFullLinkSizeWithClosure = 20680,
73+
expectedFastLinkSize = 150451,
74+
expectedFullLinkSizeWithoutClosure = 89534,
75+
expectedFullLinkSizeWithClosure = 20699,
7676
classDefs,
7777
moduleInitializers = MainTestModuleInitializers
7878
)

project/Build.scala

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2053,32 +2053,32 @@ object Build {
20532053
case `default212Version` =>
20542054
if (!useMinifySizes) {
20552055
Some(ExpectedSizes(
2056-
fastLink = 624000 to 625000,
2056+
fastLink = 627000 to 628000,
20572057
fullLink = 94000 to 95000,
20582058
fastLinkGz = 75000 to 79000,
20592059
fullLinkGz = 24000 to 25000,
20602060
))
20612061
} else {
20622062
Some(ExpectedSizes(
2063-
fastLink = 425000 to 426000,
2064-
fullLink = 282000 to 283000,
2065-
fastLinkGz = 60000 to 61000,
2063+
fastLink = 427000 to 428000,
2064+
fullLink = 284000 to 285000,
2065+
fastLinkGz = 61000 to 62000,
20662066
fullLinkGz = 43000 to 44000,
20672067
))
20682068
}
20692069

20702070
case `default213Version` =>
20712071
if (!useMinifySizes) {
20722072
Some(ExpectedSizes(
2073-
fastLink = 442000 to 443000,
2073+
fastLink = 445000 to 446000,
20742074
fullLink = 90000 to 91000,
20752075
fastLinkGz = 57000 to 58000,
20762076
fullLinkGz = 24000 to 25000,
20772077
))
20782078
} else {
20792079
Some(ExpectedSizes(
2080-
fastLink = 301000 to 302000,
2081-
fullLink = 258000 to 259000,
2080+
fastLink = 303000 to 304000,
2081+
fullLink = 261000 to 262000,
20822082
fastLinkGz = 47000 to 48000,
20832083
fullLinkGz = 42000 to 43000,
20842084
))

test-suite/shared/src/test/scala/org/scalajs/testsuite/javalib/lang/LongTest.scala

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,53 @@ class LongTest {
822822
assertThrows(classOf[ArithmeticException], JLong.remainderUnsigned(5L, 0L))
823823
}
824824

825+
@Test def divideRemainderUnsignedHugeDivisors(): Unit = {
826+
@inline def test(x: Long, y: Long, expectedQuot: Long, expectedRem: Long): Unit = {
827+
assertEquals(expectedQuot, JLong.divideUnsigned(x, y))
828+
assertEquals(expectedQuot, JLong.divideUnsigned(hideFromOptimizer(x), y))
829+
assertEquals(expectedQuot, JLong.divideUnsigned(x, hideFromOptimizer(y)))
830+
assertEquals(expectedQuot, JLong.divideUnsigned(hideFromOptimizer(x), hideFromOptimizer(y)))
831+
832+
assertEquals(expectedRem, JLong.remainderUnsigned(x, y))
833+
assertEquals(expectedRem, JLong.remainderUnsigned(hideFromOptimizer(x), y))
834+
assertEquals(expectedRem, JLong.remainderUnsigned(x, hideFromOptimizer(y)))
835+
assertEquals(expectedRem, JLong.remainderUnsigned(hideFromOptimizer(x), hideFromOptimizer(y)))
836+
}
837+
838+
test(-63003L, -2L, 0L, -63003L) // this particular case fails if we don't have the "huge" path at all
839+
test(-3L, -2L, 0L, -3L)
840+
test(-2L, -2L, 1L, 0L)
841+
test(-1L, -2L, 1L, 1L)
842+
843+
test(0x7ffffffffffffffeL, 0x7fffffffffffffffL, 0L, 0x7ffffffffffffffeL)
844+
test(0x7fffffffffffffffL, 0x7fffffffffffffffL, 1L, 0L)
845+
test(0x8000000000000000L, 0x7fffffffffffffffL, 1L, 1L)
846+
test(0xfffffffffffffffdL, 0x7fffffffffffffffL, 1L, 0x7ffffffffffffffeL)
847+
test(0xfffffffffffffffeL, 0x7fffffffffffffffL, 2L, 0L)
848+
test(0xffffffffffffffffL, 0x7fffffffffffffffL, 2L, 1L)
849+
850+
test(0x7ffffffffffffff8L, 0x7ffffffffffffffeL, 0L, 0x7ffffffffffffff8L)
851+
test(0x7ffffffffffffffeL, 0x7ffffffffffffffeL, 1L, 0L)
852+
test(0x8000000000000008L, 0x7ffffffffffffffeL, 1L, 0xaL)
853+
test(0xffffffffffff09e5L, 0x7ffffffffffffffeL, 1L, 0x7fffffffffff09e7L)
854+
test(0xfffffffffffffffbL, 0x7ffffffffffffffeL, 1L, 0x7ffffffffffffffdL)
855+
test(0xfffffffffffffffcL, 0x7ffffffffffffffeL, 2L, 0L)
856+
test(0xfffffffffffffffdL, 0x7ffffffffffffffeL, 2L, 1L)
857+
test(0xffffffffffffffffL, 0x7ffffffffffffffeL, 2L, 3L)
858+
859+
test(0x3ffffffffffffff0L, 0x4000000000000001L, 0L, 0x3ffffffffffffff0L)
860+
test(0x4000000000000000L, 0x4000000000000001L, 0L, 0x4000000000000000L)
861+
test(0x4000000000000001L, 0x4000000000000001L, 1L, 0L)
862+
test(0x4000000000000002L, 0x4000000000000001L, 1L, 1L)
863+
test(0x8000000000000001L, 0x4000000000000001L, 1L, 0x4000000000000000L)
864+
test(0x8000000000000002L, 0x4000000000000001L, 2L, 0L)
865+
test(0x8000000000000003L, 0x4000000000000001L, 2L, 1L)
866+
test(0xc000000000000002L, 0x4000000000000001L, 2L, 0x4000000000000000L)
867+
test(0xc000000000000003L, 0x4000000000000001L, 3L, 0L)
868+
test(0xc000000000000004L, 0x4000000000000001L, 3L, 1L)
869+
test(0xffffffffffffffffL, 0x4000000000000001L, 3L, 0x3ffffffffffffffcL)
870+
}
871+
825872
@Test def toUnsignedString(): Unit = {
826873
def test(x: Long, s: String, radix: Int = 10): Unit = {
827874
assertEquals(s, JLong.toUnsignedString(x, radix))

0 commit comments

Comments
 (0)