@@ -992,7 +992,7 @@ private QR256 mulSub(final long v3, final long v2, final long v1, final long v0,
992992 return new QR256 (q , new UInt256 (z3 , z2 , z1 , z0 ));
993993 }
994994
995- private UInt256 mulSubOverflow (final long v3 , final long v2 , final long v1 , final long v0 ) {
995+ private QR256 mulSubOverflow (final long v3 , final long v2 , final long v1 , final long v0 ) {
996996 // Overflow case: div2by1 quotient would be <1, 0>, but adjusts to <0, -1>
997997 // <p1, p0> = -1 * u0 = <u0 - 1, -u0>
998998 long res , borrow ;
@@ -1011,21 +1011,22 @@ private UInt256 mulSubOverflow(final long v3, final long v2, final long v1, fina
10111011 carry = u2 - 1 + ((Long .compareUnsigned (v2 , res ) < 0 ) ? 1 : 0 );
10121012
10131013 long z3 = v3 + u3 - carry - borrow ;
1014- // q = MAX may still be 1 too high; check if result >= modulus (i.e. negative wrapped)
1014+ // q = MAX may still be 1 too high - need to pass q - 1 (-2L) to addBack; check if result >=
1015+ // modulus (i.e. negative wrapped)
10151016 if (Long .compareUnsigned (z3 , u3 ) > 0
10161017 || (z3 == u3
10171018 && (Long .compareUnsigned (z2 , u2 ) > 0
10181019 || (z2 == u2
10191020 && (Long .compareUnsigned (z1 , u1 ) > 0
10201021 || (z1 == u1 && Long .compareUnsigned (z0 , u0 ) >= 0 )))))) {
1021- return addBack (z3 , z2 , z1 , z0 , 1L ). r ;
1022+ return addBack (z3 , z2 , z1 , z0 , - 2L ) ;
10221023 }
1023- return new UInt256 (z3 , z2 , z1 , z0 );
1024+ return new QR256 (- 1L , new UInt256 (z3 , z2 , z1 , z0 ) );
10241025 }
10251026
10261027 private QR256 reduceStep (
10271028 final long v4 , final long v3 , final long v2 , final long v1 , final long v0 , final long inv ) {
1028- if (v4 == u3 ) return new QR256 (- 1L , mulSubOverflow (v3 , v2 , v1 , v0 ) );
1029+ if (v4 == u3 ) return mulSubOverflow (v3 , v2 , v1 , v0 );
10291030 QR64 qr = div2by1 (v4 , v3 , u3 , inv );
10301031 if (qr .q != 0 ) return mulSub (qr .r , v2 , v1 , v0 , qr .q );
10311032 return new QR256 (0 , new UInt256 (v3 , v2 , v1 , v0 ));
@@ -1175,8 +1176,10 @@ private static QR64 div2by1(final long x1, final long x0, final long y, final lo
11751176 // --------------------------------------------------------------------------
11761177 // endregion
11771178
1178- // region Records
1179- // --------------------------------------------------------------------------
1179+ // region Quotient / Remainder types
1180+ // These types are used to store the result of division. Due to the nature of the algorithm
1181+ // (div2by1) quotient (q) can't
1182+ // ever exceed 64 bits while remainder (r) can range from 64 to 256 bits.
11801183 private record QR64 (long q , long r ) {}
11811184
11821185 private record QR128 (long q , UInt128 r ) {}
@@ -1185,6 +1188,11 @@ private record QR192(long q, UInt192 r) {}
11851188
11861189 private record QR256 (long q , UInt256 r ) {}
11871190
1191+ // --------------------------------------------------------------------------
1192+ // endregion
1193+
1194+ // region UInt* types
1195+ // --------------------------------------------------------------------------
11881196 record UInt64 (long u0 ) {
11891197 UInt64 shiftLeft (final int shift ) {
11901198 return (shift == 0 ) ? this : new UInt64 (u0 << shift );
@@ -1414,22 +1422,23 @@ private QR128 mulSub(final long x1, final long x0, final long q) {
14141422 return new QR128 (q , new UInt128 (z1 , z0 ));
14151423 }
14161424
1417- private UInt128 mulSubOverflow (final long v1 , final long v0 ) {
1425+ private QR128 mulSubOverflow (final long v1 , final long v0 ) {
14181426 // Overflow case: div2by1 quotient would be <1, 0>, but adjusts to <0, MAX>
14191427 // <p1, p0> = -1 * u0 = <u0 - 1, -u0>
14201428 long z0 = v0 + u0 ;
14211429 long carry = u0 - 1 + ((Long .compareUnsigned (v0 , z0 ) <= 0 ) ? 1 : 0 );
14221430
14231431 long z1 = v1 + u1 - carry ;
1424- // q = MAX may still be 1 too high; check if result >= modulus (i.e. negative wrapped)
1432+ // q = MAX may still be 1 too high - need to pass q - 1 (-2L) to addBack; check if result >=
1433+ // modulus (i.e. negative wrapped)
14251434 if (Long .compareUnsigned (z1 , u1 ) > 0 || (z1 == u1 && Long .compareUnsigned (z0 , u0 ) >= 0 )) {
1426- return addBack (z1 , z0 , 1L ). r ;
1435+ return addBack (z1 , z0 , - 2L ) ;
14271436 }
1428- return new UInt128 (z1 , z0 );
1437+ return new QR128 (- 1L , new UInt128 (z1 , z0 ) );
14291438 }
14301439
14311440 private QR128 reduceStep (final long v2 , final long v1 , final long v0 , final long inv ) {
1432- if (v2 == u1 ) return new QR128 (- 1L , mulSubOverflow (v1 , v0 ) );
1441+ if (v2 == u1 ) return mulSubOverflow (v1 , v0 );
14331442 QR64 qr = div2by1 (v2 , v1 , u1 , inv );
14341443 if (qr .q != 0 ) return mulSub (qr .r , v0 , qr .q );
14351444 return new QR128 (0 , new UInt128 (qr .r , v0 ));
@@ -1640,7 +1649,7 @@ private QR192 mulSub(final long v2, final long v1, final long v0, final long q)
16401649 return new QR192 (q , new UInt192 (z2 , z1 , z0 ));
16411650 }
16421651
1643- private UInt192 mulSubOverflow (final long v2 , final long v1 , final long v0 ) {
1652+ private QR192 mulSubOverflow (final long v2 , final long v1 , final long v0 ) {
16441653 // Overflow case: div2by1 quotient would be <1, 0>, but adjusts to <0, -1>
16451654 // <p1, p0> = -1 * u0 = <u0 - 1, -u0>
16461655 long z0 = v0 + u0 ;
@@ -1652,19 +1661,21 @@ private UInt192 mulSubOverflow(final long v2, final long v1, final long v0) {
16521661 carry = u1 - 1 + ((Long .compareUnsigned (v1 , res ) < 0 ) ? 1 : 0 );
16531662
16541663 long z2 = v2 - carry + u2 - borrow ;
1655- // q = MAX may still be 1 too high; check if result >= modulus (i.e. negative wrapped)
1664+ // q = MAX may still be 1 too high - need to pass q - 1 (-2L) to addBack; check if result >=
1665+ // modulus (i.e. negative wrapped)
16561666 if (Long .compareUnsigned (z2 , u2 ) > 0
16571667 || (z2 == u2
16581668 && (Long .compareUnsigned (z1 , u1 ) > 0
16591669 || (z1 == u1 && Long .compareUnsigned (z0 , u0 ) >= 0 )))) {
1660- return addBack (z2 , z1 , z0 , 1L ).r ;
1670+
1671+ return addBack (z2 , z1 , z0 , -2L );
16611672 }
1662- return new UInt192 (z2 , z1 , z0 );
1673+ return new QR192 (- 1L , new UInt192 (z2 , z1 , z0 ) );
16631674 }
16641675
16651676 private QR192 reduceStep (
16661677 final long v3 , final long v2 , final long v1 , final long v0 , final long inv ) {
1667- if (v3 == u2 ) return new QR192 (- 1L , mulSubOverflow (v2 , v1 , v0 ) );
1678+ if (v3 == u2 ) return mulSubOverflow (v2 , v1 , v0 );
16681679 QR64 qr = div2by1 (v3 , v2 , u2 , inv );
16691680 if (qr .q != 0 ) return mulSub (qr .r , v1 , v0 , qr .q );
16701681 return new QR192 (0 , new UInt192 (v2 , v1 , v0 ));
0 commit comments