3535
3636#define BIGDECIMAL_VERSION "4.1.0"
3737
38- #define NTT_MULTIPLICATION_THRESHOLD 100
39- #define NEWTON_RAPHSON_DIVISION_THRESHOLD 200
38+ #define NTT_MULTIPLICATION_THRESHOLD 350
39+ #define NEWTON_RAPHSON_DIVISION_THRESHOLD 100
4040#define SIGNED_VALUE_MAX INTPTR_MAX
4141#define SIGNED_VALUE_MIN INTPTR_MIN
4242#define MUL_OVERFLOW_SIGNED_VALUE_P (a , b ) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, SIGNED_VALUE_MIN, SIGNED_VALUE_MAX)
@@ -4837,17 +4837,12 @@ VpSetPTR(Real *a, Real *b, Real *c, size_t *a_pos, size_t *b_pos, size_t *c_pos,
48374837 * a0 a1 .... an * b0
48384838 * +_____________________________
48394839 * c0 c1 c2 ...... cl
4840- * nc <---|
4841- * MaxAB |--------------------|
48424840 */
48434841VP_EXPORT size_t
48444842VpMult (Real * c , Real * a , Real * b )
48454843{
4846- size_t MxIndA , MxIndB , MxIndAB ;
4847- size_t ind_c , i , ii , nc ;
4848- size_t ind_as , ind_ae , ind_bs ;
4849- DECDIG carry ;
4850- DECDIG_DBL s ;
4844+ ssize_t a_batch_max , b_batch_max ;
4845+ DECDIG_DBL batch [15 ];
48514846
48524847 if (!VpIsDefOP (c , a , b , OP_SW_MULT )) return 0 ; /* No significant digit */
48534848
@@ -4871,9 +4866,6 @@ VpMult(Real *c, Real *a, Real *b)
48714866 a = b ;
48724867 b = w ;
48734868 }
4874- MxIndA = a -> Prec - 1 ;
4875- MxIndB = b -> Prec - 1 ;
4876- MxIndAB = a -> Prec + b -> Prec - 1 ;
48774869
48784870 /* set LHSV c info */
48794871
@@ -4887,51 +4879,40 @@ VpMult(Real *c, Real *a, Real *b)
48874879 goto Cleanup ;
48884880 }
48894881
4890- carry = 0 ;
4891- nc = ind_c = MxIndAB ;
4892- memset (c -> frac , 0 , (nc + 1 ) * sizeof (DECDIG )); /* Initialize c */
4893- c -> Prec = nc + 1 ; /* set precision */
4894- for (nc = 0 ; nc < MxIndAB ; ++ nc , -- ind_c ) {
4895- if (nc < MxIndB ) { /* The left triangle of the Fig. */
4896- ind_as = MxIndA - nc ;
4897- ind_ae = MxIndA ;
4898- ind_bs = MxIndB ;
4899- }
4900- else if (nc <= MxIndA ) { /* The middle rectangular of the Fig. */
4901- ind_as = MxIndA - nc ;
4902- ind_ae = MxIndA - (nc - MxIndB );
4903- ind_bs = MxIndB ;
4904- }
4905- else /* if (nc > MxIndA) */ { /* The right triangle of the Fig. */
4906- ind_as = 0 ;
4907- ind_ae = MxIndAB - nc - 1 ;
4908- ind_bs = MxIndB - (nc - MxIndA );
4909- }
4882+ c -> Prec = a -> Prec + b -> Prec ; /* set precision */
4883+ memset (c -> frac , 0 , c -> Prec * sizeof (DECDIG )); /* Initialize c */
49104884
4911- for (i = ind_as ; i <= ind_ae ; ++ i ) {
4912- s = (DECDIG_DBL )a -> frac [i ] * b -> frac [ind_bs -- ];
4913- carry = (DECDIG )(s / BASE );
4914- s -= (DECDIG_DBL )carry * BASE ;
4915- c -> frac [ind_c ] += (DECDIG )s ;
4916- if (c -> frac [ind_c ] >= BASE ) {
4917- s = c -> frac [ind_c ] / BASE ;
4918- carry += (DECDIG )s ;
4919- c -> frac [ind_c ] -= (DECDIG )(s * BASE );
4885+ // Process 8 decdigits at a time to reduce the number of carry operations.
4886+ a_batch_max = (a -> Prec - 1 ) / 8 ;
4887+ b_batch_max = (b -> Prec - 1 ) / 8 ;
4888+ for (ssize_t ibatch = a_batch_max ; ibatch >= 0 ; ibatch -- ) {
4889+ int isize = ibatch == a_batch_max ? (a -> Prec - 1 ) % 8 + 1 : 8 ;
4890+ for (ssize_t jbatch = b_batch_max ; jbatch >= 0 ; jbatch -- ) {
4891+ int jsize = jbatch == b_batch_max ? (b -> Prec - 1 ) % 8 + 1 : 8 ;
4892+ memset (batch , 0 , (isize + jsize - 1 ) * sizeof (DECDIG_DBL ));
4893+
4894+ // Perform multiplication without carry calculation.
4895+ // 999999999 * 999999999 * 8 < 2**63 - 1, so DECDIG_DBL can hold the intermediate sum without overflow.
4896+ for (int i = 0 ; i < isize ; i ++ ) {
4897+ for (int j = 0 ; j < jsize ; j ++ ) {
4898+ batch [i + j ] += (DECDIG_DBL )a -> frac [ibatch * 8 + i ] * b -> frac [jbatch * 8 + j ];
4899+ }
49204900 }
4921- if (carry ) {
4922- ii = ind_c ;
4923- while (ii -- > 0 ) {
4924- c -> frac [ii ] += carry ;
4925- if (c -> frac [ii ] >= BASE ) {
4926- carry = c -> frac [ii ] / BASE ;
4927- c -> frac [ii ] -= (carry * BASE );
4928- }
4929- else {
4930- break ;
4931- }
4932- }
4933- }
4934- }
4901+
4902+ // Add the batch result to c with carry calculation.
4903+ DECDIG_DBL carry = 0 ;
4904+ for (int k = isize + jsize - 2 ; k >= 0 ; k -- ) {
4905+ size_t l = (ibatch + jbatch ) * 8 + k + 1 ;
4906+ DECDIG_DBL s = c -> frac [l ] + batch [k ] + carry ;
4907+ c -> frac [l ] = (DECDIG )(s % BASE );
4908+ carry = (DECDIG_DBL )(s / BASE );
4909+ }
4910+
4911+ // Adding carry may exceed BASE, but it won't cause overflow of DECDIG.
4912+ // Exceeded value will be resolved in the carry operation of next (ibatch + jbatch - 1) batch.
4913+ // WARNING: This safety strongly relies on the current nested loop execution order.
4914+ c -> frac [(ibatch + jbatch ) * 8 ] += (DECDIG )carry ;
4915+ }
49354916 }
49364917
49374918Cleanup :
0 commit comments