Skip to content

Commit 91e1b4f

Browse files
committed
Multiplication with 8-decdig batch
Perform multiplication with 8-decdigs * 8-decdigs batch. This will reduce carry operation by 1/8, and may enable vectorization.
1 parent 1f2894f commit 91e1b4f

File tree

2 files changed

+50
-55
lines changed

2 files changed

+50
-55
lines changed

ext/bigdecimal/bigdecimal.c

Lines changed: 36 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@
3535

3636
#define BIGDECIMAL_VERSION "4.1.0"
3737

38-
#define NTT_MULTIPLICATION_THRESHOLD 100
39-
#define NEWTON_RAPHSON_DIVISION_THRESHOLD 200
38+
#define NTT_MULTIPLICATION_THRESHOLD 350
39+
#define NEWTON_RAPHSON_DIVISION_THRESHOLD 100
4040
#define SIGNED_VALUE_MAX INTPTR_MAX
4141
#define SIGNED_VALUE_MIN INTPTR_MIN
4242
#define MUL_OVERFLOW_SIGNED_VALUE_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, SIGNED_VALUE_MIN, SIGNED_VALUE_MAX)
@@ -4837,17 +4837,12 @@ VpSetPTR(Real *a, Real *b, Real *c, size_t *a_pos, size_t *b_pos, size_t *c_pos,
48374837
* a0 a1 .... an * b0
48384838
* +_____________________________
48394839
* c0 c1 c2 ...... cl
4840-
* nc <---|
4841-
* MaxAB |--------------------|
48424840
*/
48434841
VP_EXPORT size_t
48444842
VpMult(Real *c, Real *a, Real *b)
48454843
{
4846-
size_t MxIndA, MxIndB, MxIndAB;
4847-
size_t ind_c, i, ii, nc;
4848-
size_t ind_as, ind_ae, ind_bs;
4849-
DECDIG carry;
4850-
DECDIG_DBL s;
4844+
ssize_t a_batch_max, b_batch_max;
4845+
DECDIG_DBL batch[15];
48514846

48524847
if (!VpIsDefOP(c, a, b, OP_SW_MULT)) return 0; /* No significant digit */
48534848

@@ -4871,9 +4866,6 @@ VpMult(Real *c, Real *a, Real *b)
48714866
a = b;
48724867
b = w;
48734868
}
4874-
MxIndA = a->Prec - 1;
4875-
MxIndB = b->Prec - 1;
4876-
MxIndAB = a->Prec + b->Prec - 1;
48774869

48784870
/* set LHSV c info */
48794871

@@ -4887,51 +4879,40 @@ VpMult(Real *c, Real *a, Real *b)
48874879
goto Cleanup;
48884880
}
48894881

4890-
carry = 0;
4891-
nc = ind_c = MxIndAB;
4892-
memset(c->frac, 0, (nc + 1) * sizeof(DECDIG)); /* Initialize c */
4893-
c->Prec = nc + 1; /* set precision */
4894-
for (nc = 0; nc < MxIndAB; ++nc, --ind_c) {
4895-
if (nc < MxIndB) { /* The left triangle of the Fig. */
4896-
ind_as = MxIndA - nc;
4897-
ind_ae = MxIndA;
4898-
ind_bs = MxIndB;
4899-
}
4900-
else if (nc <= MxIndA) { /* The middle rectangular of the Fig. */
4901-
ind_as = MxIndA - nc;
4902-
ind_ae = MxIndA - (nc - MxIndB);
4903-
ind_bs = MxIndB;
4904-
}
4905-
else /* if (nc > MxIndA) */ { /* The right triangle of the Fig. */
4906-
ind_as = 0;
4907-
ind_ae = MxIndAB - nc - 1;
4908-
ind_bs = MxIndB - (nc - MxIndA);
4909-
}
4882+
c->Prec = a->Prec + b->Prec; /* set precision */
4883+
memset(c->frac, 0, c->Prec * sizeof(DECDIG)); /* Initialize c */
49104884

4911-
for (i = ind_as; i <= ind_ae; ++i) {
4912-
s = (DECDIG_DBL)a->frac[i] * b->frac[ind_bs--];
4913-
carry = (DECDIG)(s / BASE);
4914-
s -= (DECDIG_DBL)carry * BASE;
4915-
c->frac[ind_c] += (DECDIG)s;
4916-
if (c->frac[ind_c] >= BASE) {
4917-
s = c->frac[ind_c] / BASE;
4918-
carry += (DECDIG)s;
4919-
c->frac[ind_c] -= (DECDIG)(s * BASE);
4885+
// Process 8 decdigits at a time to reduce the number of carry operations.
4886+
a_batch_max = (a->Prec - 1) / 8;
4887+
b_batch_max = (b->Prec - 1) / 8;
4888+
for (ssize_t ibatch = a_batch_max; ibatch >= 0; ibatch--) {
4889+
int isize = ibatch == a_batch_max ? (a->Prec - 1) % 8 + 1 : 8;
4890+
for (ssize_t jbatch = b_batch_max; jbatch >= 0; jbatch--) {
4891+
int jsize = jbatch == b_batch_max ? (b->Prec - 1) % 8 + 1 : 8;
4892+
memset(batch, 0, (isize + jsize - 1) * sizeof(DECDIG_DBL));
4893+
4894+
// Perform multiplication without carry calculation.
4895+
// 999999999 * 999999999 * 8 < 2**63 - 1, so DECDIG_DBL can hold the intermediate sum without overflow.
4896+
for (int i = 0; i < isize; i++) {
4897+
for (int j = 0; j < jsize; j++) {
4898+
batch[i + j] += (DECDIG_DBL)a->frac[ibatch * 8 + i] * b->frac[jbatch * 8 + j];
4899+
}
49204900
}
4921-
if (carry) {
4922-
ii = ind_c;
4923-
while (ii-- > 0) {
4924-
c->frac[ii] += carry;
4925-
if (c->frac[ii] >= BASE) {
4926-
carry = c->frac[ii] / BASE;
4927-
c->frac[ii] -= (carry * BASE);
4928-
}
4929-
else {
4930-
break;
4931-
}
4932-
}
4933-
}
4934-
}
4901+
4902+
// Add the batch result to c with carry calculation.
4903+
DECDIG_DBL carry = 0;
4904+
for (int k = isize + jsize - 2; k >= 0; k--) {
4905+
size_t l = (ibatch + jbatch) * 8 + k + 1;
4906+
DECDIG_DBL s = c->frac[l] + batch[k] + carry;
4907+
c->frac[l] = (DECDIG)(s % BASE);
4908+
carry = (DECDIG_DBL)(s / BASE);
4909+
}
4910+
4911+
// Adding carry may exceed BASE, but it won't cause overflow of DECDIG.
4912+
// Exceeded value will be resolved in the carry operation of next (ibatch + jbatch - 1) batch.
4913+
// WARNING: This safety strongly relies on the current nested loop execution order.
4914+
c->frac[(ibatch + jbatch) * 8] += (DECDIG)carry;
4915+
}
49354916
}
49364917

49374918
Cleanup:

test/bigdecimal/test_vp_operation.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@ def setup
1414
end
1515

1616
def test_vpmult
17+
# Max carry case
18+
[*32...40].repeated_permutation(2) do |n, m|
19+
x = BigDecimal('9' * BASE_FIG * n)
20+
y = BigDecimal('9' * BASE_FIG * m)
21+
assert_equal(x.to_i * y.to_i, x.vpmult(y))
22+
end
23+
1724
assert_equal(BigDecimal('121932631112635269'), BigDecimal('123456789').vpmult(BigDecimal('987654321')))
1825
assert_equal(BigDecimal('12193263.1112635269'), BigDecimal('123.456789').vpmult(BigDecimal('98765.4321')))
1926
x = 123**456
@@ -22,6 +29,13 @@ def test_vpmult
2229
end
2330

2431
def test_nttmult
32+
# Max carry case
33+
[*32...40].repeated_permutation(2) do |n, m|
34+
x = BigDecimal('9' * BASE_FIG * n)
35+
y = BigDecimal('9' * BASE_FIG * m)
36+
assert_equal(x.to_i * y.to_i, x.nttmult(y))
37+
end
38+
2539
[*1..32].repeated_permutation(2) do |a, b|
2640
x = BigDecimal(10 ** (BASE_FIG * a) / 7)
2741
y = BigDecimal(10 ** (BASE_FIG * b) / 13)

0 commit comments

Comments
 (0)