1 // recip2adic().
2 
3 // General includes.
4 #include "base/cl_sysdep.h"
5 
6 // Specification.
7 #include "base/digitseq/cl_2DS.h"
8 
9 
10 // Implementation.
11 
12 #include "base/digit/cl_2D.h"
13 #include "base/digitseq/cl_DS.h"
14 
15 // Break-even point of the Newton iteration vs. standard div2adic.
16 #if CL_USE_GMP
17 const unsigned int recip2adic_threshold = 620;
18 #else
19 // Use the old default values from CLN version <= 1.0.3 as a crude estimate.
20 const unsigned int recip2adic_threshold = 380;
21 #endif
22 
23 namespace cln {
24 
recip2adic(uintC len,const uintD * a_LSDptr,uintD * dest_LSDptr)25 void recip2adic (uintC len, const uintD* a_LSDptr, uintD* dest_LSDptr)
26 {
27 	// Method:
28 	// If len < threshold, use regular 2-adic division.
29 	// Else [Newton iteration] set n := ceiling(len/2),
30 	//   compute recursively b := recip2adic(a mod 2^(intDsize*n)),
31 	//   return 2*b-a*b^2 mod 2^(intDsize*2*n).
32 	CL_ALLOCA_STACK;
33 	var uintL k = 0; // number of Newton steps
34 	var uintC n = len;
35 	while (n >= recip2adic_threshold) {
36 		n = ceiling(n,2);
37 		k++;
38 	}
39 	// Nonrecursive step.
40 	var uintD* one_LSDptr;
41 	num_stack_alloc(n,,one_LSDptr=);
42 	lspref(one_LSDptr,0) = 1;
43 	clear_loop_lsp(one_LSDptr lspop 1,n-1);
44 	div2adic(n,one_LSDptr,a_LSDptr,dest_LSDptr);
45 	// Newton iteration.
46 	if (k > 0) {
47 		var uintD* b2_LSDptr;
48 		var uintD* prod_LSDptr;
49 		num_stack_alloc(len+1,,b2_LSDptr=);
50 		num_stack_alloc(2*len,,prod_LSDptr=);
51 		do {
52 			// n = ceiling(len/2^k)
53 			// Compute n2 = ceiling(len/2^(k-1)),
54 			// then n = ceiling(n2/2).
55 			k--;
56 			var uintC n2 = ((len-1)>>k)+1; // = 2*n or = 2*n-1
57 			// Set b := 2*b-a*b^2 mod 2^(intDsize*n2)
58 			cl_UDS_mul_square(dest_LSDptr,n,b2_LSDptr); // b^2
59 			cl_UDS_mul(b2_LSDptr,n2,a_LSDptr,n2,prod_LSDptr); // a*b^2
60 			clear_loop_lsp(dest_LSDptr lspop n,n2-n);
61 			shift1left_loop_lsp(dest_LSDptr,n+1); // (n+1 instead of n2 is ok)
62 			subfrom_loop_lsp(prod_LSDptr,dest_LSDptr,n2);
63 			n = n2;
64 		} while (k > 0);
65 	}
66 }
67 // Bit complexity (N := len): O(M(N)).
68 
69 }  // namespace cln
70