1 /* An example of extending the speed program to measure routines not in GMP.
2 
3 Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
4 
5 This file is part of the GNU MP Library.
6 
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of either:
9 
10   * the GNU Lesser General Public License as published by the Free
11     Software Foundation; either version 3 of the License, or (at your
12     option) any later version.
13 
14 or
15 
16   * the GNU General Public License as published by the Free Software
17     Foundation; either version 2 of the License, or (at your option) any
18     later version.
19 
20 or both in parallel, as here.
21 
22 The GNU MP Library is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25 for more details.
26 
27 You should have received copies of the GNU General Public License and the
28 GNU Lesser General Public License along with the GNU MP Library.  If not,
29 see https://www.gnu.org/licenses/.  */
30 
31 
32 /* The extension here is three versions of an mpn arithmetic mean.  These
33    aren't meant to be particularly useful, just examples.
34 
35    You can run something like the following to compare their speeds.
36 
37            ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
38 
39    On RISC chips, mean_open() might be fastest if the compiler is doing a
40    good job.  On the register starved x86s, mean_calls will be fastest.
41 
42 
43    Notes:
44 
45    SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
46    by speed.c in useful places.  SPEED_EXTRA_PROTOS goes after the header
47    files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
48 
49    The advantage of this #include "speed.c" scheme is that there's no
50    editing of a copy of that file, and new features in new versions of it
51    will be immediately available.
52 
53    In a real program the routines mean_calls() etc would probably be in
54    separate C or assembler source files, and just the measuring
55    speed_mean_calls() etc would be here.  Linking against other libraries
56    for things to measure is perfectly possible too.
57 
58    When attempting to compare two versions of the same named routine, say
59    like the generic and assembler versions of mpn_add_n(), creative use of
60    cc -D or #define is suggested, so one or both can be renamed and linked
61    into the same program.  It'll be much easier to compare them side by side
62    than with separate programs for each.
63 
64    common.c has notes on writing speed measuring routines.
65 
66    Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
67    not using libtool) to get common.o and other objects needed by speed.c.  */
68 
69 
70 #define SPEED_EXTRA_PROTOS                                              \
71   double speed_mean_calls (struct speed_params *s);			\
72   double speed_mean_open  (struct speed_params *s);			\
73   double speed_mean_open2 (struct speed_params *s);
74 
75 #define SPEED_EXTRA_ROUTINES            \
76   { "mean_calls",  speed_mean_calls  }, \
77   { "mean_open",   speed_mean_open   }, \
78   { "mean_open2",  speed_mean_open2  },
79 
80 #include "speed.c"
81 
82 
83 /* A straightforward implementation calling mpn subroutines.
84 
85    wp,size is set to (xp,size + yp,size) / 2.  The return value is the
86    remainder from the division.  The other versions are the same.  */
87 
88 mp_limb_t
mean_calls(mp_ptr wp,mp_srcptr xp,mp_srcptr yp,mp_size_t size)89 mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
90 {
91   mp_limb_t  c, ret;
92 
93   ASSERT (size >= 1);
94 
95   c = mpn_add_n (wp, xp, yp, size);
96   ret = mpn_rshift (wp, wp, size, 1) >> (GMP_LIMB_BITS-1);
97   wp[size-1] |= (c << (GMP_LIMB_BITS-1));
98   return ret;
99 }
100 
101 
102 /* An open-coded version, making one pass over the data.  The right shift is
103    done as the added limbs are produced.  The addition code follows
104    mpn/generic/add_n.c. */
105 
106 mp_limb_t
mean_open(mp_ptr wp,mp_srcptr xp,mp_srcptr yp,mp_size_t size)107 mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
108 {
109   mp_limb_t  w, wprev, x, y, c, ret;
110   mp_size_t  i;
111 
112   ASSERT (size >= 1);
113 
114   x = xp[0];
115   y = yp[0];
116 
117   wprev = x + y;
118   c = (wprev < x);
119   ret = (wprev & 1);
120 
121 #define RSHIFT(hi,lo)   (((lo) >> 1) | ((hi) << (GMP_LIMB_BITS-1)))
122 
123   for (i = 1; i < size; i++)
124     {
125       x = xp[i];
126       y = yp[i];
127 
128       w = x + c;
129       c = (w < x);
130       w += y;
131       c += (w < y);
132 
133       wp[i-1] = RSHIFT (w, wprev);
134       wprev = w;
135     }
136 
137   wp[i-1] = RSHIFT (c, wprev);
138 
139   return ret;
140 }
141 
142 
143 /* Another one-pass version, but right shifting the source limbs rather than
144    the result limbs.  There's not much chance of this being better than the
145    above, but it's an alternative at least. */
146 
147 mp_limb_t
mean_open2(mp_ptr wp,mp_srcptr xp,mp_srcptr yp,mp_size_t size)148 mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
149 {
150   mp_limb_t  w, x, y, xnext, ynext, c, ret;
151   mp_size_t  i;
152 
153   ASSERT (size >= 1);
154 
155   x = xp[0];
156   y = yp[0];
157 
158   /* ret is the low bit of x+y, c is the carry out of that low bit add */
159   ret = (x ^ y) & 1;
160   c   = (x & y) & 1;
161 
162   for (i = 0; i < size-1; i++)
163     {
164       xnext = xp[i+1];
165       ynext = yp[i+1];
166       x = RSHIFT (xnext, x);
167       y = RSHIFT (ynext, y);
168 
169       w = x + c;
170       c = (w < x);
171       w += y;
172       c += (w < y);
173       wp[i] = w;
174 
175       x = xnext;
176       y = ynext;
177     }
178 
179   wp[i] = (x >> 1) + (y >> 1) + c;
180 
181   return ret;
182 }
183 
184 
185 /* The speed measuring routines are the same apart from which function they
186    run, so a macro is used.  Actually this macro is the same as
187    SPEED_ROUTINE_MPN_BINARY_N.  */
188 
189 #define SPEED_ROUTINE_MEAN(mean_fun)                    \
190   {                                                     \
191     unsigned  i;                                        \
192     mp_ptr    wp;                                       \
193     double    t;                                        \
194     TMP_DECL;                                  \
195                                                         \
196     SPEED_RESTRICT_COND (s->size >= 1);                 \
197                                                         \
198     TMP_MARK;                                  \
199     SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);   \
200                                                         \
201     speed_operand_src (s, s->xp, s->size);              \
202     speed_operand_src (s, s->yp, s->size);              \
203     speed_operand_dst (s, wp, s->size);                 \
204     speed_cache_fill (s);                               \
205                                                         \
206     speed_starttime ();                                 \
207     i = s->reps;                                        \
208     do                                                  \
209       mean_fun (wp, s->xp, s->yp, s->size);             \
210     while (--i != 0);                                   \
211     t = speed_endtime ();                               \
212                                                         \
213     TMP_FREE;                                  \
214     return t;                                           \
215   }
216 
217 double
speed_mean_calls(struct speed_params * s)218 speed_mean_calls (struct speed_params *s)
219 {
220   SPEED_ROUTINE_MEAN (mean_calls);
221 }
222 
223 double
speed_mean_open(struct speed_params * s)224 speed_mean_open (struct speed_params *s)
225 {
226   SPEED_ROUTINE_MEAN (mean_open);
227 }
228 
229 double
speed_mean_open2(struct speed_params * s)230 speed_mean_open2 (struct speed_params *s)
231 {
232   SPEED_ROUTINE_MEAN (mean_open2);
233 }
234