1 /* Copyright (C) 2019-2022 Free Software Foundation, Inc.
2
3 This file is part of LIBF7, which is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
8 version.
9
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "libf7.h"
25
26 #ifndef __AVR_TINY__
27
28 #define ALIAS(X, Y) \
29 F7_WEAK __attribute__((__alias__(F7_STRINGY(X)))) __typeof__(X) Y;
30
31 #define DALIAS(...) // empty
32 #define LALIAS(...) // empty
33
34 #ifndef IN_LIBGCC2
35
36 #include <stdio.h>
37 #include <assert.h>
38
39 #define in_libgcc false
40
41 _Static_assert (sizeof (f7_t) == 10 && F7_MANT_BYTES == 7,
42 "libf7 will only work with 7-byte mantissa.");
43 #else
44
45 #define in_libgcc true
46
47 #if __SIZEOF_DOUBLE__ == 8
48 #undef DALIAS
49 #define DALIAS(X,Y) \
50 F7_WEAK __attribute__((__alias__(F7_STRINGY(X)))) __typeof__(X) Y;
51 #endif
52
53 #if __SIZEOF_LONG_DOUBLE__ == 8
54 #undef LALIAS
55 #define LALIAS(X,Y) \
56 F7_WEAK __attribute__((__alias__(F7_STRINGY(X)))) __typeof__(X) Y;
57 #endif
58
59 #endif // in libgcc
60
61 static F7_INLINE
f7_assert(bool x)62 void f7_assert (bool x)
63 {
64 if (!in_libgcc && !x)
65 __builtin_abort();
66 }
67
68 static F7_INLINE
abs_ssat16(int16_t a)69 int16_t abs_ssat16 (int16_t a)
70 {
71 _Sat _Fract sa = __builtin_avr_rbits (a);
72 return __builtin_avr_bitsr (__builtin_avr_absr (sa));
73 }
74
75 static F7_INLINE
add_ssat16(int16_t a,int16_t b)76 int16_t add_ssat16 (int16_t a, int16_t b)
77 {
78 _Sat _Fract sa = __builtin_avr_rbits (a);
79 _Sat _Fract sb = __builtin_avr_rbits (b);
80 return __builtin_avr_bitsr (sa + sb);
81 }
82
83 static F7_INLINE
sub_ssat16(int16_t a,int16_t b)84 int16_t sub_ssat16 (int16_t a, int16_t b)
85 {
86 _Sat _Fract sa = __builtin_avr_rbits (a);
87 _Sat _Fract sb = __builtin_avr_rbits (b);
88 return __builtin_avr_bitsr (sa - sb);
89 }
90
91 static F7_INLINE
ssat8_range(int16_t a,int8_t range)92 int8_t ssat8_range (int16_t a, int8_t range)
93 {
94 if (a >= range)
95 return range;
96 if (a <= -range)
97 return -range;
98 return a;
99 }
100
101
102 #define IN_LIBF7_H
103 #define F7_CONST_DEF(NAME, FLAGS, M6, M5, M4, M3, M2, M1, M0, EXPO) \
104 F7_UNUSED static const uint8_t F7_(const_##NAME##_msb) = M6; \
105 F7_UNUSED static const int16_t F7_(const_##NAME##_expo) = EXPO;
106 #include "libf7-const.def"
107 #undef F7_CONST_DEF
108 #undef IN_LIBF7_H
109
110
111 /*
112 libgcc naming converntions for conversions:
113
114 __float<fmode><fmode> : Convert float modes.
115 __floatun<imode><fmode>: Convert unsigned integral to float.
116 __fix<fmode><imode> : Convert float to signed integral.
117 __fixuns<fmode><imode> : Convert float to unsigned integral.
118 */
119
120
121 #ifdef F7MOD_floatundidf_
122 F7_WEAK
__floatundidf(uint64_t x)123 f7_double_t __floatundidf (uint64_t x)
124 {
125 f7_t xx;
126 f7_set_u64 (&xx, x);
127 return f7_get_double (&xx);
128 }
129 #endif // F7MOD_floatundidf_
130
131
132 #ifdef F7MOD_floatdidf_
133 F7_WEAK
__floatdidf(int64_t x)134 f7_double_t __floatdidf (int64_t x)
135 {
136 f7_t xx;
137 f7_set_s64 (&xx, x);
138 return f7_get_double (&xx);
139 }
140 #endif // F7MOD_floatdidf_
141
142
143 #ifdef F7MOD_init_
f7_init_impl(uint64_t mant,uint8_t flags,f7_t * cc,int16_t expo)144 f7_t* f7_init_impl (uint64_t mant, uint8_t flags, f7_t *cc, int16_t expo)
145 {
146 flags &= F7_FLAGS;
147 if (f7_class_number (flags))
148 {
149 uint8_t msb;
150 while ((__builtin_memcpy (&msb, (uint8_t*) &mant + 7, 1), msb))
151 {
152 mant >>= 1;
153 expo = add_ssat16 (expo, 1);
154 }
155 *(uint64_t*) cc->mant = mant;
156 cc->expo = add_ssat16 (expo, F7_MANT_BITS-1);
157
158 cc = f7_normalize_asm (cc);
159 }
160
161 cc->flags = flags;
162
163 return cc;
164 }
165 #endif // F7MOD_init_
166
167
168 #ifdef F7MOD_set_s16_
f7_set_s16_impl(f7_t * cc,int16_t i16)169 f7_t* f7_set_s16_impl (f7_t *cc, int16_t i16)
170 {
171 uint16_t u16 = (uint16_t) i16;
172 uint8_t flags = 0;
173 if (i16 < 0)
174 {
175 u16 = -u16;
176 flags = F7_FLAG_sign;
177 }
178 f7_set_u16_impl (cc, u16);
179 cc->flags = flags;
180 return cc;
181 }
182 #endif // F7MOD_set_s16_
183
184
185 #ifdef F7MOD_set_u16_
f7_set_u16_impl(f7_t * cc,uint16_t u16)186 f7_t* f7_set_u16_impl (f7_t *cc, uint16_t u16)
187 {
188 f7_clr (cc);
189 F7_MANT_HI2 (cc) = u16;
190 cc->expo = 15;
191 return f7_normalize_asm (cc);
192 }
193 #endif // F7MOD_set_u16_
194
195
196 #ifdef F7MOD_set_s32_
f7_set_s32(f7_t * cc,int32_t i32)197 f7_t* f7_set_s32 (f7_t *cc, int32_t i32)
198 {
199 uint32_t u32 = (uint32_t) i32;
200 uint8_t flags = 0;
201 if (i32 < 0)
202 {
203 u32 = -u32;
204 flags = F7_FLAG_sign;
205 }
206 cc = f7_set_u32 (cc, u32);
207 cc->flags = flags;
208 return cc;
209 }
ALIAS(f7_set_s32,f7_floatsidf)210 ALIAS (f7_set_s32, f7_floatsidf)
211 #endif // F7MOD_set_s32_
212
213
214 #ifdef F7MOD_set_u32_
215 f7_t* f7_set_u32 (f7_t *cc, uint32_t u32)
216 {
217 f7_clr (cc);
218 F7_MANT_HI4 (cc) = u32;
219 cc->expo = 31;
220 return f7_normalize_asm (cc);
221 }
ALIAS(f7_set_u32,f7_floatunsidf)222 ALIAS (f7_set_u32, f7_floatunsidf)
223 #endif // F7MOD_set_u32_
224
225
226 // IEEE 754 single
227 // float = s bbbbbbbb mmmmmmmmmmmmmmmmmmmmmmm
228 // 31
229 // s = sign
230 // b = biased exponent, bias = 127
231 // m = mantissa
232
233 // +0 = 0 0 0
234 // -0 = 1 0 0
235 // Inf = S B 0 = S * Inf, B = 0xff
236 // NaN = S B M, B = 0xff, M != 0
237 // Sub-normal = S 0 M = S * 0.M * 2^{1 - bias}, M != 0
238 // Normal = S B M = S * 1.M * 2^{B - bias}, B = 1 ... 0xfe
239
240 #define FLT_DIG_EXP 8
241 #define FLT_DIG_MANT (31 - FLT_DIG_EXP)
242 #define FLT_MAX_EXP ((1 << FLT_DIG_EXP) - 1)
243 #define FLT_EXP_BIAS (FLT_MAX_EXP >> 1)
244
245 #ifdef F7MOD_set_float_
246 F7_WEAK
247 void f7_set_float (f7_t *cc, float f)
248 {
249 uint32_t val32;
250
251 _Static_assert (__SIZEOF_FLOAT__ == 4, "");
252 _Static_assert (__FLT_MANT_DIG__ == 24, "");
253 __builtin_memcpy (&val32, &f, __SIZEOF_FLOAT__);
254
255 uint16_t val16 = val32 >> 16;
256 val16 >>= FLT_DIG_MANT - 16;
257
258 uint8_t expo_biased = val16 & FLT_MAX_EXP;
259 bool sign = val16 & (1u << FLT_DIG_EXP);
260
261 f7_clr (cc);
262
263 uint32_t mant = val32 & ((1ul << FLT_DIG_MANT) -1);
264
265 if (mant == 0)
266 {
267 if (expo_biased == 0)
268 return;
269 if (expo_biased >= FLT_MAX_EXP)
270 return f7_set_inf (cc, sign);
271 }
272
273 if (expo_biased == 0)
274 expo_biased = 1; // Sub-normal: biased expo of 1 was encoded as 0.
275 else if (expo_biased < FLT_MAX_EXP)
276 mant |= (1ul << FLT_DIG_MANT);
277 else
278 return f7_set_nan (cc);
279
280 __builtin_memcpy (& F7_MANT_HI4 (cc), &mant, 4);
281
282 cc->expo = expo_biased - FLT_EXP_BIAS + 31 - FLT_DIG_MANT;
283 f7_normalize_asm (cc);
284 f7_set_sign (cc, sign);
285 }
ALIAS(f7_set_float,f7_extendsfdf2)286 ALIAS (f7_set_float, f7_extendsfdf2)
287 #endif // F7MOD_set_float_
288
289
290 #ifdef F7MOD_get_float_
291 static F7_INLINE
292 float make_float (uint32_t x)
293 {
294 float ff;
295 __builtin_memcpy (&ff, &x, 4);
296 return ff;
297
298 }
299
300 F7_WEAK
f7_get_float(const f7_t * aa)301 float f7_get_float (const f7_t *aa)
302 {
303 uint8_t a_class = f7_classify (aa);
304
305 if (f7_class_nan (a_class))
306 return make_float (0xffc00000 /* NaN: Biased expo = 0xff, mant != 0 */);
307
308 uint32_t mant;
309 __builtin_memcpy (&mant, &F7_MANT_CONST_HI4 (aa), 4);
310
311 uint8_t expo8 = 0;
312 uint8_t mant_offset = FLT_DIG_EXP;
313 int16_t c_expo = add_ssat16 (aa->expo, FLT_EXP_BIAS);
314
315 if (f7_class_zero (a_class) || c_expo <= -FLT_DIG_MANT)
316 {
317 // Zero or tiny.
318 return 0.0f;
319 }
320 else if (c_expo >= FLT_MAX_EXP || f7_class_inf (a_class))
321 {
322 // Inf or overflow.
323 expo8 = FLT_MAX_EXP;
324 mant = 0;
325 }
326 else if (c_expo > 0)
327 {
328 // Normal.
329 expo8 = c_expo;
330 }
331 else
332 {
333 // Sub-normal: -DIG_MANT < c_expo <= 0.
334 // Encoding of 0 represents a biased exponent of 1.
335 // mant_offset in 9...31.
336 expo8 = 0;
337 mant_offset += 1 - c_expo;
338 }
339
340 uint16_t expo16 = expo8 << (FLT_DIG_MANT - 16);
341
342 if (f7_class_sign (a_class))
343 expo16 |= 1u << (FLT_DIG_EXP + FLT_DIG_MANT - 16);
344
345 mant >>= mant_offset;
346
347 __asm ("cbr %T0%t2, 1 << (7 & %2)" "\n\t"
348 "or %C0, %A1" "\n\t"
349 "or %D0, %B1"
350 : "+d" (mant)
351 : "r" (expo16), "n" (FLT_DIG_MANT));
352
353 return make_float (mant);
354 }
ALIAS(f7_get_float,f7_truncdfsf2)355 F7_PURE ALIAS (f7_get_float, f7_truncdfsf2)
356 #endif // F7MOD_get_float_
357
358 #define DBL_DIG_EXP 11
359 #define DBL_DIG_MANT (63 - DBL_DIG_EXP)
360 #define DBL_MAX_EXP ((1 << DBL_DIG_EXP) - 1)
361 #define DBL_EXP_BIAS (DBL_MAX_EXP >> 1)
362
363
364 #ifdef F7MOD_set_double_
365 void f7_set_double_impl (f7_double_t val64, f7_t *cc)
366 {
367 f7_clr (cc);
368 register uint64_t mant __asm ("r18") = val64 & ((1ull << DBL_DIG_MANT) -1);
369
370 uint16_t val16 = 3[(uint16_t*) & val64];
371 val16 >>= DBL_DIG_MANT - 48;
372
373 uint16_t expo_biased = val16 & DBL_MAX_EXP;
374 bool sign = val16 & (1u << DBL_DIG_EXP);
375
376 if (mant == 0)
377 {
378 if (expo_biased == 0)
379 return;
380 if (expo_biased >= DBL_MAX_EXP)
381 return f7_set_inf (cc, sign);
382 }
383 __asm ("" : "+r" (mant));
384
385 if (expo_biased == 0)
386 expo_biased = 1; // Sub-normal: biased expo of 1 was encoded as 0.
387 else if (expo_biased < DBL_MAX_EXP)
388 mant |= (1ull << DBL_DIG_MANT);
389 else
390 return f7_set_nan (cc);
391
392 *(uint64_t*) & cc->mant = mant;
393
394 cc->expo = expo_biased - DBL_EXP_BIAS + 63 - DBL_DIG_MANT - 8;
395 f7_normalize_asm (cc);
396 f7_set_sign (cc, sign);
397 }
398 #endif // F7MOD_set_double_
399
400
401 #ifdef F7MOD_set_pdouble_
f7_set_pdouble(f7_t * cc,const f7_double_t * val64)402 void f7_set_pdouble (f7_t *cc, const f7_double_t *val64)
403 {
404 f7_set_double (cc, *val64);
405 }
406 #endif // F7MOD_set_pdouble_
407
408
409 #ifdef F7MOD_get_double_
410 static F7_INLINE
clr_r18(void)411 uint64_t clr_r18 (void)
412 {
413 extern void __clr_8 (void);
414 register uint64_t r18 __asm ("r18");
415 __asm ("%~call %x[f]" : "=r" (r18) : [f] "i" (__clr_8));
416 return r18;
417 }
418
419 static F7_INLINE
make_double(uint64_t x)420 f7_double_t make_double (uint64_t x)
421 {
422 register f7_double_t r18 __asm ("r18") = x;
423 __asm ("" : "+r" (r18));
424 return r18;
425 }
426
427 F7_WEAK
f7_get_double(const f7_t * aa)428 f7_double_t f7_get_double (const f7_t *aa)
429 {
430 uint8_t a_class = f7_classify (aa);
431
432 if (f7_class_nan (a_class))
433 {
434 uint64_t nan = clr_r18() | (0x7fffull << 48);
435 return make_double (nan);
436 }
437
438 uint64_t mant;
439 __builtin_memcpy (&mant, & aa->mant, 8);
440
441 mant &= 0x00ffffffffffffff;
442
443 // FIXME: For subnormals, rounding is premature and should be
444 // done *after* the mantissa has been shifted into place
445 // (or the round value be shifted left accordingly).
446 // Round.
447 mant += 1u << (F7_MANT_BITS - (1 + DBL_DIG_MANT) - 1);
448
449 uint8_t dex;
450 register uint64_t r18 __asm ("r18") = mant;
451 // dex = Overflow ? 1 : 0.
452 __asm ("bst %T[mant]%T[bitno]" "\n\t"
453 "clr %0" "\n\t"
454 "bld %0,0"
455 : "=r" (dex), [mant] "+r" (r18)
456 : [bitno] "n" (64 - 8));
457
458 mant = r18 >> dex;
459
460 uint16_t expo16 = 0;
461 uint16_t mant_offset = DBL_DIG_EXP - 8;
462 int16_t c_expo = add_ssat16 (aa->expo, DBL_EXP_BIAS + dex);
463
464 if (f7_class_zero (a_class) || c_expo <= -DBL_DIG_MANT)
465 {
466 // Zero or tiny.
467 return make_double (clr_r18());
468 }
469 else if (c_expo >= DBL_MAX_EXP || f7_class_inf (a_class))
470 {
471 // Inf or overflow.
472 expo16 = DBL_MAX_EXP;
473 mant = clr_r18();
474 }
475 else if (c_expo > 0)
476 {
477 // Normal.
478 expo16 = c_expo;
479 }
480 else
481 {
482 // Sub-normal: -DIG_MANT < c_expo <= 0.
483 // Encoding expo of 0 represents a biased exponent of 1.
484 // mant_offset in 5...55 = 63-8.
485 mant_offset += 1 - c_expo;
486 }
487
488 expo16 <<= (DBL_DIG_MANT - 48);
489
490 if (f7_class_sign (a_class))
491 expo16 |= 1u << (DBL_DIG_EXP + DBL_DIG_MANT - 48);
492
493 // mant >>= mant_offset;
494 mant = f7_lshrdi3 (mant, mant_offset);
495
496 r18 = mant;
497 __asm ("cbr %T0%t2, 1 << (7 & %2)" "\n\t"
498 "or %r0+6, %A1" "\n\t"
499 "or %r0+7, %B1"
500 : "+r" (r18)
501 : "r" (expo16), "n" (DBL_DIG_MANT));
502
503 return make_double (r18);
504 }
505 #endif // F7MOD_get_double_
506
507
508 #ifdef F7MOD_fabs_
509 F7_WEAK
f7_fabs(f7_t * cc,const f7_t * aa)510 void f7_fabs (f7_t *cc, const f7_t *aa)
511 {
512 f7_abs (cc, aa);
513 }
514 #endif // F7MOD_fabs_
515
516
517 #ifdef F7MOD_neg_
518 F7_WEAK
f7_neg(f7_t * cc,const f7_t * aa)519 f7_t* f7_neg (f7_t *cc, const f7_t *aa)
520 {
521 f7_copy (cc, aa);
522
523 uint8_t c_class = f7_classify (cc);
524
525 if (! f7_class_zero (c_class))
526 cc->sign = ! f7_class_sign (c_class);
527
528 return cc;
529 }
530 #endif // F7MOD_neg_
531
532
533 #ifdef F7MOD_frexp_
534 F7_WEAK
f7_frexp(f7_t * cc,const f7_t * aa,int * expo)535 void f7_frexp (f7_t *cc, const f7_t *aa, int *expo)
536 {
537 uint8_t a_class = f7_classify (aa);
538
539 if (f7_class_nan (a_class))
540 return f7_set_nan (cc);
541
542 if (f7_class_inf (a_class) || aa->expo == INT16_MAX)
543 return f7_set_inf (cc, f7_class_sign (a_class));
544
545 if (! f7_msbit (aa))
546 {
547 *expo = 0;
548 return f7_clr (cc);
549 }
550
551 *expo = 1 + aa->expo;
552 cc->flags = a_class & F7_FLAG_sign;
553 cc->expo = -1;
554 f7_copy_mant (cc, aa);
555 }
556 #endif // F7MOD_frexp_
557
558 #ifdef F7MOD_get_s16_
559 F7_WEAK
f7_get_s16(const f7_t * aa)560 int16_t f7_get_s16 (const f7_t *aa)
561 {
562 extern int16_t to_s16 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm);
563 return to_s16 (aa, 0xf);
564 }
565 #endif // F7MOD_get_s16_
566
567
568 #ifdef F7MOD_get_s32_
569 F7_WEAK
f7_get_s32(const f7_t * aa)570 int32_t f7_get_s32 (const f7_t *aa)
571 {
572 extern int32_t to_s32 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm);
573 return to_s32 (aa, 0x1f);
574 }
ALIAS(f7_get_s32,f7_fixdfsi)575 F7_PURE ALIAS (f7_get_s32, f7_fixdfsi)
576 #endif // F7MOD_get_s32_
577
578
579 #ifdef F7MOD_get_s64_
580 F7_WEAK
581 int64_t f7_get_s64 (const f7_t *aa)
582 {
583 extern int64_t to_s64 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm);
584 return to_s64 (aa, 0x3f);
585 }
ALIAS(f7_get_s64,f7_fixdfdi)586 F7_PURE ALIAS (f7_get_s64, f7_fixdfdi)
587 #endif // F7MOD_get_s64_
588
589 #ifdef F7MOD_get_u16_
590 F7_WEAK
591 uint16_t f7_get_u16 (const f7_t *aa)
592 {
593 extern uint16_t to_u16 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm);
594 return to_u16 (aa, 0xf);
595 }
596 #endif // F7MOD_get_u16_
597
598
599 #ifdef F7MOD_get_u32_
600 F7_WEAK
f7_get_u32(const f7_t * aa)601 uint32_t f7_get_u32 (const f7_t *aa)
602 {
603 extern uint32_t to_u32 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm);
604 return to_u32 (aa, 0x1f);
605 }
ALIAS(f7_get_u32,f7_fixunsdfsi)606 F7_PURE ALIAS (f7_get_u32, f7_fixunsdfsi)
607 #endif // F7MOD_get_u32_
608
609
610 #ifdef F7MOD_get_u64_
611 F7_WEAK
612 uint64_t f7_get_u64 (const f7_t *aa)
613 {
614 extern int64_t to_u64 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm);
615 return to_u64 (aa, 0x3f);
616 }
617 F7_PURE ALIAS (f7_get_u64, f7_fixunsdfdi)
618 #endif // F7MOD_get_u64_
619
620
621 #ifdef F7MOD_cmp_unordered_
622 F7_NOINLINE
623 static int8_t cmp_u8 (uint8_t a_class, uint8_t b_class, bool sign_a);
624
625 F7_WEAK
f7_cmp_unordered(const f7_t * aa,const f7_t * bb,bool with_sign)626 int8_t f7_cmp_unordered (const f7_t *aa, const f7_t *bb, bool with_sign)
627 {
628 uint8_t a_class = f7_classify (aa);
629 uint8_t b_class = f7_classify (bb);
630
631 uint8_t a_sign = f7_class_sign (a_class) & with_sign;
632 uint8_t b_sign = f7_class_sign (b_class) & with_sign;
633 uint8_t ab_class = a_class | b_class;
634 ab_class &= with_sign - 2;
635
636 if (f7_class_nan (ab_class))
637 return INT8_MIN;
638
639 if (a_sign != b_sign)
640 return b_sign - a_sign;
641
642 if (f7_class_inf (ab_class))
643 return cmp_u8 (a_class, b_class, a_sign);
644
645 if (f7_class_zero (ab_class))
646 return cmp_u8 (b_class, a_class, a_sign);
647
648 if (aa->expo < bb->expo)
649 return a_sign ? 1 : -1;
650
651 if (aa->expo > bb->expo)
652 return a_sign ? -1 : 1;
653
654 return cmp_u8 (1 + f7_cmp_mant (aa, bb), 1, a_sign);
655 }
656
657
cmp_u8(uint8_t a_class,uint8_t b_class,bool sign_a)658 int8_t cmp_u8 (uint8_t a_class, uint8_t b_class, bool sign_a)
659 {
660 int8_t c;
661 __asm ("sub %[a], %[b]" "\n\t"
662 "breq 1f" "\n\t"
663 "sbc %[c], %[c]" "\n\t"
664 "sbci %[c], -1" "\n\t"
665 "sbrc %[s], 0" "\n\t"
666 "neg %[c]" "\n\t"
667 "1:"
668 : [c] "=d" (c)
669 : [a] "0" (a_class), [b] "r" (b_class), [s] "r" (sign_a));
670 return c;
671 }
672 #endif // F7MOD_cmp_unordered_
673
674
675 #ifdef F7MOD_cmp_abs_
676 F7_WEAK
f7_cmp_abs(const f7_t * aa,const f7_t * bb)677 int8_t f7_cmp_abs (const f7_t *aa, const f7_t *bb)
678 {
679 return f7_cmp_unordered (aa, bb, false /* no signs */);
680 }
681 #endif // F7MOD_cmp_abs_
682
683
684 #ifdef F7MOD_cmp_
685 F7_WEAK
f7_cmp(const f7_t * aa,const f7_t * bb)686 int8_t f7_cmp (const f7_t *aa, const f7_t *bb)
687 {
688 return f7_cmp_unordered (aa, bb, true /* with signs */);
689 }
690 #endif // F7MOD_cmp_
691
692
693 #ifdef F7MOD_abscmp_msb_ge_
694 // Compare absolute value of Number aa against a f7_t represented
695 // by msb and expo.
696 F7_WEAK
f7_abscmp_msb_ge(const f7_t * aa,uint8_t msb,int16_t expo)697 bool f7_abscmp_msb_ge (const f7_t *aa, uint8_t msb, int16_t expo)
698 {
699 uint8_t a_msb = aa->mant[F7_MANT_BYTES - 1];
700
701 if (0 == (0x80 & a_msb))
702 // 0 or subnormal.
703 return false;
704
705 return aa->expo == expo
706 ? a_msb >= msb
707 : aa->expo > expo;
708 }
709 #endif // F7MOD_abscmp_msb_ge_
710
711 #ifdef F7MOD_lt_
712 F7_WEAK
f7_lt_impl(const f7_t * aa,const f7_t * bb)713 bool f7_lt_impl (const f7_t *aa, const f7_t *bb)
714 {
715 return f7_lt (aa, bb);
716 }
717 #endif // F7MOD_lt_
718
719 #ifdef F7MOD_le_
720 F7_WEAK
f7_le_impl(const f7_t * aa,const f7_t * bb)721 bool f7_le_impl (const f7_t *aa, const f7_t *bb)
722 {
723 return f7_le (aa, bb);
724 }
725 #endif // F7MOD_le_
726
727 #ifdef F7MOD_gt_
728 F7_WEAK
f7_gt_impl(const f7_t * aa,const f7_t * bb)729 bool f7_gt_impl (const f7_t *aa, const f7_t *bb)
730 {
731 return f7_gt (aa, bb);
732 }
733 #endif // F7MOD_gt_
734
735 #ifdef F7MOD_ge_
736 F7_WEAK
f7_ge_impl(const f7_t * aa,const f7_t * bb)737 bool f7_ge_impl (const f7_t *aa, const f7_t *bb)
738 {
739 return f7_ge (aa, bb);
740 }
741 #endif // F7MOD_ge_
742
743 #ifdef F7MOD_ne_
744 F7_WEAK
f7_ne_impl(const f7_t * aa,const f7_t * bb)745 bool f7_ne_impl (const f7_t *aa, const f7_t *bb)
746 {
747 return f7_ne (aa, bb);
748 }
749 #endif // F7MOD_ne_
750
751 #ifdef F7MOD_eq_
752 F7_WEAK
f7_eq_impl(const f7_t * aa,const f7_t * bb)753 bool f7_eq_impl (const f7_t *aa, const f7_t *bb)
754 {
755 return f7_eq (aa, bb);
756 }
757 #endif // F7MOD_eq_
758
759
760 #ifdef F7MOD_unord_
761 F7_WEAK
f7_unord_impl(const f7_t * aa,const f7_t * bb)762 bool f7_unord_impl (const f7_t *aa, const f7_t *bb)
763 {
764 return f7_unordered (aa, bb);
765 }
766 #endif // F7MOD_unord_
767
768
769 #ifdef F7MOD_minmax_
770 F7_WEAK
f7_minmax(f7_t * cc,const f7_t * aa,const f7_t * bb,bool do_min)771 f7_t* f7_minmax (f7_t *cc, const f7_t *aa, const f7_t *bb, bool do_min)
772 {
773 int8_t cmp = f7_cmp_unordered (aa, bb, true /* with signs */);
774
775 if (cmp == INT8_MIN)
776 return (f7_set_nan (cc), cc);
777
778 if (do_min)
779 cmp = -cmp;
780
781 return f7_copy (cc, cmp >= 0 ? aa : bb);
782 }
783 #endif // F7MOD_minmax_
784
785
786 #ifdef F7MOD_fmax_
787 F7_WEAK
f7_fmax(f7_t * cc,const f7_t * aa,const f7_t * bb)788 f7_t* f7_fmax (f7_t *cc, const f7_t *aa, const f7_t *bb)
789 {
790 return f7_minmax (cc, aa, bb, false);
791 }
ALIAS(f7_fmax,f7_max)792 ALIAS (f7_fmax, f7_max)
793 #endif // F7MOD_fmax_
794
795
796 #ifdef F7MOD_fmin_
797 F7_WEAK
798 f7_t* f7_fmin (f7_t *cc, const f7_t *aa, const f7_t *bb)
799 {
800 return f7_minmax (cc, aa, bb, true);
801 }
ALIAS(f7_fmin,f7_min)802 ALIAS (f7_fmin, f7_min)
803 #endif // F7MOD_fmin_
804
805
806 #ifdef F7MOD_mulx_
807 F7_WEAK
808 uint8_t f7_mulx (f7_t *cc, const f7_t *aa, const f7_t *bb, bool no_rounding)
809 {
810 uint8_t a_class = f7_classify (aa);
811 uint8_t b_class = f7_classify (bb);
812 // From this point on, no more access aa->flags or bb->flags
813 // to avoid early-clobber when writing cc->flags.
814
815 uint8_t ab_class = a_class | b_class;
816 // If either value is NaN, return NaN.
817 if (f7_class_nan (ab_class)
818 // Any combination of Inf and 0.
819 || (f7_class_zero (ab_class) && f7_class_inf (ab_class)))
820 {
821 cc->flags = F7_FLAG_nan;
822 return 0;
823 }
824 // If either value is 0.0, return 0.0.
825 if (f7_class_zero (ab_class))
826 {
827 f7_clr (cc);
828 return 0;
829 }
830 // We have 2 non-zero numbers-or-INF.
831
832 uint8_t c_sign = (a_class ^ b_class) & F7_FLAG_sign;
833 uint8_t c_inf = ab_class & F7_FLAG_inf;
834 cc->flags = c_sign | c_inf;
835 if (c_inf)
836 return 0;
837
838 int16_t expo = add_ssat16 (aa->expo, bb->expo);
839 // Store expo and handle expo = INT16_MIN and INT16_MAX.
840 if (f7_store_expo (cc, expo))
841 return 0;
842
843 return f7_mul_mant_asm (cc, aa, bb, no_rounding);
844 }
845 #endif // F7MOD_mulx_
846
847
848 #ifdef F7MOD_square_
849 F7_WEAK
f7_square(f7_t * cc,const f7_t * aa)850 void f7_square (f7_t *cc, const f7_t *aa)
851 {
852 f7_mul (cc, aa, aa);
853 }
854 #endif // F7MOD_square_
855
856
857 #ifdef F7MOD_mul_
858 F7_WEAK
f7_mul(f7_t * cc,const f7_t * aa,const f7_t * bb)859 void f7_mul (f7_t *cc, const f7_t *aa, const f7_t *bb)
860 {
861 f7_mulx (cc, aa, bb, false);
862 }
863 #endif // F7MOD_mul_
864
865
866 #ifdef F7MOD_Iadd_
f7_Iadd(f7_t * cc,const f7_t * aa)867 F7_WEAK void f7_Iadd (f7_t *cc, const f7_t *aa) { f7_add (cc, cc, aa); }
868 #endif
869
870 #ifdef F7MOD_Isub_
f7_Isub(f7_t * cc,const f7_t * aa)871 F7_WEAK void f7_Isub (f7_t *cc, const f7_t *aa) { f7_sub (cc, cc, aa); }
872 #endif
873
874 #ifdef F7MOD_Imul_
f7_Imul(f7_t * cc,const f7_t * aa)875 F7_WEAK void f7_Imul (f7_t *cc, const f7_t *aa) { f7_mul (cc, cc, aa); }
876 #endif
877
878 #ifdef F7MOD_Idiv_
f7_Idiv(f7_t * cc,const f7_t * aa)879 F7_WEAK void f7_Idiv (f7_t *cc, const f7_t *aa) { f7_div (cc, cc, aa); }
880 #endif
881
882 #ifdef F7MOD_IRsub_
f7_IRsub(f7_t * cc,const f7_t * aa)883 F7_WEAK void f7_IRsub (f7_t *cc, const f7_t *aa) { f7_sub (cc, aa, cc); }
884 #endif
885
886 #ifdef F7MOD_Ineg_
f7_Ineg(f7_t * cc)887 F7_WEAK void f7_Ineg (f7_t *cc) { f7_neg (cc, cc); }
888 #endif
889
890 #ifdef F7MOD_Isqrt_
f7_Isqrt(f7_t * cc)891 F7_WEAK void f7_Isqrt (f7_t *cc) { f7_sqrt (cc, cc); }
892 #endif
893
894 #ifdef F7MOD_Isquare_
f7_Isquare(f7_t * cc)895 F7_WEAK void f7_Isquare (f7_t *cc) { f7_square (cc, cc); }
896 #endif
897
898 #ifdef F7MOD_Ildexp_
f7_Ildexp(f7_t * cc,int ex)899 F7_WEAK f7_t* f7_Ildexp (f7_t *cc, int ex) { return f7_ldexp (cc, cc, ex); }
900 #endif
901
902
903 #ifdef F7MOD_add_
904 F7_WEAK
f7_add(f7_t * cc,const f7_t * aa,const f7_t * bb)905 void f7_add (f7_t *cc, const f7_t *aa, const f7_t *bb)
906 {
907 f7_addsub (cc, aa, bb, false);
908 }
909 #endif // F7MOD_add_
910
911
912 #ifdef F7MOD_sub_
913 F7_WEAK
f7_sub(f7_t * cc,const f7_t * aa,const f7_t * bb)914 void f7_sub (f7_t *cc, const f7_t *aa, const f7_t *bb)
915 {
916 f7_addsub (cc, aa, bb, true);
917 }
918 #endif // F7MOD_sub_
919
920
921 #ifdef F7MOD_addsub_
return_with_sign(f7_t * cc,const f7_t * aa,int8_t c_sign)922 static void return_with_sign (f7_t *cc, const f7_t *aa, int8_t c_sign)
923 {
924 __asm (";;; return with sign");
925 f7_copy (cc, aa);
926 if (c_sign != -1)
927 f7_set_sign (cc, c_sign);
928 }
929
930 F7_WEAK
f7_addsub(f7_t * cc,const f7_t * aa,const f7_t * bb,bool neg_b)931 void f7_addsub (f7_t *cc, const f7_t *aa, const f7_t *bb, bool neg_b)
932 {
933 uint8_t a_class = f7_classify (aa);
934 uint8_t b_class = f7_classify (bb);
935 // From this point on, no more access aa->flags or bb->flags
936 // to avoid early-clobber when writing cc->flags.
937
938 // Hande NaNs.
939 if (f7_class_nan (a_class | b_class))
940 return f7_set_nan (cc);
941
942 bool a_sign = f7_class_sign (a_class);
943 bool b_sign = f7_class_sign (b_class) ^ neg_b;
944
945 // Add the mantissae?
946 bool do_add = a_sign == b_sign;
947
948 // Handle +Infs and -Infs.
949 bool a_inf = f7_class_inf (a_class);
950 bool b_inf = f7_class_inf (b_class);
951
952 if (a_inf && b_inf)
953 {
954 if (do_add)
955 return f7_set_inf (cc, a_sign);
956 else
957 return f7_set_nan (cc);
958 }
959 else if (a_inf)
960 return f7_set_inf (cc, a_sign);
961 else if (b_inf)
962 return f7_set_inf (cc, b_sign);
963
964 int16_t shift16 = sub_ssat16 (aa->expo, bb->expo);
965
966 // aa + 0 = aa.
967 // Also check MSBit to get rid of Subnormals and 0.
968 if (shift16 > F7_MANT_BITS || f7_is0 (bb))
969 return return_with_sign (cc, aa, -1);
970
971 // 0 + bb = bb.
972 // 0 - bb = -bb.
973 // Also check MSBit to get rid of Subnormals and 0.
974 if (shift16 < -F7_MANT_BITS || f7_is0 (aa))
975 return return_with_sign (cc, bb, b_sign);
976
977 // Now aa and bb are non-zero, non-NaN, non-Inf.
978 // shift > 0 ==> |a| > |b|
979 // shift < 0 ==> |a| < |b|
980 int8_t shift = (int8_t) shift16;
981 bool c_sign = a_sign;
982 if (shift < 0
983 || (shift == 0 && f7_cmp_mant (aa, bb) < 0))
984 {
985 const f7_t *p = aa; aa = bb; bb = p;
986 c_sign = b_sign;
987 shift = -shift;
988 }
989 uint8_t shift2 = (uint8_t) (shift << 1);
990
991 cc->expo = aa->expo;
992 // From this point on, no more access aa->expo or bb->expo
993 // to avoid early-clobber when writing cc->expo.
994
995 cc->flags = c_sign; _Static_assert (F7_FLAGNO_sign == 0, "");
996
997 // This function uses neither .expo nor .flags from either aa or bb,
998 // hence there is early-clobber for cc->expo and cc->flags.
999 f7_addsub_mant_scaled_asm (cc, aa, bb, shift2 | do_add);
1000 }
1001 #endif // F7MOD_addsub_
1002
1003
1004 #ifdef F7MOD_madd_msub_
1005 F7_WEAK
f7_madd_msub(f7_t * cc,const f7_t * aa,const f7_t * bb,const f7_t * dd,bool neg_d)1006 void f7_madd_msub (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd,
1007 bool neg_d)
1008 {
1009 f7_t xx7, *xx = &xx7;
1010 uint8_t x_lsb = f7_mulx (xx, aa, bb, true /* no rounding */);
1011 uint8_t x_sign = f7_signbit (xx);
1012 int16_t x_expo = xx->expo;
1013 f7_addsub (xx, xx, dd, neg_d);
1014 // Now add LSB. If cancellation occured in the add / sub, then we have the
1015 // chance of extra 8 bits of precision. Turn LSByte into f7_t.
1016 f7_clr (cc);
1017 cc->expo = sub_ssat16 (x_expo, F7_MANT_BITS);
1018 cc->mant[F7_MANT_BYTES - 1] = x_lsb;
1019 cc = f7_normalize_asm (cc);
1020 cc->flags = x_sign;
1021 f7_Iadd (cc, xx);
1022 }
1023 #endif // F7MOD_madd_msub_
1024
1025 #ifdef F7MOD_madd_
1026 F7_WEAK
f7_madd(f7_t * cc,const f7_t * aa,const f7_t * bb,const f7_t * dd)1027 void f7_madd (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd)
1028 {
1029 f7_madd_msub (cc, aa, bb, dd, false);
1030 }
1031 #endif // F7MOD_madd_
1032
1033 #ifdef F7MOD_msub_
1034 F7_WEAK
f7_msub(f7_t * cc,const f7_t * aa,const f7_t * bb,const f7_t * dd)1035 void f7_msub (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd)
1036 {
1037 f7_madd_msub (cc, aa, bb, dd, true);
1038 }
1039 #endif // F7MOD_msub_
1040
1041
1042 #ifdef F7MOD_ldexp_
1043 F7_WEAK
f7_ldexp(f7_t * cc,const f7_t * aa,int delta)1044 f7_t* f7_ldexp (f7_t *cc, const f7_t *aa, int delta)
1045 {
1046 uint8_t a_class = f7_classify (aa);
1047
1048 cc->flags = a_class;
1049
1050 // Inf and NaN.
1051 if (! f7_class_number (a_class))
1052 return cc;
1053
1054 if (f7_msbit (aa) == 0)
1055 return (f7_clr (cc), cc);
1056
1057 int16_t expo = add_ssat16 (delta, aa->expo);
1058 // Store expo and handle expo = INT16_MIN and INT16_MAX.
1059 if (! f7_store_expo (cc, expo))
1060 f7_copy_mant (cc, aa);
1061
1062 return cc;
1063 }
1064 #endif // F7MOD_ldexp_
1065
1066
1067 #if USE_LPM
1068 #elif USE_LD
1069 #else
1070 #error need include "asm-defs.h"
1071 #endif // USE_LPM
1072
1073 /*
1074 Handling constants:
1075
1076 F7_PCONST (PVAR, X)
1077
1078 Set f7_t [const] *PVAR to an LD address for one
1079 of the f7_const_X[_P] constants.
1080 PVAR might be set to point to a local auto that serves
1081 as temporary storage for f7_const_X_P. PVAR is only
1082 valid in the current block.
1083
1084 const f7_t* F7_PCONST_U16 (PVAR, <ident> X) // USE_LD
1085 f7_t* F7_PCONST_U16 (PVAR, uint16_t X) // USE_LPM
1086
1087 Set f7_t [const] *PVAR to an LD address for one of the
1088 f7_const_X[_P] constants. PVAR might be set to point to a
1089 local auto that serves as temporary storage for X. PVAR is
1090 only valid in the current block.
1091
1092 F7_PCONST_VAR (PVAR, VAR)
1093
1094 VAR is a pointer variable holding the address of some f7_const_X[_P]
1095 constant. Set [const] f7_t *PVAR to a respective LD address.
1096 PVAR might be set to point to a local auto that serves
1097 as temporary storage for f7_const_X_P. PVAR is only
1098 valid in the current block.
1099
1100 F7_CONST_ADDR (<ident> CST, f7_t* PTMP)
1101
1102 Return an LD address to for some f7_const_X[_P] constant.
1103 *PTMP might be needed to hold a copy of f7_const_X_P in RAM.
1104
1105 f7_t* F7_U16_ADDR (uint16_t X, f7_t* PTMP) // USE_LPM
1106 const f7_t* F7_U16_ADDR (<cst-ident> X, <unused>) // USE_LD
1107
1108 Return an LD address to compile-time constant uint16_t X which is
1109 also known as f7_const_X[_P]. *PTMP might be set to (f7_t) X.
1110
1111 f7_t* f7_const (f7_t *PVAR, <cst-ident> X)
1112
1113 Copy f7_const_X[_P] to *PVAR.
1114
1115 f7_t* f7_copy_flash (f7_t *DST, const f7_t *SRC)
1116
1117 Copy to *DST with LD (from .rodata in flash) if the address
1118 space is linear, or with LPM (from .progmem.data) if the
1119 address space is not linear.
1120
1121 f7_t* f7_copy (f7_t *DST, const f7_t* SRC)
1122
1123 Copy to RAM using LD.
1124
1125 f7_t* f7_copy_P (f7_t *DST, const f7_t *SRC)
1126
1127 Copy to RAM using LPM.
1128 */
1129
1130 #if USE_LPM
1131 #define F7_RAW_CONST_ADDR(CST) \
1132 & F7_(const_##CST##_P)
1133
1134 #define F7_PCONST(PVAR, CST) \
1135 f7_t _var_for_##CST; \
1136 f7_copy_P (& _var_for_##CST, & F7_(const_##CST##_P)); \
1137 PVAR = & _var_for_##CST
1138
1139 #define F7_PCONST_U16(PVAR, CST) \
1140 f7_t _var_for_##CST; \
1141 PVAR = f7_set_u16 (& _var_for_##CST, CST)
1142
1143 #define F7_PCONST_VAR(PVAR, VAR) \
1144 f7_t _var_for_##VAR; \
1145 f7_copy_P (& _var_for_##VAR, VAR); \
1146 PVAR = & _var_for_##VAR
1147
1148 #define MAYBE_const // empty
1149
1150 #define F7_CONST_ADDR(CST, PTMP) \
1151 f7_copy_P ((PTMP), & F7_(const_##CST##_P))
1152
1153 #define F7_U16_ADDR(CST, PTMP) \
1154 f7_set_u16 ((PTMP), CST)
1155
1156 #elif USE_LD
1157 #define F7_RAW_CONST_ADDR(CST) \
1158 & F7_(const_##CST)
1159
1160 #define F7_PCONST(PVAR, CST) \
1161 PVAR = & F7_(const_##CST)
1162
1163 #define F7_PCONST_U16(PVAR, CST) \
1164 PVAR = & F7_(const_##CST)
1165
1166 #define F7_PCONST_VAR(PVAR, VAR) \
1167 PVAR = (VAR)
1168
1169 #define F7_CONST_ADDR(CST, PTMP) \
1170 (& F7_(const_##CST))
1171
1172 #define F7_U16_ADDR(CST, PTMP) \
1173 (& F7_(const_##CST))
1174
1175 #define MAYBE_const const
1176 #endif
1177
1178
1179
1180 #define DD(str, X) \
1181 do { \
1182 LOG_PSTR (PSTR (str)); \
1183 f7_dump (X); \
1184 } while (0)
1185
1186 #undef DD
1187 #define DD(...) (void) 0
1188
1189
1190 #ifdef F7MOD_sqrt_
sqrt_worker(f7_t * cc,const f7_t * rr)1191 static void sqrt_worker (f7_t *cc, const f7_t *rr)
1192 {
1193 f7_t tmp7, *tmp = &tmp7;
1194 f7_t aa7, *aa = &aa7;
1195
1196 // aa in [1/2, 2) => aa->expo in { -1, 0 }.
1197 int16_t a_expo = -(rr->expo & 1);
1198 int16_t c_expo = (rr->expo - a_expo) >> 1; // FIXME: r_expo = INT_MAX???
1199
1200 __asm ("" : "+r" (aa));
1201
1202 f7_copy (aa, rr);
1203 aa->expo = a_expo;
1204
1205 // No use of rr or *cc past this point: We may use cc as temporary.
1206 // Approximate square-root of A by X <-- (X + A / X) / 2.
1207
1208 f7_sqrt_approx_asm (cc, aa);
1209
1210 // Iterate X <-- (X + A / X) / 2.
1211 // 3 Iterations with 16, 32, 58 bits of precision for the quotient.
1212
1213 for (uint8_t prec = 16; (prec & 0x80) == 0; prec <<= 1)
1214 {
1215 f7_divx (tmp, aa, cc, (prec & 64) ? 2 + F7_MANT_BITS : prec);
1216 f7_Iadd (cc, tmp);
1217 // This will never underflow because |c_expo| is small.
1218 cc->expo--;
1219 }
1220
1221 // Similar: |c_expo| is small, hence no ldexp needed.
1222 cc->expo += c_expo;
1223 }
1224
1225 F7_WEAK
f7_sqrt(f7_t * cc,const f7_t * aa)1226 void f7_sqrt (f7_t *cc, const f7_t *aa)
1227 {
1228 uint8_t a_class = f7_classify (aa);
1229
1230 if (f7_class_nan (a_class) || f7_class_sign (a_class))
1231 return f7_set_nan (cc);
1232
1233 if (f7_class_inf (a_class))
1234 return f7_set_inf (cc, 0);
1235
1236 if (f7_class_zero (a_class))
1237 return f7_clr (cc);
1238
1239 sqrt_worker (cc, aa);
1240 }
1241 #endif // F7MOD_sqrt_
1242
1243
1244 #ifdef F7MOD_hypot_
1245 F7_WEAK
f7_hypot(f7_t * cc,const f7_t * aa,const f7_t * bb)1246 void f7_hypot (f7_t *cc, const f7_t *aa, const f7_t *bb)
1247 {
1248 f7_t xx7, *xx = &xx7;
1249
1250 f7_square (xx, aa);
1251 f7_square (cc, bb);
1252 f7_Iadd (cc, xx);
1253 f7_Isqrt (cc);
1254 }
1255 #endif // F7MOD_hypot_
1256
1257
1258 #ifdef F7MOD_const_m1_
1259 #include "libf7-constdef.h"
1260 #endif // -1
1261
1262 #ifdef F7MOD_const_1_2_
1263 #include "libf7-constdef.h"
1264 #endif // 1/2
1265
1266 #ifdef F7MOD_const_1_3_
1267 #include "libf7-constdef.h"
1268 #endif // 1/3
1269
1270 #ifdef F7MOD_const_ln2_
1271 #include "libf7-constdef.h"
1272 #endif // ln2
1273
1274 #ifdef F7MOD_const_1_ln2_
1275 #include "libf7-constdef.h"
1276 #endif // 1_ln2
1277
1278 #ifdef F7MOD_const_ln10_
1279 #include "libf7-constdef.h"
1280 #endif // ln10
1281
1282 #ifdef F7MOD_const_1_ln10_
1283 #include "libf7-constdef.h"
1284 #endif // 1_ln10
1285
1286 #ifdef F7MOD_const_1_
1287 #include "libf7-constdef.h"
1288 #endif // 1
1289
1290 #ifdef F7MOD_const_sqrt2_
1291 #include "libf7-constdef.h"
1292 #endif // sqrt2
1293
1294 #ifdef F7MOD_const_2_
1295 #include "libf7-constdef.h"
1296 #endif // 2
1297
1298 #ifdef F7MOD_const_pi_
1299 #include "libf7-constdef.h"
1300 #endif // pi
1301
1302
1303 #ifdef F7MOD_divx_
1304
1305 // C /= A
1306 extern void f7_div_asm (f7_t*, const f7_t*, uint8_t);
1307
1308 F7_WEAK
f7_divx(f7_t * cc,const f7_t * aa,const f7_t * bb,uint8_t quot_bits)1309 void f7_divx (f7_t *cc, const f7_t *aa, const f7_t *bb, uint8_t quot_bits)
1310 {
1311 uint8_t a_class = f7_classify (aa);
1312 uint8_t b_class = f7_classify (bb);
1313 // From this point on, no more access aa->flags or bb->flags
1314 // to avoid early-clobber when writing cc->flags.
1315
1316 // If either value is NaN, return NaN.
1317 if (f7_class_nan (a_class | b_class)
1318 // If both values are Inf or both are 0, return NaN.
1319 || f7_class_zero (a_class & b_class)
1320 || f7_class_inf (a_class & b_class)
1321 // Inf / 0 = NaN.
1322 || (f7_class_inf (a_class) && f7_class_zero (b_class)))
1323 {
1324 return f7_set_nan (cc);
1325 }
1326
1327 // 0 / B = 0 for non-zero, non-NaN B.
1328 // A / Inf = 0 for non-zero numbers A.
1329 if (f7_class_zero (a_class) || f7_class_inf (b_class))
1330 return f7_clr (cc);
1331
1332 uint8_t c_sign = (a_class ^ b_class) & F7_FLAG_sign;
1333
1334 if (f7_class_inf (a_class) || f7_class_zero (b_class))
1335 return f7_set_inf (cc, c_sign);
1336
1337 cc->flags = c_sign; _Static_assert (F7_FLAGNO_sign == 0, "");
1338 int16_t expo = sub_ssat16 (aa->expo, bb->expo);
1339
1340 // Store expo and handle expo = INT16_MIN and INT16_MAX.
1341 if (f7_store_expo (cc, expo))
1342 return;
1343
1344 f7_t ss7, *ss = &ss7;
1345 ss->flags = cc->flags;
1346 ss->expo = cc->expo;
1347
1348 f7_copy_mant (ss, aa);
1349 f7_div_asm (ss, bb, quot_bits);
1350 f7_copy (cc, ss);
1351 }
1352 #endif // F7MOD_divx_
1353
1354
1355 #ifdef F7MOD_div_
1356 F7_WEAK
f7_div(f7_t * cc,const f7_t * aa,const f7_t * bb)1357 void f7_div (f7_t *cc, const f7_t *aa, const f7_t *bb)
1358 {
1359 /* When f7_divx calls f7_div_asm, dividend and divisor are valid
1360 mantissae, i.e. their MSBit is set. Therefore, the quotient will
1361 be in [0x0.ff..., 0x0.40...] and to adjust it, at most 1 left-shift
1362 is needed. Compute F7_MANT_BITS + 2 bits of the quotient:
1363 One bit is used for rounding, and one bit might be consumed by the
1364 mentioned left-shift. */
1365
1366 f7_divx (cc, aa, bb, 2 + F7_MANT_BITS);
1367 }
1368 #endif // F7MOD_div_
1369
1370
1371 #ifdef F7MOD_div1_
1372 F7_WEAK
f7_div1(f7_t * cc,const f7_t * aa)1373 void f7_div1 (f7_t *cc, const f7_t *aa)
1374 {
1375 F7_PCONST_U16 (const f7_t *one, 1);
1376 f7_div (cc, one, aa);
1377 }
1378 #endif // F7MOD_div_
1379
1380
1381 #ifdef F7MOD_fmod_
1382 F7_WEAK
f7_fmod(f7_t * cc,const f7_t * aa,const f7_t * bb)1383 void f7_fmod (f7_t *cc, const f7_t *aa, const f7_t *bb)
1384 {
1385 uint8_t a_class = f7_classify (aa);
1386 uint8_t b_class = f7_classify (bb);
1387
1388 if (! f7_class_number (a_class)
1389 || f7_class_nan (b_class)
1390 || f7_class_zero (b_class))
1391 {
1392 return f7_set_nan (cc);
1393 }
1394
1395 // A == 0 and B != 0 => 0.
1396 if (f7_class_zero (a_class))
1397 return f7_clr (cc);
1398
1399 f7_t zz7, *zz = & zz7;
1400
1401 f7_div (zz, aa, bb);
1402
1403 // Z in Z, |Z| <= |A/B|.
1404 f7_trunc (zz, zz);
1405
1406 // C = A - Z * B.
1407 f7_msub (cc, zz, bb, aa);
1408 cc->flags ^= F7_FLAG_sign;
1409 }
1410 #endif // F7MOD_fmod_
1411
1412
1413 #ifdef F7MOD_truncx_
1414 F7_WEAK
f7_truncx(f7_t * cc,const f7_t * aa,bool do_floor)1415 f7_t* f7_truncx (f7_t *cc, const f7_t *aa, bool do_floor)
1416 {
1417 uint8_t a_class = f7_classify (aa);
1418
1419 if (! f7_class_nonzero (a_class))
1420 return f7_copy (cc, aa);
1421
1422 bool sign = f7_class_sign (a_class);
1423
1424 int16_t a_expo = aa->expo;
1425
1426 if (a_expo < 0)
1427 {
1428 // |A| < 1.
1429 if (sign & do_floor)
1430 return f7_set_s16 (cc, -1);
1431
1432 f7_clr (cc);
1433 return cc;
1434 }
1435 else if (a_expo >= F7_MANT_BITS - 1)
1436 // A == floor (A).
1437 return f7_copy (cc, aa);
1438
1439 f7_t tmp7, *tmp = &tmp7;
1440
1441 // Needed if aa === cc.
1442 f7_copy (tmp, aa);
1443
1444 cc->flags = sign;
1445 cc->expo = a_expo;
1446 f7_clr_mant_lsbs (cc, aa, F7_MANT_BITS - 1 - a_expo);
1447
1448 if (do_floor && cc->sign && f7_cmp_mant (cc, tmp) != 0)
1449 {
1450 F7_PCONST_U16 (const f7_t *one, 1);
1451 f7_Isub (cc, one);
1452 }
1453
1454 return cc;
1455 }
1456 #endif // F7MOD_truncx_
1457
1458
1459 #ifdef F7MOD_floor_
1460 F7_WEAK
f7_floor(f7_t * cc,const f7_t * aa)1461 f7_t* f7_floor (f7_t *cc, const f7_t *aa)
1462 {
1463 return f7_truncx (cc, aa, true);
1464 }
1465 #endif // F7MOD_floor_
1466
1467
1468 #ifdef F7MOD_trunc_
1469 F7_WEAK
f7_trunc(f7_t * cc,const f7_t * aa)1470 f7_t* f7_trunc (f7_t *cc, const f7_t *aa)
1471 {
1472 return f7_truncx (cc, aa, false);
1473 }
1474 #endif // F7MOD_trunc_
1475
1476
1477 #ifdef F7MOD_ceil_
1478 F7_WEAK
f7_ceil(f7_t * cc,const f7_t * aa)1479 void f7_ceil (f7_t *cc, const f7_t *aa)
1480 {
1481 cc = f7_copy (cc, aa);
1482 cc->flags ^= F7_FLAG_sign;
1483 cc = f7_floor (cc, cc);
1484 f7_Ineg (cc);
1485 }
1486 #endif // F7MOD_ceil_
1487
1488
1489 #ifdef F7MOD_round_
1490 F7_WEAK
f7_round(f7_t * cc,const f7_t * aa)1491 void f7_round (f7_t *cc, const f7_t *aa)
1492 {
1493 f7_t tmp;
1494 (void) tmp;
1495 const f7_t *half = F7_CONST_ADDR (1_2, &tmp);
1496
1497 f7_addsub (cc, aa, half, f7_signbit (aa));
1498 f7_trunc (cc, cc);
1499 }
1500 #endif // F7MOD_round_
1501
1502
1503 #ifdef F7MOD_horner_
1504
1505 // Assertion when using this function is that either cc != xx,
1506 // or if cc == xx, then tmp1 must be non-NULL and tmp1 != xx.
1507 // In General, the calling functions have a spare f7_t object available
1508 // and can pass it down to save some stack.
1509 // Moreover, the power series must have degree 1 at least.
1510
1511 F7_WEAK
f7_horner(f7_t * cc,const f7_t * xx,uint8_t n_coeff,const f7_t * coeff,f7_t * tmp1)1512 void f7_horner (f7_t *cc, const f7_t *xx, uint8_t n_coeff, const f7_t *coeff,
1513 f7_t *tmp1)
1514 {
1515 f7_assert (n_coeff > 1);
1516
1517 if (cc != xx)
1518 tmp1 = cc;
1519 else
1520 f7_assert (tmp1 != NULL && tmp1 != xx);
1521
1522 f7_t *yy = tmp1;
1523 f7_t tmp27, *tmp2 = &tmp27;
1524
1525 n_coeff--;
1526 const f7_t *pcoeff = coeff + n_coeff;
1527
1528 f7_copy_flash (yy, pcoeff);
1529
1530 while (1)
1531 {
1532 --pcoeff;
1533 #if 1
1534 f7_Imul (yy, xx);
1535 const f7_t *cst = USE_LD ? pcoeff : f7_copy_P (tmp2, pcoeff);
1536 if (coeff == pcoeff)
1537 return f7_add (cc, yy, cst);
1538 f7_Iadd (yy, cst);
1539 #else
1540 const f7_t *cst = USE_LD ? pcoeff : f7_copy_P (tmp2, pcoeff);
1541 f7_madd (yy, yy, xx, cst);
1542 if (coeff == pcoeff)
1543 {
1544 f7_copy (cc, yy);
1545 return;
1546 }
1547 #endif
1548 }
1549
1550 __builtin_unreachable();
1551 }
1552 #endif // F7MOD_horner_
1553
1554
1555 #ifdef F7MOD_log_
1556 F7_WEAK
f7_log(f7_t * cc,const f7_t * aa)1557 void f7_log (f7_t *cc, const f7_t *aa)
1558 {
1559 f7_logx (cc, aa, NULL);
1560 }
1561 #endif // F7MOD_log_
1562
1563
1564 #ifdef F7MOD_log2_
1565 F7_WEAK
f7_log2(f7_t * cc,const f7_t * aa)1566 void f7_log2 (f7_t *cc, const f7_t *aa)
1567 {
1568 f7_logx (cc, aa, F7_RAW_CONST_ADDR (1_ln2));
1569 }
1570 #endif // F7MOD_log2_
1571
1572
1573 #ifdef F7MOD_log10_
1574 F7_WEAK
f7_log10(f7_t * cc,const f7_t * aa)1575 void f7_log10 (f7_t *cc, const f7_t *aa)
1576 {
1577 f7_logx (cc, aa, F7_RAW_CONST_ADDR (1_ln10));
1578 }
1579 #endif // F7MOD_log10_
1580
1581
1582 #ifdef F7MOD_logx_
1583
1584 #define ARRAY_NAME coeff_artanh
1585 #include "libf7-array.def"
1586 #undef ARRAY_NAME
1587
1588 // Compute P * ln(A) if P != NULL and ln(A), otherwise.
1589 // P is a LD-address if USE_LD and a LPM-address if USE_LPM.
1590 // Assumption is that P > 0.
1591
1592 F7_WEAK
f7_logx(f7_t * cc,const f7_t * aa,const f7_t * p)1593 void f7_logx (f7_t *cc, const f7_t *aa, const f7_t *p)
1594 {
1595 uint8_t a_class = f7_classify (aa);
1596
1597 if (f7_class_nan (a_class) || f7_class_sign (a_class))
1598 return f7_set_nan (cc);
1599
1600 if (f7_class_inf (a_class))
1601 return f7_set_inf (cc, 0);
1602
1603 if (f7_class_zero (a_class))
1604 return f7_set_inf (cc, 1);
1605
1606 f7_t *yy = cc;
1607 f7_t xx7, *xx = &xx7;
1608 f7_t tmp7, *tmp = &tmp7;
1609
1610 // Y in [1, 2] = A * 2 ^ (-a_expo).
1611 int16_t a_expo = aa->expo;
1612 f7_copy (yy, aa);
1613 yy->expo = 0;
1614
1615 // Y in [1 / sqrt2, sqrt2].
1616
1617 if (f7_abscmp_msb_ge (yy, F7_(const_sqrt2_msb), F7_(const_sqrt2_expo)))
1618 {
1619 yy->expo = -1;
1620 a_expo = add_ssat16 (a_expo, 1);
1621 }
1622
1623 const f7_t *one = F7_U16_ADDR (1, & tmp7);
1624
1625 // X := (Y - 1) / (Y + 1), |X| <= (sqrt2 - 1) / (sqrt2 + 1) ~ 0.172.
1626 f7_sub (xx, yy, one);
1627 f7_Iadd (yy, one);
1628 f7_Idiv (xx, yy);
1629
1630 // Y := X^2, |Y| < 0.03.
1631 f7_square (yy, xx);
1632
1633 // Y := artanh (X^2) / X
1634 f7_horner (yy, yy, n_coeff_artanh, coeff_artanh, tmp);
1635
1636 // C = X * Y = ln A - a_expo * ln2.
1637 f7_mul (cc, xx, yy);
1638
1639 // X := a_expo * ln2.
1640 f7_set_s16 (xx, a_expo);
1641 f7_Imul (xx, F7_CONST_ADDR (ln2, & tmp7));
1642
1643 // C = ln A.
1644 f7_Iadd (cc, xx);
1645
1646 if (p && USE_LPM)
1647 f7_Imul (cc, f7_copy_P (tmp, p));
1648 if (p && USE_LD)
1649 f7_Imul (cc, p);
1650 }
1651 #endif // F7MOD_logx_
1652
1653
1654 #ifdef F7MOD_exp_
1655
1656 #define ARRAY_NAME coeff_exp
1657 #include "libf7-array.def"
1658 #undef ARRAY_NAME
1659
1660 #define STATIC static
1661 #include "libf7-constdef.h" // ln2_low
1662 #undef STATIC
1663
1664 F7_WEAK
f7_exp(f7_t * cc,const f7_t * aa)1665 void f7_exp (f7_t *cc, const f7_t *aa)
1666 {
1667 uint8_t a_class = f7_classify (aa);
1668
1669 if (f7_class_nan (a_class))
1670 return f7_set_nan (cc);
1671
1672 /* The maximal exponent of 2 for a double is 1023, hence we may limit
1673 to |A| < 1023 * ln2 ~ 709. We limit to 1024 ~ 1.99 * 2^9 */
1674
1675 if (f7_class_inf (a_class)
1676 || (f7_class_nonzero (a_class) && aa->expo >= 9))
1677 {
1678 if (f7_class_sign (a_class))
1679 return f7_clr (cc);
1680 else
1681 return f7_set_inf (cc, 0);
1682 }
1683
1684 f7_t const *cst;
1685 f7_t qq7, *qq = &qq7;
1686
1687 F7_PCONST (cst, ln2);
1688
1689 // We limited |A| to 1024 and are now dividing by ln2, hence Q will
1690 // be at most 1024 / ln2 ~ 1477 and fit into 11 bits. We will
1691 // round Q anyway, hence only request 11 bits from the division and
1692 // one additional bit that might be needed to normalize the quotient.
1693 f7_divx (qq, aa, cst, 1 + 11);
1694
1695 // Use the smallest (by absolute value) remainder system.
1696 f7_round (qq, qq);
1697 int16_t q = f7_get_s16 (qq);
1698
1699 // Reducing A mod ln2 gives |C| <= ln2 / 2, C = -A mod ln2.
1700 f7_msub (cc, qq, cst, aa);
1701
1702 // Corrigendum: We added Q * ln2; now add Q times the low part of ln2
1703 // for better precision. Due to |C| < |A| this is not a no-op in general.
1704 const f7_t *yy = F7_CONST_ADDR (ln2_low, &_var_for_ln2);
1705 f7_madd (cc, qq, yy, cc);
1706
1707 // Because we computed C = -A mod ...
1708 cc->flags ^= F7_FLAG_sign;
1709
1710 // Reduce further to |C| < ln2 / 8 which is the range of our MiniMax poly.
1711 const uint8_t MAX_LN2_RED = 3;
1712 int8_t scal2 = 0;
1713
1714 while (f7_abscmp_msb_ge (cc, F7_(const_ln2_msb),
1715 F7_(const_ln2_expo) - MAX_LN2_RED))
1716 {
1717 scal2++;
1718 cc->expo--;
1719 }
1720
1721 f7_horner (cc, cc, n_coeff_exp, coeff_exp, qq);
1722
1723 while (--scal2 >= 0)
1724 f7_Isquare (cc);
1725
1726 f7_Ildexp (cc, q);
1727 }
1728 #endif // F7MOD_exp_
1729
1730
1731 #ifdef F7MOD_pow10_
1732 F7_WEAK
f7_pow10(f7_t * cc,const f7_t * aa)1733 void f7_pow10 (f7_t *cc, const f7_t *aa)
1734 {
1735 const f7_t *p_ln10;
1736 F7_PCONST (p_ln10, ln10);
1737 f7_mul (cc, aa, p_ln10);
1738 f7_exp (cc, cc);
1739 }
ALIAS(f7_pow10,f7_exp10)1740 ALIAS (f7_pow10, f7_exp10)
1741 #endif // F7MOD_pow10_
1742
1743
1744 #ifdef F7MOD_cbrt_
1745 F7_WEAK
1746 void f7_cbrt (f7_t *cc, const f7_t *aa)
1747 {
1748 f7_copy (cc, aa);
1749 const f7_t *p_1_3;
1750 uint8_t c_flags = cc->flags;
1751 cc->flags &= ~F7_FLAG_sign;
1752 f7_log (cc, cc);
1753 F7_PCONST (p_1_3, 1_3);
1754 f7_Imul (cc, p_1_3);
1755 f7_exp (cc, cc);
1756
1757 if (c_flags & F7_FLAG_sign)
1758 cc->flags |= F7_FLAG_sign;
1759 }
1760 #endif // F7MOD_cbrt_
1761
1762
1763 #ifdef F7MOD_pow_
1764 F7_WEAK
f7_pow(f7_t * cc,const f7_t * aa,const f7_t * bb)1765 void f7_pow (f7_t *cc, const f7_t *aa, const f7_t *bb)
1766 {
1767 #if 0
1768 f7_t slots[cc == bb];
1769 f7_t *yy = cc == bb ? slots : cc;
1770 #else
1771 f7_t yy7, *yy = &yy7;
1772 #endif
1773 f7_log (yy, aa);
1774 f7_Imul (yy, bb);
1775 f7_exp (cc, yy);
1776 }
1777 #endif // F7MOD_pow_
1778
1779
1780 #ifdef F7MOD_powi_
1781 F7_WEAK
f7_powi(f7_t * cc,const f7_t * aa,int ii)1782 void f7_powi (f7_t *cc, const f7_t *aa, int ii)
1783 {
1784 uint16_t u16 = ii;
1785 f7_t xx27, *xx2 = &xx27;
1786
1787 if (ii < 0)
1788 u16 = -u16;
1789
1790 f7_copy (xx2, aa);
1791
1792 f7_set_u16 (cc, 1);
1793
1794 while (1)
1795 {
1796 if (u16 & 1)
1797 f7_Imul (cc, xx2);
1798
1799 if (! f7_is_nonzero (cc))
1800 break;
1801
1802 u16 >>= 1;
1803 if (u16 == 0)
1804 break;
1805 f7_Isquare (xx2);
1806 }
1807
1808 if (ii < 0)
1809 f7_div1 (xx2, aa);
1810 }
1811 #endif // F7MOD_powi_
1812
1813
1814 #ifdef F7MOD_sincos_
1815
1816 #define ARRAY_NAME coeff_sin
1817 #define FOR_SIN
1818 #include "libf7-array.def"
1819 #undef FOR_SIN
1820 #undef ARRAY_NAME
1821
1822 #define ARRAY_NAME coeff_cos
1823 #define FOR_COS
1824 #include "libf7-array.def"
1825 #undef FOR_COS
1826 #undef ARRAY_NAME
1827
1828 #define STATIC static
1829 #include "libf7-constdef.h" // pi_low
1830 #undef STATIC
1831
1832 typedef union
1833 {
1834 struct
1835 {
1836 bool neg_sin : 1; // Must be bit F7_FLAGNO_sign.
1837 bool neg_cos : 1;
1838 bool do_sin: 1;
1839 bool do_cos: 1;
1840 bool swap_sincos : 1;
1841 uint8_t res : 3;
1842 };
1843 uint8_t bits;
1844 } sincos_t;
1845
1846
1847 F7_WEAK
f7_sincos(f7_t * ss,f7_t * cc,const f7_t * aa)1848 void f7_sincos (f7_t *ss, f7_t *cc, const f7_t *aa)
1849 {
1850 uint8_t a_class = f7_classify (aa);
1851
1852 sincos_t sc = { .bits = a_class & F7_FLAG_sign };
1853 if (ss != NULL) sc.do_sin = 1;
1854 if (cc != NULL) sc.do_cos = 1;
1855
1856 if (f7_class_nan (a_class) || f7_class_inf (a_class))
1857 {
1858 if (sc.do_sin) f7_set_nan (ss);
1859 if (sc.do_cos) f7_set_nan (cc);
1860 return;
1861 }
1862
1863 f7_t pi7, *pi = &pi7;
1864 f7_t xx7, *xx = &xx7;
1865 f7_t yy7, *yy = &yy7;
1866 f7_t *hh = sc.do_sin ? ss : cc;
1867
1868 // X = |A|
1869 f7_copy (xx, aa);
1870 xx->flags = 0;
1871
1872 // Y is how often we subtract PI from X.
1873 f7_clr (yy);
1874 f7_const (pi, pi);
1875
1876 if (f7_abscmp_msb_ge (xx, F7_(const_pi_msb), F7_(const_pi_expo) + 1))
1877 {
1878 pi->expo = 1 + F7_(const_pi_expo); // 2*pi
1879
1880 // Y = X / 2pi.
1881 f7_div (yy, xx, pi);
1882
1883 // The integral part of |A| / pi mod 2 is bit 55 - x_expo.
1884 if (yy->expo >= F7_MANT_BITS && !f7_is_zero (yy))
1885 {
1886 // Too big for sensible calculation: Should this be NaN instead?
1887 if (sc.do_sin) f7_clr (ss);
1888 if (sc.do_cos) f7_clr (cc);
1889 return;
1890 }
1891
1892 // X -= 2pi * [ X / 2pi ]
1893 f7_floor (yy, yy);
1894
1895 f7_msub (xx, yy, pi, xx);
1896 xx->flags ^= F7_FLAG_sign;
1897
1898 // We divided by 2pi, but Y should count times we subtracted pi.
1899 yy->expo++;
1900 }
1901
1902 pi->expo = F7_(const_pi_expo); // pi
1903 f7_sub (hh, xx, pi);
1904 if (!f7_signbit (hh))
1905 {
1906 // H = X - pi >= 0 => X >= pi
1907 // sin(x) = -sin(x - pi)
1908 // cos(x) = -cos(x - pi)
1909 f7_copy (xx, hh);
1910 // Y: We subtracted pi one more time.
1911 f7_Iadd (yy, f7_set_u16 (hh, 1));
1912 sc.neg_sin ^= 1;
1913 sc.neg_cos ^= 1;
1914 }
1915
1916 pi->expo = F7_(const_pi_expo) - 1; // pi/2
1917 if (f7_gt (xx, pi))
1918 {
1919 // x > pi/2
1920 // sin(x) = sin(pi - x)
1921 // cos(x) = -cos(pi - x)
1922 pi->expo = F7_(const_pi_expo); // pi
1923 f7_IRsub (xx, pi);
1924 // Y: We subtracted pi one more time (and then negated).
1925 f7_Iadd (yy, f7_set_u16 (hh, 1));
1926 yy->flags ^= F7_FLAG_sign;
1927 sc.neg_cos ^= 1;
1928 }
1929
1930 pi->expo = F7_(const_pi_expo) - 2; // pi/4
1931 if (f7_gt (xx, pi))
1932 {
1933 // x > pi/4
1934 // sin(x) = cos(pi/2 - x)
1935 // cos(x) = sin(pi/2 - x)
1936 pi->expo = F7_(const_pi_expo) - 1; // pi/2
1937 f7_IRsub (xx, pi);
1938 // Y: We subtracted pi/2 one more time (and then negated).
1939 f7_Iadd (yy, f7_set_1pow2 (hh, -1, 0));
1940 yy->flags ^= F7_FLAG_sign;
1941 sc.swap_sincos = 1;
1942 }
1943
1944 if (!f7_is0 (yy))
1945 {
1946 // Y counts how often we subtracted pi from X in order to
1947 // get 0 <= X < pi/4 as small as possible (Y is 0 mod 0.5).
1948 // Now also subtract the low part of pi:
1949 // f7_const_pi_low = pi - f7_const_pi in order to get more precise
1950 // results in the cases where the final result is close to 0.
1951 const f7_t *pi_low = F7_CONST_ADDR (pi_low, pi);
1952 //f7_const (pi, pi_low);
1953 f7_Imul (yy, pi_low);
1954 f7_Isub (xx, yy);
1955 }
1956
1957 // X in [0, pi/4].
1958 // X^2 in [0, pi^2/16] ~ [0, 0.6169]
1959
1960 f7_square (yy, xx);
1961
1962 f7_t *x_sin = xx;
1963 f7_t *x_cos = yy;
1964
1965 if ((sc.do_sin && !sc.swap_sincos)
1966 || (sc.do_cos && sc.swap_sincos))
1967 {
1968 f7_horner (hh, yy, n_coeff_sin, coeff_sin, NULL);
1969 f7_mul (x_sin, hh, xx);
1970 }
1971
1972 if ((sc.do_cos && !sc.swap_sincos)
1973 || (sc.do_sin && sc.swap_sincos))
1974 {
1975 f7_horner (x_cos, yy, n_coeff_cos, coeff_cos, hh);
1976 }
1977
1978 if (sc.swap_sincos)
1979 {
1980 x_sin = yy;
1981 x_cos = xx;
1982 }
1983
1984 if (sc.do_sin)
1985 {
1986 x_sin->flags ^= sc.bits;
1987 x_sin->flags &= F7_FLAG_sign;
1988 f7_copy (ss, x_sin);
1989 }
1990
1991 if (sc.do_cos)
1992 {
1993 x_cos->flags = sc.neg_cos;
1994 f7_copy (cc, x_cos);
1995 }
1996 }
1997 #endif // F7MOD_sincos_
1998
1999 #ifdef F7MOD_sin_
2000 F7_WEAK
f7_sin(f7_t * ss,const f7_t * aa)2001 void f7_sin (f7_t *ss, const f7_t *aa)
2002 {
2003 f7_sincos (ss, NULL, aa);
2004 }
2005 #endif // F7MOD_sin_
2006
2007 #ifdef F7MOD_cos_
2008 F7_WEAK
f7_cos(f7_t * cc,const f7_t * aa)2009 void f7_cos (f7_t *cc, const f7_t *aa)
2010 {
2011 f7_sincos (NULL, cc, aa);
2012 }
2013 #endif // F7MOD_cos_
2014
2015
2016 #ifdef F7MOD_tan_
2017 F7_WEAK
f7_tan(f7_t * tt,const f7_t * aa)2018 void f7_tan (f7_t *tt, const f7_t *aa)
2019 {
2020 f7_t xcos;
2021 f7_sincos (tt, & xcos, aa);
2022 f7_Idiv (tt, & xcos);
2023 }
2024 #endif // F7MOD_tan_
2025
2026
2027 #ifdef F7MOD_cotan_
2028 F7_WEAK
f7_cotan(f7_t * ct,const f7_t * aa)2029 void f7_cotan (f7_t *ct, const f7_t *aa)
2030 {
2031 f7_t xcos;
2032 f7_sincos (ct, & xcos, aa);
2033 f7_div (ct, & xcos, ct);
2034 }
2035 #endif // F7MOD_cotan_
2036
2037
2038 #ifdef F7MOD_sinhcosh_
2039 F7_WEAK
f7_sinhcosh(f7_t * cc,const f7_t * aa,bool do_sinh)2040 void f7_sinhcosh (f7_t *cc, const f7_t *aa, bool do_sinh)
2041 {
2042 f7_t xx7, *xx = &xx7;
2043 // C = exp(A)
2044 f7_exp (cc, aa);
2045 // X = exp(-A)
2046 f7_div (xx, f7_set_u16 (xx, 1), cc);
2047 // sinh(A) = (exp(A) - exp(-A)) / 2
2048 // cosh(A) = (exp(A) + exp(-A)) / 2
2049 f7_addsub (cc, cc, xx, do_sinh);
2050 cc->expo--;
2051 }
2052 #endif // F7MOD_sinhcosh_
2053
2054
2055 #ifdef F7MOD_sinh_
2056 F7_WEAK
f7_sinh(f7_t * cc,const f7_t * aa)2057 void f7_sinh (f7_t *cc, const f7_t *aa)
2058 {
2059 f7_sinhcosh (cc, aa, true);
2060 }
2061 #endif // F7MOD_sinh_
2062
2063
2064 #ifdef F7MOD_cosh_
2065 F7_WEAK
f7_cosh(f7_t * cc,const f7_t * aa)2066 void f7_cosh (f7_t *cc, const f7_t *aa)
2067 {
2068 f7_sinhcosh (cc, aa, false);
2069 }
2070 #endif // F7MOD_cosh_
2071
2072
2073 #ifdef F7MOD_tanh_
2074 F7_WEAK
f7_tanh(f7_t * cc,const f7_t * aa)2075 void f7_tanh (f7_t *cc, const f7_t *aa)
2076 {
2077 // tanh(A) = (exp(2A) - 1) / (exp(2A) + 1)
2078 f7_t xx7, *xx = &xx7;
2079 F7_PCONST_U16 (const f7_t *one, 1);
2080 // C = 2A
2081 f7_copy (cc, aa);
2082 cc->expo++;
2083 // C = exp(2A)
2084 f7_exp (cc, cc);
2085 // X = exp(2A) + 1
2086 f7_add (xx, cc, one);
2087 // C = exp(2A) - 1
2088 f7_Isub (cc, one);
2089 // C = tanh(A)
2090 f7_Idiv (cc, xx);
2091 }
2092 #endif // F7MOD_tanh_
2093
2094
2095 #ifdef F7MOD_atan_
2096
2097 #define MINIMAX_6_6_IN_0_1
2098
2099 #define ARRAY_NAME coeff_atan_zahler
2100 #define FOR_NUMERATOR
2101 #include "libf7-array.def"
2102 #undef FOR_NUMERATOR
2103 #undef ARRAY_NAME
2104
2105 #define ARRAY_NAME coeff_atan_nenner
2106 #define FOR_DENOMINATOR
2107 #include "libf7-array.def"
2108 #undef FOR_DENOMINATOR
2109 #undef ARRAY_NAME
2110
2111 #include "libf7-constdef.h"
2112
2113 F7_WEAK
f7_atan(f7_t * cc,const f7_t * aa)2114 void f7_atan (f7_t *cc, const f7_t *aa)
2115 {
2116 uint8_t a_class = f7_classify (aa);
2117 uint8_t flags = a_class & F7_FLAG_sign;
2118
2119 if (f7_class_nan (a_class))
2120 return f7_set_nan (cc);
2121
2122 f7_t yy7, *yy = &yy7;
2123 f7_t zz7, *zz = &zz7;
2124
2125 if (f7_class_inf (a_class))
2126 {
2127 f7_set_u16 (cc, 0);
2128 goto do_Inf;
2129 }
2130
2131 // C = |A|
2132 f7_copy (cc, aa);
2133 cc->flags = 0;
2134
2135 if (!f7_class_zero (a_class) && cc->expo >= 0)
2136 {
2137 // C >= 1: use atan (x) + atan (1/x) = pi / 2 to reduce to [0, 1].
2138 flags |= 1 << 1;
2139 f7_div (cc, f7_set_u16 (yy, 1), cc);
2140 }
2141 #if !defined (MINIMAX_6_6_IN_0_1)
2142 const uint8_t const_a_msb = 0x89;
2143 const int16_t const_a_expo = -2;
2144 if (f7_abscmp_msb_ge (cc, const_a_msb, const_a_expo))
2145 {
2146 // We have C in [0, 1] and we want to use argument reduction by means
2147 // of addition theorem atan(x) - atan(y) = atan((x - y) / (1 + xy)).
2148 // We want to split [0, 1] into [0, a] u [a, 1] in such a way that
2149 // the upper interval will be mapped to [-a, a]. The system is easy
2150 // to solve and yiels
2151 // y = 1 / sqrt (3) ~ 0.57735 atan(y) = pi / 6
2152 // a = (1 - y) / (1 + y) ~ 0.26795 ~ 0x0.8930A2F * 2^-1.
2153 flags |= 1 << 2;
2154 // C <- (C - Y) / (1 + C * Y) in [-a, a]
2155 const f7_t *cst = F7_CONST_ADDR (1_sqrt3, zz);
2156 f7_mul (yy, cc, cst);
2157 f7_Isub (cc, cst);
2158 f7_Iadd (yy, F7_U16_ADDR (1, zz));
2159 f7_Idiv (cc, yy);
2160 }
2161 #endif
2162 // C <- C * p(C^2) / q(C^2)
2163 f7_square (yy, cc);
2164 f7_horner (zz, yy, n_coeff_atan_zahler, coeff_atan_zahler, NULL);
2165 f7_Imul (zz, cc);
2166 f7_horner (cc, yy, n_coeff_atan_nenner, coeff_atan_nenner, NULL);
2167 f7_div (cc, zz, cc);
2168
2169 #if !defined (MINIMAX_6_6_IN_0_1)
2170 if (flags & (1 << 2))
2171 f7_Iadd (cc, F7_CONST_ADDR (pi_6, yy));
2172 #endif
2173
2174 if (flags & (1 << 1))
2175 {
2176 do_Inf:;
2177 // Y = pi / 2
2178 f7_const (yy, pi);
2179 yy->expo = F7_(const_pi_expo) - 1;
2180 f7_IRsub (cc, yy);
2181 }
2182
2183 cc->flags = a_class & F7_FLAG_sign;
2184 }
2185 #undef MINIMAX_6_6_IN_0_1
2186 #endif // F7MOD_atan_
2187
2188
2189 #ifdef F7MOD_asinacos_
2190
2191 #define ARRAY_NAME coeff_func_a_zahler
2192 #define FOR_NUMERATOR
2193 #include "libf7-array.def"
2194 #undef FOR_NUMERATOR
2195 #undef ARRAY_NAME
2196
2197 #define ARRAY_NAME coeff_func_a_nenner
2198 #define FOR_DENOMINATOR
2199 #include "libf7-array.def"
2200 #undef FOR_DENOMINATOR
2201 #undef ARRAY_NAME
2202
2203 typedef union
2204 {
2205 struct
2206 {
2207 bool sign : 1; // Must be bit F7_FLAGNO_sign.
2208 bool do_acos : 1; // From caller.
2209 bool have_acos : 1; // What we compute from rational approx p/q.
2210 uint8_t res : 5;
2211 };
2212 uint8_t bits;
2213 } asinacos_t;
2214
2215 F7_WEAK
f7_asinacos(f7_t * cc,const f7_t * aa,uint8_t what)2216 void f7_asinacos (f7_t *cc, const f7_t *aa, uint8_t what)
2217 {
2218 f7_t xx7, *xx = &xx7;
2219 f7_t xx27, *xx2 = &xx27;
2220
2221 asinacos_t flags = { .bits = what | f7_signbit (aa) };
2222
2223 f7_abs (xx, aa);
2224
2225 int8_t cmp = f7_cmp (xx, f7_set_u16 (cc, 1));
2226
2227 if (cmp == INT8_MIN
2228 || cmp > 0)
2229 {
2230 return f7_set_nan (cc);
2231 }
2232
2233 if (xx->expo <= -2 || f7_is_zero (xx))
2234 {
2235 // |A| < 1/2: asin(x) = x * a(2*x^2)
2236 f7_square (xx2, xx);
2237 xx2->expo ++;
2238 }
2239 else
2240 {
2241 // |A| > 1/2: acos (1-x) = sqrt(2*x) * a(x)
2242 // C is 1 from above.
2243 f7_IRsub (xx, cc);
2244 f7_copy (xx2, xx);
2245 flags.have_acos = 1;
2246 }
2247
2248 // MiniMax [5/4] numerator.
2249 f7_horner (cc, xx2, n_coeff_func_a_zahler, coeff_func_a_zahler, NULL);
2250
2251 if (flags.have_acos)
2252 {
2253 xx->expo ++;
2254 f7_Isqrt (xx);
2255 }
2256 f7_Imul (cc, xx);
2257
2258 // MiniMax [5/4] denominator.
2259 f7_horner (xx, xx2, n_coeff_func_a_nenner, coeff_func_a_nenner, NULL);
2260
2261 f7_Idiv (cc, xx);
2262
2263 /*
2264 With the current value of C, we have:
2265
2266 | | do_asin | do_acos
2267 | C | A <= 0 | A >= 0 | A <= 0 | A >= 0
2268 ----------+------------+-----------+----------+----------+----------
2269 have_asin | asin (|A|) | -C | C | pi/2 + C | pi/2 - C
2270 have_acos | acos (|A|) | -pi/2 + C | pi/2 - C | pi - C | C
2271
2272 Result = n_pi2 * pi/2 + C * (c_sign ? -1 : 1)
2273 Result (A, do_asin) = asin (A)
2274 Result (A, do_acos) = acos (A)
2275
2276 with
2277 c_sign = do_acos ^ have_acos ^ a_sign
2278 n_pi2 = do_acos + have_acos * (a_sign ^ do_acos) ? (-1 : 1)
2279 n_pi2 in { -1, 0, 1, 2 }
2280 */
2281
2282 // All what matters for c_sign is bit 0.
2283 uint8_t c_sign = flags.bits;
2284 int8_t n_pi2 = flags.do_acos;
2285 c_sign ^= flags.do_acos;
2286 if (flags.have_acos)
2287 {
2288 n_pi2++;
2289 __asm ("" : "+r" (n_pi2));
2290 if (c_sign & 1) // c_sign & 1 = a_sign ^ do_acos
2291 n_pi2 -= 2;
2292 c_sign++;
2293 }
2294
2295 cc->flags = c_sign & F7_FLAG_sign;
2296
2297 if (n_pi2)
2298 {
2299 f7_const (xx, pi);
2300 if (n_pi2 < 0)
2301 xx->sign = 1;
2302 if (n_pi2 != 2)
2303 xx->expo = F7_(const_pi_expo) - 1;
2304
2305 f7_Iadd (cc, xx);
2306 }
2307 }
2308 #endif // F7MOD_asinacos_
2309
2310
2311 #ifdef F7MOD_asin_
2312 F7_WEAK
f7_asin(f7_t * cc,const f7_t * aa)2313 void f7_asin (f7_t *cc, const f7_t *aa)
2314 {
2315 f7_asinacos (cc, aa, 0);
2316 }
2317 #endif // F7MOD_asin_
2318
2319
2320 #ifdef F7MOD_acos_
2321 F7_WEAK
f7_acos(f7_t * cc,const f7_t * aa)2322 void f7_acos (f7_t *cc, const f7_t *aa)
2323 {
2324 f7_asinacos (cc, aa, 1 << 1);
2325 }
2326 #endif // F7MOD_acos_
2327
2328
2329 #ifndef IN_LIBGCC2
2330
2331 #ifdef F7MOD_put_C_
2332
2333 #include <stdio.h>
2334 #include <avr/pgmspace.h>
2335
2336 static F7_INLINE
f7_hex_digit(uint8_t nibble)2337 uint8_t f7_hex_digit (uint8_t nibble)
2338 {
2339 nibble = (uint8_t) (nibble + '0');
2340 if (nibble > '9')
2341 nibble = (uint8_t) (nibble + ('a' - '0' - 10));
2342 return nibble;
2343 }
2344
f7_put_hex2(uint8_t x,FILE * stream)2345 static void f7_put_hex2 (uint8_t x, FILE *stream)
2346 {
2347 putc ('0', stream);
2348 if (x)
2349 {
2350 putc ('x', stream);
2351 putc (f7_hex_digit (x >> 4), stream);
2352 putc (f7_hex_digit (x & 0xf), stream);
2353 }
2354 }
2355
2356 #define XPUT(str) \
2357 fputs_P (PSTR (str), stream)
2358
2359 // Write to STREAM a line that is appropriate for usage in libf7-const.def.
2360
2361 F7_WEAK
f7_put_CDEF(const char * name,const f7_t * aa,FILE * stream)2362 void f7_put_CDEF (const char *name, const f7_t *aa, FILE *stream)
2363 {
2364 char buf[7];
2365 XPUT ("F7_CONST_DEF (");
2366 fputs (name, stream);
2367 XPUT (",\t");
2368 uint8_t a_class = f7_classify (aa);
2369 if (! f7_class_nonzero (a_class))
2370 {
2371 f7_put_hex2 (a_class & F7_FLAGS, stream);
2372 XPUT (",\t0,0,0,0,0,0,0,\t0)");
2373 return;
2374 }
2375 putc ('0' + (a_class & F7_FLAGS), stream);
2376 XPUT (",\t");
2377
2378 for (uint8_t i = 0; i < F7_MANT_BYTES; i++)
2379 {
2380 f7_put_hex2 (aa->mant[F7_MANT_BYTES-1 - i], stream);
2381 putc (',', stream);
2382 }
2383 putc ('\t', stream);
2384
2385 itoa (aa->expo, buf, 10);
2386 fputs (buf, stream);
2387 XPUT (")");
2388 }
2389
f7_put_C(const f7_t * aa,FILE * stream)2390 void f7_put_C (const f7_t *aa, FILE *stream)
2391 {
2392 char buf[7];
2393
2394 uint8_t a_class = f7_classify (aa);
2395 if (f7_class_nan (a_class))
2396 {
2397 XPUT ("{ .is_nan = 1 }");
2398 return;
2399 }
2400 bool sign = a_class & F7_FLAG_sign;
2401
2402 if (f7_class_inf (a_class))
2403 {
2404 XPUT ("{ .is_nan = 1, .sign = ");
2405 putc ('0' + sign, stream);
2406 XPUT (" }");
2407 return;
2408 }
2409
2410 XPUT ("{ .sign = ");
2411 putc ('0' + sign, stream);
2412
2413 XPUT (", .mant = { ");
2414 for (uint8_t i = 0; i < F7_MANT_BYTES; i++)
2415 {
2416 f7_put_hex2 (aa->mant[F7_MANT_BYTES-1 - i], stream);
2417 if (i != F7_MANT_BYTES - 1)
2418 putc (',', stream);
2419 }
2420
2421 XPUT (" }, .expo = ");
2422 itoa (aa->expo, buf, 10);
2423 fputs (buf, stream);
2424 XPUT (" }");
2425 }
2426 #endif //F7MOD_put_C_
2427
2428
2429 #ifdef F7MOD_dump_
2430
2431 #include <avr/pgmspace.h>
2432
2433 #ifndef AVRTEST_H
2434
2435 #include <stdio.h>
2436
LOG_PSTR(const char * str)2437 static void LOG_PSTR (const char *str)
2438 {
2439 fputs_P (str, stdout);
2440 }
2441
LOG_PFMT_U16(const char * fmt,uint16_t x)2442 static void LOG_PFMT_U16 (const char *fmt, uint16_t x)
2443 {
2444 printf_P (fmt, x);
2445 }
2446
LOG_PFMT_FLOAT(const char * fmt,float x)2447 static void LOG_PFMT_FLOAT (const char *fmt, float x)
2448 {
2449 printf_P (fmt, x);
2450 }
2451
2452 #define LOG_X8(X) LOG_PFMT_U16 (PSTR (" 0x%02x "), (uint8_t)(X))
2453 #define LOG_PFMT_S16(FMT, X) LOG_PFMT_U16 (FMT, (unsigned)(X))
2454 #define LOG_PFMT_ADDR(FMT, X) LOG_PFMT_U16 (FMT, (unsigned)(X))
2455
2456 #endif // AVRTEST_H
2457
dump_byte(uint8_t b)2458 static void dump_byte (uint8_t b)
2459 {
2460 LOG_PSTR (PSTR (" "));
2461 for (uint8_t i = 0; i < 8; i++)
2462 {
2463 LOG_PSTR ((b & 0x80) ? PSTR ("1") : PSTR ("0"));
2464 b = (uint8_t) (b << 1);
2465 }
2466 }
2467
f7_dump_mant(const f7_t * aa)2468 void f7_dump_mant (const f7_t *aa)
2469 {
2470 LOG_PSTR (PSTR ("\tmant ="));
2471 for (int i = F7_MANT_BYTES - 1; i >= 0; i--)
2472 LOG_X8 (aa->mant[i]);
2473 LOG_PSTR (PSTR ("\n\t ="));
2474
2475 for (int i = F7_MANT_BYTES - 1; i >= 0; i--)
2476 dump_byte (aa->mant[i]);
2477 LOG_PSTR (PSTR ("\n"));
2478 }
2479
f7_dump(const f7_t * aa)2480 void f7_dump (const f7_t *aa)
2481 {
2482 LOG_PFMT_ADDR (PSTR ("\n0x%04x\tflags = "), aa);
2483 dump_byte (aa->flags);
2484 uint8_t a_class = f7_classify (aa);
2485 LOG_PSTR (PSTR (" = "));
2486 LOG_PSTR (f7_class_sign (a_class) ? PSTR ("-") : PSTR ("+"));
2487 if (f7_class_inf (a_class)) LOG_PSTR (PSTR ("Inf "));
2488 if (f7_class_nan (a_class)) LOG_PSTR (PSTR ("NaN "));
2489 if (f7_class_zero (a_class)) LOG_PSTR (PSTR ("0 "));
2490 if (f7_class_number (a_class)) LOG_PSTR (PSTR ("Number "));
2491
2492 LOG_PFMT_FLOAT (PSTR (" = %.10g\n"), f7_get_float (aa));
2493 LOG_PFMT_S16 (PSTR ("\texpo = %d\n"), aa->expo);
2494
2495 f7_dump_mant (aa);
2496 }
2497 #endif // F7MOD_dump_
2498
2499 #endif // ! libgcc
2500
2501 #endif // !AVR_TINY
2502