1/* Copyright (c) 2005, Dmitry Xmelkov
2   All rights reserved.
3
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions are met:
6
7   * Redistributions of source code must retain the above copyright
8     notice, this list of conditions and the following disclaimer.
9   * Redistributions in binary form must reproduce the above copyright
10     notice, this list of conditions and the following disclaimer in
11     the documentation and/or other materials provided with the
12     distribution.
13   * Neither the name of the copyright holders nor the names of
14     contributors may be used to endorse or promote products derived
15     from this software without specific prior written permission.
16
17  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27  POSSIBILITY OF SUCH DAMAGE. */
28
29/* $Id: ftoa_engine.S 2191 2010-11-05 13:45:57Z arcanum $ */
30
31#if !defined(__AVR_TINY__)
32
33#ifndef	__DOXYGEN__
34
35#include "macros.inc"
36#include "ftoa_engine.h"
37
/* AVR_ENH_LPM is 1 when the target defines a non-zero __AVR_HAVE_LPMX__
   and 0 otherwise.  The multiplication code below uses it to choose
   between `lpm r0, Z+' and the plain `lpm' followed by `adiw ZL, 1'. */
#if  defined(__AVR_HAVE_LPMX__) && __AVR_HAVE_LPMX__
#  define AVR_ENH_LPM	1
#else
#  define AVR_ENH_LPM	0
#endif
43
44/*
45   int __ftoa_engine (double val, char *buf,
46                      unsigned char prec, unsigned char maxdgs)
47 Input:
48    val    - value to convert
49    buf    - output buffer address
50    prec   - precision: number of decimal digits is 'prec + 1'
51    maxdgs - (0 if unused) precision restriction for "%f" specification
52
53 Output:
54    return     - decimal exponent of first digit
55    buf[0]     - flags (FTOA_***)
56    buf[1],... - decimal digits
57    Number of digits:
58	maxdgs == 0 ? prec+1 :
59	(buf[0] & FTOA_CARRY) == 0 || buf[1] != '1' ?
60	    aver(1, maxdgs+exp, prec+1) :
	    aver(1, maxdgs+exp-1, prec+1)
62
63 Notes:
    * Output string is not 0-terminated, so that a user-supplied buffer
    can be used in any case.
66    * If used, 'maxdgs' is a number of digits for value with zero exponent.
67*/
68
    /* Input: registers holding the four arguments on entry.
       NOTE(review): presumably the avr-gcc argument registers for the
       prototype documented above -- confirm against the ABI.
       Several registers are reused by the later stages under different
       names once their earlier content is dead (see the aliases below). */
#define maxdgs	r16
#define	prec	r18
#define	buf_lo	r20
#define	buf_hi	r21
#define	val_lo	r22
#define	val_hi	r23
#define	val_hlo	r24
#define	val_hhi	r25

    /* Float value parse: the flags byte being built; it is stored to
       buf[0].  Shares r19 with mlt_1 and pwr_4.	*/
#define	flag	r19

    /* Multiplication of mantissas.
       exp_sv keeps a copy of the binary exponent; mlt_7..mlt_1 is the
       product accumulator.  mlt_4/mlt_5 overlay buf_lo/buf_hi, which
       have been copied to X by that time.	*/
#define	exp_sv	r17
#define	mlt_1	r19	/* lowest result byte	*/
#define mlt_2	r14
#define	mlt_3	r15
#define	mlt_4	r20
#define	mlt_5	r21
#define	mlt_6	r28
#define	mlt_7	r29

    /* Conversion to string.
       pwr_7..pwr_2 hold the current 'powr10' table element, 'digit' the
       ASCII digit being produced, 'exp10' the decimal exponent (also the
       low byte of the return value).  These overlay val_* / exp_sv / r0.	*/
#define	pwr_2	r1	/* lowest byte of 'powr10' element	*/
#define	pwr_3	r17
#define	pwr_4	r19
#define	pwr_5	r22
#define	pwr_6	r25
#define	pwr_7	r0
#define	digit	r23
#define	exp10	r24

    /* Fixed: r1 is read as constant zero during parsing and
       multiplication.  It is overwritten as pwr_2 during the conversion
       and cleared again before the function returns.	*/
#define	zero	r1
104
105    ASSEMBLY_CLIB_SECTION
106
107    .global	__ftoa_engine
108    .type	__ftoa_engine, "function"
109__ftoa_engine:
110
111/* --------------------------------------------------------------------
112   Float value parse.
113*/
114  ; limit 'prec'
115	cpi	prec, 8
116	brlo	1f
117	ldi	prec, 7
1181:
119  ; init.
120	clr	flag
121	X_movw	XL, buf_lo
122  ; val_hhi := exponent, sign test and remove
123#if  FTOA_MINUS != 1
124#  error  FTOA_MINUS must be 1:  add with carry used
125#endif
126	lsl	val_hhi
127	adc	flag, zero		; FTOA_MINUS
128	sbrc	val_hlo, 7
129	ori	val_hhi, 1
130  ; zero test
131	adiw	val_hlo, 0
132	cpc	val_lo, zero
133	cpc	val_hi, zero
134	brne	3f
135  ; return 0
136	ori	flag, FTOA_ZERO
137	subi	prec, -2
1382:	st	X+, flag
139	ldi	flag, '0'
140	dec	prec
141	brne	2b
142	ret				; r24,r25 == 0
1433:
144  ; infinity, NaN ?
145#if  FTOA_NAN != 2 * FTOA_INF
146#  error  Must: FTOA_NAN == 2*FTOA_INF: 'rjmp' is absent
147#endif
148	cpi	val_hhi, 0xff
149	brlo	6f
150	cpi	val_hlo, 0x80
151	cpc	val_hi, zero
152	cpc	val_lo, zero
153	breq	5f
154	subi	flag, -FTOA_INF		; FTOA_NAN
1555:	subi	flag, -FTOA_INF
1566:
157  ; write flags byte
158	st	X+, flag
159  ; hidden bit
160	cpi	val_hhi, 1
161	brlo	7f			; if subnormal value
162	ori	val_hlo, 0x80
1637:	adc	val_hhi, zero
164  ; pushes
165	push	r29
166	push	r28
167	push	r17
168	push	r16
169	push	r15
170	push	r14
171
172/* --------------------------------------------------------------------
173   Multiplication of mantisses (val and table).
174   At the begin:
175	val_hlo .. val_lo  - input value mantisse
176	val_hhi            - input value exponent
177	X                  - second byte address (string begin)
178   At the end:
179	mlt_7 .. mlt_2     - multiplication result
180	exp10              - decimal exponent
181*/
182
183  ; save
184	mov	exp_sv, val_hhi
185  ; Z := & base10[exp / 8]	(sizeof(base10[0]) == 5)
186	andi	val_hhi, ~7
187	lsr	val_hhi			; (exp/8) * 4
188	mov	ZL, val_hhi
189	lsr	val_hhi
190	lsr	val_hhi			; exp/8
191	add	ZL, val_hhi		; (exp/8) * 5
192	clr	ZH
193	subi	ZL, lo8(-(.L_base10))
194	sbci	ZH, hi8(-(.L_base10))
195  ; highest mantissa byte  (mult. shifting prepare)
196	clr	val_hhi
197  ; result initializ.
198	clr	mlt_1
199	clr	mlt_2
200	clr	mlt_3
201	X_movw	mlt_4, mlt_2
202	X_movw	mlt_6, mlt_2
203
204  ; multiply to 1-st table byte
205#if  AVR_ENH_LPM
206	lpm	r0, Z+
207#else
208	lpm
209	adiw	ZL, 1
210#endif
211	sec			; for loop end control
212	ror	r0
213  ; addition
21410:	brcc	11f
215	add	mlt_1, val_lo
216	adc	mlt_2, val_hi
217	adc	mlt_3, val_hlo
218	adc	mlt_4, val_hhi
219	adc	mlt_5, zero
220  ; arg shift
22111:	lsl	val_lo
222	rol	val_hi
223	rol	val_hlo
224	rol	val_hhi
225  ; next bit
226	lsr	r0
227	brne	10b
228
229  ; second table byte
230#if  AVR_ENH_LPM
231	lpm	r0, Z+		; C flag is stay 1
232#else
233	lpm
234	adiw	ZL, 1
235	sec
236#endif
237	ror	r0
238  ; addition
23912:	brcc	13f
240	add	mlt_2, val_hi		; val_hi is the least byte now
241	adc	mlt_3, val_hlo
242	adc	mlt_4, val_hhi
243	adc	mlt_5, val_lo
244	adc	mlt_6, zero
245  ; arg shift
24613:	lsl	val_hi
247	rol	val_hlo
248	rol	val_hhi
249	rol	val_lo
250  ; next bit
251	lsr	r0
252	brne	12b
253
254  ; 3-t table byte
255#if  AVR_ENH_LPM
256	lpm	r0, Z+		; C flag is stay 1
257#else
258	lpm
259	adiw	ZL, 1
260	sec
261#endif
262	ror	r0
263  ; addition
26414:	brcc	15f
265	add	mlt_3, val_hlo		; val_hlo is the least byte now
266	adc	mlt_4, val_hhi
267	adc	mlt_5, val_lo
268	adc	mlt_6, val_hi
269	adc	mlt_7, zero
270  ; arg shift
27115:	lsl	val_hlo
272	rol	val_hhi
273	rol	val_lo
274	rol	val_hi
275  ; next bit
276	lsr	r0
277	brne	14b
278
279  ; 4-t table byte
280#if  AVR_ENH_LPM
281	lpm	r0, Z+		; C flag is stay 1
282#else
283	lpm
284#endif
285	ror	r0
286  ; addition
28716:	brcc	17f
288	add	mlt_4, val_hhi		; val_hhi is the least byte now
289	adc	mlt_5, val_lo
290	adc	mlt_6, val_hi
291	adc	mlt_7, val_hlo
292  ; arg shift
29317:	lsl	val_hhi
294	rol	val_lo
295	rol	val_hi
296	rol	val_hlo
297  ; next bit
298	lsr	r0
299	brne	16b
300
301  ; decimal exponent
302#if  AVR_ENH_LPM
303	lpm	exp10, Z
304#else
305	adiw	ZL, 1
306	lpm
307	mov	exp10, r0
308#endif
309
310  ; result shift:  mlt_7..2 >>= (~exp & 7)
311	com	exp_sv
312	andi	exp_sv, 7
313	breq	19f
31418:	lsr	mlt_7
315	ror	mlt_6
316	ror	mlt_5
317	ror	mlt_4
318	ror	mlt_3
319	ror	mlt_2
320	dec	exp_sv
321	brne	18b
32219:
323
324/* --------------------------------------------------------------------
325   Conversion to string.
326
327   Registers usage:
328      mlt_7 .. mlt_2	- new mantissa (multiplication result)
329      pwr_7 .. pwr_2	- 'powr10' table element
330      Z			- 'powr10' table pointer
331      X			- output string pointer
332      maxdgs		- number of digits
333      prec		- number of digits stays to output
334      exp10		- decimal exponent
335      digit		- conversion process
336
337   At the end:
338      X			- end of buffer (nonfilled byte)
339      exp10		- corrected dec. exponent
340      mlt_7 .. mlt_2	- remainder
341      pwr_7 .. pwr_2	- last powr10[] element
342
343   Notes:
344     * It is possible to leave out powr10'x table with subnormal value.
345      Result: accuracy degrease on the rounding phase.  No matter: high
346      precision with subnormals is not needed. (Now 0x00000001 is converted
347      exactly on prec = 5, i.e. 6 digits.)
348*/
349
350  ; to find first digit
351	ldi	ZL, lo8(.L_powr10)
352	ldi	ZH, hi8(.L_powr10)
353	set
354  ; 'pwr10' element reading
355.L_digit:
356	X_lpm	pwr_2, Z+
357	X_lpm	pwr_3, Z+
358	X_lpm	pwr_4, Z+
359	X_lpm	pwr_5, Z+
360	X_lpm	pwr_6, Z+
361	X_lpm	pwr_7, Z+
362  ; 'digit' init.
363	ldi	digit, '0' - 1
364  ; subtraction loop
36520:	inc	digit
366	sub	mlt_2, pwr_2
367	sbc	mlt_3, pwr_3
368	sbc	mlt_4, pwr_4
369	sbc	mlt_5, pwr_5
370	sbc	mlt_6, pwr_6
371	sbc	mlt_7, pwr_7
372	brsh	20b
373  ; restore mult
374	add	mlt_2, pwr_2
375	adc	mlt_3, pwr_3
376	adc	mlt_4, pwr_4
377	adc	mlt_5, pwr_5
378	adc	mlt_6, pwr_6
379	adc	mlt_7, pwr_7
380  ; analisys
381	brtc	25f
382	cpi	digit, '0'
383	brne	21f		; this is the first digit finded
384	dec	exp10
385	rjmp	.L_digit
386  ; now is the first digit
38721:	clt
388  ; number of digits
389	subi	maxdgs, 1
390	brlo	23f			; maxdgs was 0
391	add	maxdgs, exp10
392	brpl	22f
393	clr	maxdgs
39422:	cp	maxdgs, prec
395	brsh	23f
396	mov	prec, maxdgs
39723:	inc	prec
398	mov	maxdgs, prec
399  ; operate digit
40025:	cpi	digit, '0' + 10
401	brlo	27f
402  ; overflow, digit > '9'
403	ldi	digit, '9'
40426:	st	X+, digit
405	dec	prec
406	brne	26b
407	rjmp	.L_up
408  ; write digit
40927:	st	X+, digit
410	dec	prec
411	brne	.L_digit
412
413/* --------------------------------------------------------------------
414    Rounding.
415*/
416.L_round:
417  ; pwr10 /= 2
418	lsr	pwr_7
419	ror	pwr_6
420	ror	pwr_5
421	ror	pwr_4
422	ror	pwr_3
423	ror	pwr_2
424  ; mult -= pwr10  (half of last 'pwr10' value)
425	sub	mlt_2, pwr_2
426	sbc	mlt_3, pwr_3
427	sbc	mlt_4, pwr_4
428	sbc	mlt_5, pwr_5
429	sbc	mlt_6, pwr_6
430	sbc	mlt_7, pwr_7
431  ; rounding direction?
432	brlo	.L_rest
433  ; round to up
434.L_up:
435	inc	prec
436	ld	digit, -X
437	inc	digit
438	cpi	digit, '9' + 1
439	brlo	31f
440	ldi	digit, '0'
44131:	st	X, digit
442	cpse	prec, maxdgs
443	brsh	.L_up
444  ; it was a carry to master digit
445	ld	digit, -X		; flags
446	ori	digit, FTOA_CARRY	; 'C' is not changed
447	st	X+, digit
448	brlo	.L_rest			; above comparison
449  ; overflow
450	inc	exp10
451	ldi	digit, '1'
45232:	st	X+, digit
453	ldi	digit, '0'
454	dec	prec
455	brne	32b
456  ; restore
457.L_rest:
458	clr	zero
459	pop	r14
460	pop	r15
461	pop	r16
462	pop	r17
463	pop	r28
464	pop	r29
465  ; return
466	clr	r25
467	sbrc	exp10, 7		; high byte
468	com	r25
469	ret
470
471    .size  __ftoa_engine, . - __ftoa_engine
472
/* --------------------------------------------------------------------
    Tables.  '.L_powr10' is placed first -- for subnormals stability.
    NOTE(review): presumably so that a read past the end of '.L_powr10'
    (possible with subnormal values) lands in '.L_base10' -- confirm.

    .L_powr10: 15 elements of 6 bytes, lowest byte first, holding the
    powers of ten 10^14 down to 10^0.  The conversion loop loads one
    element into pwr_2 .. pwr_7 per digit.

    .L_base10: 32 elements of 5 bytes, selected by (binary exponent / 8).
    Bytes 0..3 are a 32-bit multiplier, lowest byte first; byte 4 is a
    signed decimal exponent that is loaded into 'exp10'.  The number in
    each trailing comment is the value of the four multiplier bytes.
*/
    .section .progmem.data,"a",@progbits

    .type .L_powr10, "object"
.L_powr10:
	.byte	0, 64, 122, 16, 243, 90	; 100000000000000
	.byte	0, 160, 114, 78, 24, 9	; 10000000000000
	.byte	0, 16, 165, 212, 232, 0	; 1000000000000
	.byte	0, 232, 118, 72, 23, 0	; 100000000000
	.byte	0, 228, 11, 84, 2, 0	; 10000000000
	.byte	0, 202, 154, 59, 0, 0	; 1000000000
	.byte	0, 225, 245, 5, 0, 0	; 100000000
	.byte	128, 150, 152, 0, 0, 0	; 10000000
	.byte	64, 66, 15, 0, 0, 0	; 1000000
	.byte	160, 134, 1, 0, 0, 0	; 100000
	.byte	16, 39, 0, 0, 0, 0	; 10000
	.byte	232, 3, 0, 0, 0, 0	; 1000
	.byte	100, 0, 0, 0, 0, 0	; 100
	.byte	10, 0, 0, 0, 0, 0	; 10
	.byte	1, 0, 0, 0, 0, 0	; 1
    .size .L_powr10, . - .L_powr10

    .type	.L_base10, "object"
.L_base10:
	.byte	44, 118, 216, 136, -36	; 2295887404
	.byte	103, 79, 8, 35, -33	; 587747175
	.byte	193, 223, 174, 89, -31	; 1504632769
	.byte	177, 183, 150, 229, -29	; 3851859889
	.byte	228, 83, 198, 58, -26	; 986076132
	.byte	81, 153, 118, 150, -24	; 2524354897
	.byte	230, 194, 132, 38, -21	; 646234854
	.byte	137, 140, 155, 98, -19	; 1654361225
	.byte	64, 124, 111, 252, -17	; 4235164736
	.byte	188, 156, 159, 64, -14	; 1084202172
	.byte	186, 165, 111, 165, -12	; 2775557562
	.byte	144, 5, 90, 42, -9	; 710542736
	.byte	92, 147, 107, 108, -7	; 1818989404
	.byte	103, 109, 193, 27, -4	; 465661287
	.byte	224, 228, 13, 71, -2	; 1192092896
	.byte	245, 32, 230, 181, 0	; 3051757813
	.byte	208, 237, 144, 46, 3	; 781250000
	.byte	0, 148, 53, 119, 5	; 2000000000
	.byte	0, 128, 132, 30, 8	; 512000000
	.byte	0, 0, 32, 78, 10	; 1310720000
	.byte	0, 0, 0, 200, 12	; 3355443200
	.byte	51, 51, 51, 51, 15	; 858993459
	.byte	152, 110, 18, 131, 17	; 2199023256
	.byte	65, 239, 141, 33, 20	; 562949953
	.byte	137, 59, 230, 85, 22	; 1441151881
	.byte	207, 254, 230, 219, 24	; 3689348815
	.byte	209, 132, 75, 56, 27	; 944473297
	.byte	247, 124, 29, 144, 29	; 2417851639
	.byte	164, 187, 228, 36, 32	; 618970020
	.byte	50, 132, 114, 94, 34	; 1584563250
	.byte	129, 0, 201, 241, 36	; 4056481921
	.byte	236, 161, 229, 61, 39	; 1038459372
    .size .L_base10, . - .L_base10
532
533	.end
534#endif	/* !__DOXYGEN__ */
535
536#endif /* !defined(__AVR_TINY__) */
537