1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 #include <vdb/extern.h>
27
28 #include <vdb/xform.h>
29 #include <vdb/schema.h>
30 #include <klib/data-buffer.h>
31 #include <klib/text.h>
32 #include <klib/printf.h>
33 #include <klib/debug.h>
34 #include <klib/rc.h>
35 #include <sysalloc.h>
36
37 #include <stdlib.h>
38 #include <string.h>
39 #include <ctype.h>
40 #include <os-native.h>
41 #include <assert.h>
42
43
/* LOCAL_FMT_COUNT
 *  capacity of the function-local PrintFmt array used by the factory.
 *  once the parser has filled this many entries it switches to a
 *  heap-allocated overflow buffer ( see create_overflow ).
 */
#define LOCAL_FMT_COUNT 64
47
48
49 typedef struct Sprintf Sprintf;
50 struct Sprintf
51 {
52 const PrintFmt *fmt;
53 PrintArg *args;
54 String *str;
55 };
56
/* per-radix size tables consulted by get_radix_size when estimating
   the output size of an integer conversion.
   the index selects the operand width as used by sprintf_func:
   [ 0 ] = 8 bit, [ 1 ] = 16 bit, [ 2 ] = 32 bit, [ 3 ] = 64 bit */
static const uint8_t radix2_size  [ 4 ] = { 8, 16, 32, 64 };
static const uint8_t radix8_size  [ 4 ] = { 3,  6, 11, 22 };
static const uint8_t radix10_size [ 4 ] = { 3,  5, 10, 20 };
static const uint8_t radix16_size [ 4 ] = { 2,  4,  8, 16 };
61
#if _DEBUGGING
/* validate_obj
 *  debug-only sanity check of the format blocks held by "obj"
 *
 *  walks the blocks up to the sptTerm entry and inspects only the
 *  literal ones: the text pointer must be non-NULL, the size must be
 *  non-zero and the text must not contain a NUL byte.
 *
 *  "fmt_only" is accepted but not consulted by this implementation.
 *
 *  returns 0 if every literal block passes, otherwise an rc
 *  describing the first problem found.
 */
static
rc_t validate_obj ( const Sprintf *obj, bool fmt_only )
{
    const PrintFmt *cur;

    for ( cur = obj -> fmt; cur -> type != sptTerm; ++ cur )
    {
        /* only literal blocks are examined at this point */
        if ( cur -> type != sptLiteral )
            continue;

        if ( cur -> u . l . text == NULL )
            return RC ( rcXF, rcFunction, rcConstructing, rcString, rcNull );

        if ( cur -> u . l . size == 0 )
            return RC ( rcXF, rcFunction, rcConstructing, rcString, rcEmpty );

        /* an embedded NUL byte marks the literal as corrupt */
        if ( string_chr ( cur -> u . l . text, cur -> u . l . size, 0 ) != NULL )
            return RC ( rcXF, rcFunction, rcConstructing, rcString, rcInvalid );
    }

    return 0;
}
#endif
86
87 static
get_radix_size(const PrintFmt * fmt,uint32_t selector)88 size_t get_radix_size ( const PrintFmt *fmt, uint32_t selector )
89 {
90 const uint8_t *radix_size;
91 switch ( fmt -> radix )
92 {
93 case 0:
94 return 0;
95 case 10:
96 radix_size = radix10_size;
97 break;
98 case 16:
99 radix_size = radix16_size;
100 break;
101 case 8:
102 radix_size = radix8_size;
103 break;
104 default:
105 radix_size = radix2_size;
106 }
107 return radix_size [ selector ];
108 }
109
110 typedef struct FuncArgs FuncArgs;
111 struct FuncArgs
112 {
113 const VRowData *argv;
114 uint32_t argc;
115 uint32_t idx;
116 };
117
118 static
access_cell_data(FuncArgs * args,uint64_t * row_len,size_t * size)119 const void *access_cell_data ( FuncArgs *args, uint64_t *row_len, size_t *size )
120 {
121 size_t elem_bytes;
122 const uint8_t *data;
123
124 if ( args -> idx == args -> argc )
125 return NULL;
126
127 elem_bytes = args -> argv [ args -> idx ] . u . data . elem_bits / 8;
128 assert ( elem_bytes * 8 == args -> argv [ args -> idx ] . u . data . elem_bits );
129
130 data = args -> argv [ args -> idx ] . u . data . base;
131 * row_len = args -> argv [ args -> idx ] . u . data . elem_count;
132
133 data += args -> argv [ args -> idx ] . u . data . first_elem * elem_bytes;
134
135 if ( size != NULL )
136 * size = ( size_t ) ( * row_len * elem_bytes );
137
138 ++ args -> idx;
139 return data;
140 }
141
142 static
capture_uint_scalar(FuncArgs * args,uint64_t * val)143 rc_t capture_uint_scalar ( FuncArgs *args, uint64_t *val )
144 {
145 uint64_t row_len;
146 const void *data = access_cell_data ( args, & row_len, NULL );
147
148 if ( data == NULL )
149 return RC ( rcXF, rcFunction, rcExecuting, rcData, rcNull );
150
151 if ( row_len != 1 )
152 return RC ( rcXF, rcFunction, rcExecuting, rcRange, rcExcessive );
153
154 switch ( args -> argv [ args -> idx - 1 ] . u . data . elem_bits )
155 {
156 case 8:
157 * val = * ( const uint8_t* ) data;
158 break;
159 case 16:
160 * val = * ( const uint16_t* ) data;
161 break;
162 case 32:
163 * val = * ( const uint32_t* ) data;
164 break;
165 case 64:
166 * val = * ( const uint64_t* ) data;
167 break;
168 }
169
170 return 0;
171 }
172
173 static
sprintf_func(void * obj,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])174 rc_t CC sprintf_func ( void *obj,
175 const VXformInfo *info, int64_t row_id, VRowResult *rslt,
176 uint32_t argc, const VRowData argv [] )
177 {
178 rc_t rc;
179 size_t cell_size;
180 uint32_t str_idx, fmt_idx, arg_idx;
181
182 Sprintf *self = obj;
183
184 const PrintFmt *fmt = self -> fmt;
185 PrintArg *args = self -> args;
186 String *str = self -> str;
187
188 FuncArgs vargs;
189 vargs . argv = argv;
190 vargs . argc = argc;
191 vargs . idx = 0;
192
193 str_idx = fmt_idx = arg_idx = 0;
194
195 #if _DEBUGGING
196 rc = validate_obj ( self, false );
197 if ( rc != 0 )
198 KDbgMsg ( "%s - self is bad on entry: %R\n", __func__, rc );
199 #endif
200
201 /* initial pass - prepare argument list & estimate cell size */
202 for ( rc = 0, cell_size = 0; fmt [ fmt_idx ] . type != sptTerm; ++ fmt_idx )
203 {
204 size_t arg_size;
205 bool check_index;
206 uint64_t row_len, start_idx, select_len;
207 uint32_t ext_start_index, ext_stop_index;
208
209 row_len = 0;
210
211 /* test for external field width, precision, index */
212 if ( fmt [ fmt_idx ] . ext_field_width )
213 {
214 rc = capture_uint_scalar ( & vargs, & args [ arg_idx ++ ] . u );
215 if ( rc != 0 )
216 return rc;
217 }
218 if ( fmt [ fmt_idx ] . ext_precision )
219 {
220 rc = capture_uint_scalar ( & vargs, & args [ arg_idx ++ ] . u );
221 if ( rc != 0 )
222 return rc;
223 }
224
225 if ( ! fmt [ fmt_idx ] . ext_start_index )
226 start_idx = fmt [ fmt_idx ] . u . f . start_idx;
227 else
228 {
229 /* if the start index is the last in vector,
230 this needs to be suspended until later */
231 if ( ! fmt [ fmt_idx ] . inf_start_index )
232 {
233 rc = capture_uint_scalar ( & vargs, & start_idx );
234 if ( rc != 0 )
235 return rc;
236 args [ arg_idx ] . u = start_idx;
237 }
238
239 ext_start_index = arg_idx ++;
240 }
241
242 if ( ! fmt [ fmt_idx ] . ext_stop_index && ! fmt [ fmt_idx ] . ext_select_len )
243 select_len = fmt [ fmt_idx ] . u . f . select_len;
244 else
245 {
246 /* if the stop index is the last in vector,
247 this needs to be suspended until later */
248 if ( ! fmt [ fmt_idx ] . inf_stop_index )
249 {
250 rc = capture_uint_scalar ( & vargs, & select_len );
251 if ( rc != 0 )
252 return rc;
253 args [ arg_idx ] . u = select_len;
254 }
255 ext_stop_index = arg_idx ++;
256 }
257
258 /* make an estimate of the bytes needed by sprintf engine for argument
259 get row_len at the same time */
260 check_index = false;
261 switch ( fmt [ fmt_idx ] . type )
262 {
263 case sptLiteral: /* char literal - arg in fmt */
264 assert ( fmt [ fmt_idx ] . u . l . text != NULL );
265 arg_size = fmt [ fmt_idx ] . u . l . size;
266 row_len = arg_size;
267 -- arg_idx;
268 break;
269 case sptSignedInt8Vect: /* signed vector int - arg is d8 */
270 args [ arg_idx ] . d8 = access_cell_data ( & vargs, & row_len, NULL );
271 arg_size = get_radix_size ( & fmt [ fmt_idx ], 0 ) + 1;
272 break;
273 case sptSignedInt16Vect: /* signed vector int - arg is d16 */
274 args [ arg_idx ] . d16 = access_cell_data ( & vargs, & row_len, NULL );
275 arg_size = get_radix_size ( & fmt [ fmt_idx ], 1 ) + 1;
276 break;
277 case sptSignedInt32Vect: /* signed vector int - arg is d32 */
278 args [ arg_idx ] . d32 = access_cell_data ( & vargs, & row_len, NULL );
279 arg_size = get_radix_size ( & fmt [ fmt_idx ], 2 ) + 1;
280 break;
281 case sptSignedInt64Vect: /* signed vector int - arg is d64 */
282 args [ arg_idx ] . d64 = access_cell_data ( & vargs, & row_len, NULL );
283 arg_size = get_radix_size ( & fmt [ fmt_idx ], 3 ) + 1;
284 break;
285 case sptUnsignedInt8Vect: /* unsigned vector int - arg is u8 */
286 args [ arg_idx ] . u8 = access_cell_data ( & vargs, & row_len, NULL );
287 arg_size = get_radix_size ( & fmt [ fmt_idx ], 0 ) + 2;
288 break;
289 case sptUnsignedInt16Vect: /* unsigned vector int - arg is u16 */
290 args [ arg_idx ] . u16 = access_cell_data ( & vargs, & row_len, NULL );
291 arg_size = get_radix_size ( & fmt [ fmt_idx ], 1 ) + 2;
292 break;
293 case sptUnsignedInt32Vect: /* unsigned vector int - arg is u32 */
294 args [ arg_idx ] . u32 = access_cell_data ( & vargs, & row_len, NULL );
295 arg_size = get_radix_size ( & fmt [ fmt_idx ], 2 ) + 2;
296 break;
297 case sptUnsignedInt64Vect: /* unsigned vector int - arg is u64 */
298 args [ arg_idx ] . u64 = access_cell_data ( & vargs, & row_len, NULL );
299 arg_size = get_radix_size ( & fmt [ fmt_idx ], 3 ) + 2;
300 break;
301 case sptFloat32Vect: /* vector float - arg is f32 */
302 args [ arg_idx ] . f32 = access_cell_data ( & vargs, & row_len, NULL );
303 arg_size = 32;
304 break;
305 case sptFloat64Vect: /* vector float - arg is f64 */
306 args [ arg_idx ] . f64 = access_cell_data ( & vargs, & row_len, NULL );
307 arg_size = 32;
308 break;
309 case sptFloatLongVect: /* vector float - arg is flong */
310 args [ arg_idx ] . flong = access_cell_data ( & vargs, & row_len, NULL );
311 arg_size = 32;
312 break;
313 case sptString: /* vector character - arg is S */
314 str [ str_idx ] . addr = access_cell_data ( & vargs, & row_len, & str [ str_idx ] . size );
315 str [ str_idx ] . len = ( uint32_t ) str [ str_idx ] . size;
316 arg_size = str [ str_idx ] . size;
317 args [ arg_idx ] . S = & str [ str_idx ++ ];
318 check_index = true;
319 break;
320 case sptUCS2String: /* vector character - arg is S */
321 case sptUTF32String: /* vector character - arg is S */
322 return RC ( rcXF, rcFunction, rcExecuting, rcType, rcUnsupported );
323 case sptRowId: /* current row id - arg is d */
324 args [ arg_idx ] . d = row_id;
325 arg_size = get_radix_size ( & fmt [ fmt_idx ], 3 ) + 1;
326 ++ arg_idx;
327 continue;
328 case sptRowLen: /* current row length - arg is u */
329 access_cell_data ( & vargs, & args [ arg_idx ] . u, NULL );
330 arg_size = get_radix_size ( & fmt [ fmt_idx ], 3 ) + 2;
331 ++ arg_idx;
332 continue;
333 }
334
335 /* back-fill based upon row-length in case of non-empty rows */
336 if ( row_len != 0 )
337 {
338 /* start index is last in row */
339 if ( fmt [ fmt_idx ] . inf_start_index )
340 start_idx = args [ ext_start_index ] . u = row_len - 1;
341
342 /* end index is last in row */
343 if ( fmt [ fmt_idx ] . inf_stop_index )
344 select_len = args [ ext_stop_index ] . u = row_len - 1;
345 }
346
347 /* detect runtime error condition */
348 if ( start_idx >= row_len )
349 args [ arg_idx ] . p = NULL;
350 else
351 {
352 /* adjust size by index */
353 if ( check_index )
354 {
355 /* the select_len should be correct,
356 unless the end is a stop coordinate */
357 if ( fmt [ fmt_idx ] . ext_stop_index )
358 {
359 if ( select_len <= start_idx )
360 select_len = 1;
361 else
362 select_len -= start_idx - 1;
363 }
364
365 /* select_len of 0 means infinite */
366 if ( select_len == 0 )
367 select_len = row_len - start_idx;
368
369 /* real size after sub-str */
370 arg_size = argv [ vargs . idx - 1 ] . u . data . elem_bits * select_len / 8;
371 }
372
373 /* incorporate size into calcuation */
374 cell_size += arg_size;
375 }
376
377 /* done with argument */
378 ++ arg_idx;
379 }
380
381 /* round cell-size up to nearest 4K */
382 cell_size = ( cell_size + 4095 ) & ~ ( size_t ) 4095;
383
384 /* set output buffer size */
385 rc = KDataBufferResize( rslt -> data, cell_size );
386 if ( rc == 0 )
387 {
388 #if _DEBUGGING
389 rc = validate_obj ( self, false );
390 if ( rc != 0 )
391 KDbgMsg ( "%s - self is bad before invoking structured_sprintf: %R\n", __func__, rc );
392 #endif
393 /* invoke sprintf engine */
394 rc = structured_sprintf ( rslt -> data -> base,
395 rslt -> data -> elem_count, & cell_size, fmt, args );
396
397 /* recover from buffer insufficient */
398 if ( GetRCState ( rc ) == rcInsufficient && GetRCObject ( rc ) == (enum RCObject)rcBuffer )
399 {
400 rc = KDataBufferResize ( rslt -> data, cell_size + 1 );
401 if ( rc == 0 )
402 {
403 rc = structured_sprintf ( rslt -> data -> base,
404 rslt -> data -> elem_count, & cell_size, fmt, args );
405 }
406 }
407
408 if ( rc == 0 )
409 {
410 /* set the return parameters */
411 rc = KDataBufferResize ( rslt -> data, cell_size );
412 if ( rc == 0 )
413 {
414 rslt -> elem_count = cell_size;
415 rslt -> elem_bits = 8;
416 }
417 }
418 }
419
420 return rc;
421 }
422
423 typedef struct ParseData ParseData;
424 struct ParseData
425 {
426 KDataBuffer overflow;
427 size_t fmt_size;
428 size_t lit_size;
429 PrintFmt *fmt;
430 uint32_t str_idx;
431 uint32_t fmt_idx;
432 uint32_t arg_idx;
433 };
434
435 /* create_overflow
436 * create a buffer so large that it cannot overflow again
437 * copy in existing structures
438 */
439 static
create_overflow(ParseData * pd,uint32_t fmt_idx)440 rc_t create_overflow ( ParseData *pd, uint32_t fmt_idx )
441 {
442 /* infer that we can never have more format items than characters in the string... */
443
444 /* our size will create a format for every byte.
445 this should be a total overkill. */
446 size_t buff_size = sizeof pd -> fmt [ 0 ] * pd -> fmt_size;
447
448 /* make the buffer */
449 rc_t rc = KDataBufferMakeBytes ( & pd -> overflow, buff_size );
450 if ( rc == 0 )
451 {
452 /* capture pointers to stack structures */
453 const PrintFmt *fmt = pd -> fmt;
454
455 /* destination pointer */
456 pd -> fmt = pd -> overflow . base;
457
458 /* copy existing data */
459 memmove ( pd -> fmt, fmt, fmt_idx * sizeof pd -> fmt [ 0 ] );
460 }
461
462 pd -> fmt_idx = fmt_idx;
463
464 return rc;
465 }
466
467 typedef struct VAList VAList;
468 struct VAList
469 {
470 const VFunctionParams *dp;
471 uint32_t idx;
472 };
473
474 static
check_integer_arg(VAList * vargs)475 rc_t check_integer_arg ( VAList *vargs )
476 {
477 if ( vargs -> idx == vargs -> dp -> argc )
478 return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcInsufficient );
479
480 switch ( vargs -> dp -> argv [ vargs -> idx ] . desc . domain )
481 {
482 case vtdBool:
483 case vtdInt:
484 case vtdUint:
485 break;
486 default:
487 return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
488 }
489
490 switch ( VTypedescSizeof ( & vargs -> dp -> argv [ vargs -> idx ] . desc ) )
491 {
492 case 8:
493 case 16:
494 case 32:
495 case 64:
496 break;
497 default:
498 return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
499 }
500
501 ++ vargs -> idx;
502 return 0;
503 }
504
505 static
extract_size_modifier(VAList * vargs,char * size_modifier)506 rc_t extract_size_modifier ( VAList *vargs, char *size_modifier )
507 {
508 switch ( vargs -> dp -> argv [ vargs -> idx ] . desc . domain )
509 {
510 case vtdBool:
511 * size_modifier = 0;
512 break;
513 case vtdUint:
514 case vtdInt:
515 switch ( VTypedescSizeof ( & vargs -> dp -> argv [ vargs -> idx ] . desc ) )
516 {
517 case 8:
518 * size_modifier = 't';
519 break;
520 case 16:
521 * size_modifier = 'h';
522 break;
523 case 32:
524 * size_modifier = 0;
525 break;
526 case 64:
527 * size_modifier = 'l';
528 break;
529 default:
530 return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
531 }
532 break;
533 case vtdFloat:
534 switch ( VTypedescSizeof ( & vargs -> dp -> argv [ vargs -> idx ] . desc ) )
535 {
536 case 32:
537 * size_modifier = 'h';
538 break;
539 case 64:
540 * size_modifier = 0;
541 break;
542 default:
543 return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
544 }
545 break;
546 case vtdAscii:
547 * size_modifier = 0;
548 break;
549 case vtdUnicode:
550 switch ( vargs -> dp -> argv [ vargs -> idx ] . desc . intrinsic_bits )
551 {
552 case 8:
553 * size_modifier = 0;
554 break;
555 case 16:
556 * size_modifier = 'h';
557 break;
558 case 32:
559 * size_modifier = 'l';
560 break;
561 default:
562 return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
563 }
564 break;
565 default:
566 return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
567 }
568 return 0;
569 }
570
571
572 /* parse_format_string
573 * parse format string and args into structured format
574 */
575 static
parse_format_string(const char * fmt_str,ParseData * pd,VAList * vargs)576 rc_t parse_format_string ( const char *fmt_str, ParseData *pd, VAList *vargs )
577 {
578 rc_t rc;
579 uint32_t i, fmt_idx;
580
581 PrintFmt *fmt = pd -> fmt;
582
583 /* initialize returned counters */
584 pd -> lit_size = 0;
585 pd -> str_idx = pd -> fmt_idx = pd -> arg_idx = 0;
586
587 /* loop over format string */
588 for ( rc = 0, i = fmt_idx = 0; fmt_str [ i ] != 0; ++ i )
589 {
590 uint32_t domain;
591 bool alternate, numeric;
592 char size_modifier, time_modifier;
593 bool has_precision/*, has_width, has_index*/;
594
595 /* loop to gather literal portions */
596 uint32_t start;
597 for ( start = i; ; ++ i )
598 {
599 /* run until we hit start of substitution token
600 or until we hit end of format string */
601 if ( fmt_str [ i ] != 0 && fmt_str [ i ] != '%' )
602 continue;
603
604 /* detect a literal string */
605 if ( i != start )
606 {
607 /* expand into overflow */
608 if ( fmt_idx == LOCAL_FMT_COUNT )
609 {
610 rc = create_overflow ( pd, fmt_idx );
611 if ( rc != 0 )
612 return rc;
613
614 fmt = pd -> fmt;
615 }
616
617 /* create a text-literal format */
618 memset ( & fmt [ fmt_idx ], 0, sizeof fmt [ 0 ] );
619 fmt [ fmt_idx ] . u . l . text = & fmt_str [ start ];
620 fmt [ fmt_idx ] . u . l . size = i - start;
621 pd -> lit_size += i - start;
622 fmt [ fmt_idx ] . fmt = spfText;
623 fmt [ fmt_idx ] . type = sptLiteral;
624
625 /* increment counter */
626 ++ fmt_idx;
627 }
628
629 /* detect escape sequence */
630 if ( fmt_str [ i ] == 0 || fmt_str [ i + 1 ] != '%' )
631 break;
632
633 /* skip over escape */
634 start = ++ i;
635 }
636
637 /* done when NUL byte is seen */
638 if ( fmt_str [ i ] == 0 )
639 break;
640
641 /* detect overflow */
642 if ( fmt_idx == LOCAL_FMT_COUNT )
643 {
644 rc = create_overflow ( pd, fmt_idx );
645 if ( rc != 0 )
646 return rc;
647
648 fmt = pd -> fmt;
649 }
650
651 /* initial format
652 * thousands_separate = false
653 * add_prefix = false
654 * force_decimal_point = false
655 * leave_trailing_zeros = false
656 * print_time = false
657 * print_date = false
658 * print_weekday = false
659 * print_timezone = false
660 * hour_24 = false
661 * sign = 0
662 * left_fill = space
663 */
664 memset ( & fmt [ fmt_idx ], 0, sizeof fmt [ 0 ] );
665 fmt [ fmt_idx ] . left_fill = ' ';
666
667 /* scan flags */
668 alternate = false;
669 while ( 1 )
670 {
671 switch ( fmt_str [ ++ i ] )
672 {
673 /* plus and space modify application of sign
674 to signed integer and floating point conversions.
675 plus overrides space. */
676 case ' ':
677 if ( fmt [ fmt_idx ] . sign == 0 )
678 case '+':
679 fmt [ fmt_idx ] . sign = fmt_str [ i ];
680 continue;
681
682 /* dash indicates left-alignment. indicate this
683 by setting "left_fill" to NUL. */
684 case '-':
685 fmt [ fmt_idx ] . left_fill = 0;
686 continue;
687
688 /* zero indicates an alternate left-fill for
689 numeric conversions. the zero is inserted before
690 any sign character in { '+', '-' or ' ' }.
691 since "left_fill" is also used to indicate
692 alignment, only store when right aligning. */
693 case '0':
694 if ( fmt [ fmt_idx ] . left_fill != 0 )
695 fmt [ fmt_idx ] . left_fill = '0';
696 continue;
697
698 /* hash indicates that the formatter should use an
699 "alternate" approach. that approach is specific
700 to the format. */
701 case '#':
702 alternate = true;
703 continue;
704
705 /* comma ( or apostrophe outside of US ) indicates
706 that the integer portion of a numeral should use
707 a comma as a thousands separator for legibility. */
708 case ',':
709 case '\'':
710 fmt [ fmt_idx ] . thousands_separate = 1;
711 continue;
712 }
713
714 /* we've hit a non-flag character */
715 break;
716 }
717
718 /* minimum field width */
719 /* has_width = false; */
720 if ( isdigit ( fmt_str [ i ] ) )
721 {
722 /* literal */
723 /*has_width = true;*/
724 fmt [ fmt_idx ] . u . f . min_field_width = fmt_str [ i ] - '0';
725 while ( isdigit ( fmt_str [ ++ i ] ) )
726 {
727 fmt [ fmt_idx ] . u . f . min_field_width *= 10;
728 fmt [ fmt_idx ] . u . f . min_field_width += fmt_str [ i ] - '0';
729 }
730 }
731 else if ( fmt_str [ i ] == '*' )
732 {
733 /* external */
734 rc = check_integer_arg ( vargs );
735 if ( rc != 0 )
736 return rc;
737
738 /*has_width = true;*/
739 fmt [ fmt_idx ] . ext_field_width = 1;
740 ++ pd -> arg_idx;
741 ++ i;
742 }
743
744 /* precision */
745 has_precision = false;
746 if ( fmt_str [ i ] == '.' )
747 {
748 /* a single dot implies a precision value of 0 */
749 has_precision = true;
750
751 if ( isdigit ( fmt_str [ ++ i ] ) )
752 {
753 /* a literal precision */
754 fmt [ fmt_idx ] . u . f . precision = fmt_str [ i ] - '0';
755 while ( isdigit ( fmt_str [ ++ i ] ) )
756 {
757 fmt [ fmt_idx ] . u . f . precision *= 10;
758 fmt [ fmt_idx ] . u . f . precision += fmt_str [ i ] - '0';
759 }
760 }
761 else if ( fmt_str [ i ] == '*' )
762 {
763 /* external */
764 rc = check_integer_arg ( vargs );
765 if ( rc != 0 )
766 return rc;
767
768 fmt [ fmt_idx ] . ext_precision = 1;
769 ++ pd -> arg_idx;
770 ++ i;
771 }
772 else if ( fmt_str [ i ] == '-' )
773 {
774 /* eat a negative precision - treat as 0 */
775 while ( isdigit ( fmt_str [ ++ i ] ) )
776 ( void ) 0;
777 }
778 }
779
780 /* index - given when parameter is a vector */
781 /*has_index = false;*/
782 if ( fmt_str [ i ] == ':' )
783 {
784 bool has_start, has_len, has_end, end_is_stop;
785 has_start = has_len = has_end = end_is_stop = false;
786
787 /* parameter is taken as a vector,
788 with a default index starting at 0 */
789 /*has_index = true;*/
790
791 if ( isdigit ( fmt_str [ ++ i ] ) )
792 {
793 /* literal index */
794 fmt [ fmt_idx ] . u . f . start_idx = fmt_str [ i ] - '0';
795 while ( isdigit ( fmt_str [ ++ i ] ) )
796 {
797 fmt [ fmt_idx ] . u . f . start_idx *= 10;
798 fmt [ fmt_idx ] . u . f . start_idx += fmt_str [ i ] - '0';
799 }
800 has_start = true;
801 }
802 else switch ( fmt_str [ i ] )
803 {
804 case '*':
805 /* external */
806 rc = check_integer_arg ( vargs );
807 if ( rc != 0 )
808 return rc;
809
810 fmt [ fmt_idx ] . ext_start_index = 1;
811 ++ pd -> arg_idx;
812 ++ i;
813 has_start = true;
814 break;
815 case '$':
816 fmt [ fmt_idx ] . inf_start_index = 1;
817 fmt [ fmt_idx ] . ext_start_index = 1;
818 ++ pd -> arg_idx;
819 ++ i;
820 has_start = true;
821 break;
822 }
823
824 /* detect range */
825 switch ( fmt_str [ i ] )
826 {
827 /* given as start-stop */
828 case '-':
829 end_is_stop = true;
830
831 /* given as start/len */
832 case '/':
833
834 has_len = true;
835
836 if ( isdigit ( fmt_str [ ++ i ] ) )
837 {
838 /* literal selection length or end */
839 fmt [ fmt_idx ] . u . f . select_len = fmt_str [ i ] - '0';
840 while ( isdigit ( fmt_str [ ++ i ] ) )
841 {
842 fmt [ fmt_idx ] . u . f . select_len *= 10;
843 fmt [ fmt_idx ] . u . f . select_len += fmt_str [ i ] - '0';
844 }
845 has_end = true;
846 }
847 else switch ( fmt_str [ i ] )
848 {
849 case '*':
850 /* external */
851 rc = check_integer_arg ( vargs );
852 if ( rc != 0 )
853 return rc;
854
855 /* external selection length or end */
856 fmt [ fmt_idx ] . ext_stop_index = end_is_stop;
857 fmt [ fmt_idx ] . ext_select_len = ! end_is_stop;
858 ++ pd -> arg_idx;
859 ++ i;
860 has_end = true;
861 break;
862 case '$':
863 /* ignore index end if start is infinite */
864 if ( ! fmt [ fmt_idx ] . inf_start_index )
865 {
866 fmt [ fmt_idx ] . inf_stop_index = 1;
867 fmt [ fmt_idx ] . ext_stop_index = 1;
868 ++ pd -> arg_idx;
869 end_is_stop = has_end = true;
870 }
871 ++ i;
872 break;
873 case '-':
874 /* negatives are garbage */
875 while ( isdigit ( fmt_str [ ++ i ] ) )
876 ( void ) 0;
877 break;
878 default:
879 end_is_stop = false;
880 }
881 break;
882 }
883
884 if ( ! has_len && has_start )
885 fmt [ fmt_idx ] . u . f . select_len = 1;
886 }
887
888 /* size - accept for brownie-points and for KTime */
889 size_modifier = time_modifier = 0;
890 switch ( fmt_str [ i ] )
891 {
892 /* "Tiny" modifier - like "hh" in C format */
893 case 't':
894 /* "Half" modifier - same as C format */
895 case 'h':
896 /* "Long" modifier - means 64-bit for integers, otherwise like C */
897 case 'l':
898 size_modifier = time_modifier = fmt_str [ i ++ ];
899 break;
900 /* "siZe" modifier - whatever the size of size_t is */
901 case 'z':
902 ++ i;
903 time_modifier = 'z';
904 if ( sizeof ( size_t ) == sizeof ( uint64_t ) )
905 size_modifier = 'l';
906 break;
907 }
908
909 /* output format
910 describes the formatting to apply on output
911 if precision has not been set, give it a default value */
912 domain = 0;
913 numeric = false;
914 switch ( fmt_str [ i ] )
915 {
916 /* decimal signed integer */
917 case 'd':
918 case 'i':
919 fmt [ fmt_idx ] . radix = 10;
920 fmt [ fmt_idx ] . fmt = spfSignedInt;
921 numeric = true;
922 if ( ! has_precision )
923 fmt [ fmt_idx ] . u . f . precision = 1;
924 else if ( fmt [ fmt_idx ] . left_fill == '0' )
925 fmt [ fmt_idx ] . left_fill = ' ';
926 domain = vtdInt;
927 break;
928
929 /* decimal unsigned integer */
930 case 'u':
931 fmt [ fmt_idx ] . radix = 10;
932 unsigned_int:
933 fmt [ fmt_idx ] . fmt = spfUnsigned;
934 fmt [ fmt_idx ] . sign = 0;
935 numeric = true;
936 if ( ! has_precision )
937 fmt [ fmt_idx ] . u . f . precision = 1;
938 else if ( fmt [ fmt_idx ] . left_fill == '0' )
939 fmt [ fmt_idx ] . left_fill = ' ';
940 domain = vtdUint;
941 break;
942
943 /* hex unsigned integer */
944 case 'x':
945 fmt [ fmt_idx ] . add_prefix = alternate;
946 fmt [ fmt_idx ] . radix = 16;
947 goto unsigned_int;
948
949 /* upper-case hex unsigned integer */
950 case 'X':
951 fmt [ fmt_idx ] . upper_case_num = 1;
952 fmt [ fmt_idx ] . add_prefix = alternate;
953 fmt [ fmt_idx ] . radix = 16;
954 goto unsigned_int;
955
956 /* octal unsigned integer */
957 case 'o':
958 fmt [ fmt_idx ] . add_prefix = alternate;
959 fmt [ fmt_idx ] . radix = 8;
960 goto unsigned_int;
961
962 /* binary unsigned integer */
963 case 'b':
964 fmt [ fmt_idx ] . add_prefix = alternate;
965 fmt [ fmt_idx ] . radix = 2;
966 goto unsigned_int;
967
968 /* decimal signed floating point */
969 case 'f':
970 fmt [ fmt_idx ] . fmt = spfStdFloat;
971 fmt_float:
972 fmt [ fmt_idx ] . radix = 10;
973 fmt [ fmt_idx ] . force_decimal_point = alternate;
974 numeric = true;
975 if ( ! has_precision )
976 fmt [ fmt_idx ] . u . f . precision = 6;
977 domain = vtdFloat;
978 break;
979
980 /* scientific notation floating point */
981 case 'e':
982 fmt [ fmt_idx ] . fmt = spfSciFloat;
983 goto fmt_float;
984
985 /* "general" floating point */
986 case 'g':
987 fmt [ fmt_idx ] . leave_trailing_zeros = alternate;
988 fmt [ fmt_idx ] . fmt = spfGenFloat;
989 goto fmt_float;
990
991 /* character data */
992 case 's':
993 if ( ! has_precision )
994 /* no break */
995 case 'c':
996 fmt [ fmt_idx ] . u . f . precision = -1;
997 fmt [ fmt_idx ] . fmt = spfText;
998 domain = vtdUnicode;
999 break;
1000
1001 default:
1002 return RC ( rcXF, rcString, rcFormatting, rcFormat, rcUnrecognized );
1003 }
1004
1005 /* handle zero padding for non-numeric cases */
1006 if ( ! numeric && fmt [ fmt_idx ] . left_fill == '0' )
1007 fmt [ fmt_idx ] . left_fill = ' ';
1008
1009 /* take size from actual parameter */
1010 rc = extract_size_modifier ( vargs, & size_modifier );
1011 if ( rc != 0 )
1012 return rc;
1013
1014 /* determine type from argument */
1015 switch ( vargs -> dp -> argv [ vargs -> idx ] . desc . domain )
1016 {
1017 case vtdBool:
1018 case vtdUint:
1019 switch ( domain )
1020 {
1021 case vtdBool:
1022 case vtdUint:
1023 case vtdInt:
1024 break;
1025 case vtdFloat:
1026 fmt [ fmt_idx ] . type_cast = 1;
1027 break;
1028 default:
1029 return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
1030 }
1031
1032 switch ( size_modifier )
1033 {
1034 case 't':
1035 fmt [ fmt_idx ] . type = sptUnsignedInt8Vect;
1036 break;
1037 case 'h':
1038 fmt [ fmt_idx ] . type = sptUnsignedInt16Vect;
1039 break;
1040 case 0:
1041 fmt [ fmt_idx ] . type = sptUnsignedInt32Vect;
1042 break;
1043 case 'l':
1044 fmt [ fmt_idx ] . type = sptUnsignedInt64Vect;
1045 break;
1046 }
1047 break;
1048
1049 case vtdInt:
1050 switch ( domain )
1051 {
1052 case vtdBool:
1053 case vtdUint:
1054 case vtdInt:
1055 break;
1056 case vtdFloat:
1057 fmt [ fmt_idx ] . type_cast = 1;
1058 break;
1059 default:
1060 return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
1061 }
1062
1063 switch ( size_modifier )
1064 {
1065 case 't':
1066 fmt [ fmt_idx ] . type = sptSignedInt8Vect;
1067 break;
1068 case 'h':
1069 fmt [ fmt_idx ] . type = sptSignedInt16Vect;
1070 break;
1071 case 0:
1072 fmt [ fmt_idx ] . type = sptSignedInt32Vect;
1073 break;
1074 case 'l':
1075 fmt [ fmt_idx ] . type = sptSignedInt64Vect;
1076 break;
1077 }
1078 break;
1079
1080 case vtdFloat:
1081 switch ( domain )
1082 {
1083 case vtdBool:
1084 case vtdUint:
1085 case vtdInt:
1086 fmt [ fmt_idx ] . type_cast = 1;
1087 break;
1088 case vtdFloat:
1089 break;
1090 default:
1091 return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
1092 }
1093
1094 switch ( size_modifier )
1095 {
1096 case 'h':
1097 fmt [ fmt_idx ] . type = sptFloat32Vect;
1098 break;
1099 case 0:
1100 fmt [ fmt_idx ] . type = sptFloat64Vect;
1101 break;
1102 }
1103 break;
1104
1105 case vtdAscii:
1106 switch ( domain )
1107 {
1108 case vtdAscii:
1109 case vtdUnicode:
1110 if ( size_modifier != 0 )
1111 fmt [ fmt_idx ] . type_cast = 1;
1112 break;
1113 default:
1114 return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
1115 }
1116
1117 fmt [ fmt_idx ] . type = sptString;
1118
1119 ++ pd -> str_idx;
1120 break;
1121
1122 case vtdUnicode:
1123 switch ( domain )
1124 {
1125 case vtdAscii:
1126 case vtdUnicode:
1127 switch ( size_modifier )
1128 {
1129 case 0:
1130 fmt [ fmt_idx ] . type = sptString;
1131 break;
1132 case 'h':
1133 fmt [ fmt_idx ] . type_cast = 1;
1134 fmt [ fmt_idx ] . type = sptUCS2String;
1135 break;
1136 case 'l':
1137 fmt [ fmt_idx ] . type_cast = 1;
1138 fmt [ fmt_idx ] . type = sptUTF32String;
1139 break;
1140 }
1141 break;
1142 default:
1143 return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
1144 }
1145
1146 ++ pd -> str_idx;
1147 break;
1148 }
1149
1150 /* account for format argument */
1151 ++ fmt_idx;
1152 ++ pd -> arg_idx;
1153 ++ vargs -> idx;
1154 }
1155
1156 /* record final fmt */
1157 if ( rc == 0 )
1158 {
1159 if ( fmt_idx == LOCAL_FMT_COUNT )
1160 {
1161 rc = create_overflow ( pd, fmt_idx );
1162 if ( rc != 0 )
1163 return rc;
1164
1165 fmt = pd -> fmt;
1166 }
1167
1168 memset ( & fmt [ fmt_idx ++ ], 0, sizeof fmt [ 0 ] );
1169
1170 /* if not all arguments were consumed, should this be an error? */
1171 if ( vargs -> idx != vargs -> dp -> argc )
1172 {
1173 /* produce warning */
1174 }
1175 }
1176
1177 pd -> fmt_idx = fmt_idx;
1178
1179 return rc;
1180 }
1181
1182 VTRANSFACT_IMPL ( vdb_sprintf, 1, 0, 0 ) ( const void *self, const VXfactInfo *info,
1183 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
1184 {
1185 rc_t rc;
1186 Sprintf *obj;
1187
1188 /* const format string */
1189 VAList vargs;
1190 const char *fmt_str = cp -> argv [ 0 ] . data . ascii;
1191
1192 /* local formatting storage */
1193 PrintFmt fmt [ LOCAL_FMT_COUNT ];
1194
1195 /* data block for parse */
1196 ParseData pd;
1197 pd . fmt_size = cp -> argv [ 0 ] . count;
1198 pd . fmt = fmt;
1199
1200 /* packaged va_list */
1201 vargs . dp = dp;
1202 vargs . idx = 0;
1203
1204 /* parse the format string */
1205 rc = parse_format_string ( fmt_str, & pd, & vargs );
1206 if ( rc == 0 )
1207 {
1208 /* the object size:
1209 literal data bytes +
1210 space for PrintFmt +
1211 space for PrintArg */
1212 size_t obj_extra = pd . lit_size +
1213 pd . fmt_idx * sizeof ( PrintFmt ) +
1214 pd . arg_idx * sizeof ( PrintArg ) +
1215 pd . str_idx * sizeof ( String );
1216 obj = malloc ( sizeof * obj + 1 + obj_extra );
1217 if ( obj == NULL )
1218 rc = RC ( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );
1219 else
1220 {
1221 uint32_t i;
1222
1223 char *lit;
1224 PrintFmt *dfmt;
1225 size_t lit_size;
1226
1227 obj -> args = ( void* ) ( obj + 1 );
1228 dfmt = ( void* ) & obj -> args [ pd . arg_idx ];
1229 obj -> str = ( void* ) & dfmt [ pd . fmt_idx ];
1230 lit = ( void* ) & obj -> str [ pd . str_idx ];
1231 obj -> fmt = dfmt;
1232
1233 /* not necessary or even helpful, but doesn't cost much */
1234 memset ( obj -> args, 0, pd . arg_idx * sizeof obj -> args [ 0 ] );
1235 memset ( obj -> str, 0, pd . str_idx * sizeof obj -> str [ 0 ] );
1236
1237 /* copy format and literals */
1238 for ( lit_size = 0, i = 0; i < pd . fmt_idx; ++ i )
1239 {
1240 /* simple copy */
1241 dfmt [ i ] = pd . fmt [ i ];
1242 if ( pd . fmt [ i ] . type == sptLiteral )
1243 {
1244 /* copy over literal data */
1245 dfmt [ i ] . u . l . text = & lit [ lit_size ];
1246 memmove ( & lit [ lit_size ],
1247 pd . fmt [ i ] . u . l . text,
1248 pd . fmt [ i ] . u . l . size );
1249 lit_size += pd . fmt [ i ] . u . l . size;
1250 }
1251 }
1252
1253 /* NUL-terminate the literal text - again, doesn't help but doesn't hurt */
1254 lit [ lit_size ] = 0;
1255 }
1256 }
1257
1258 /* douse any overflow memory used */
1259 if ( pd . fmt != fmt )
1260 KDataBufferWhack ( & pd . overflow );
1261
1262 #if _DEBUGGING
1263 if ( rc == 0 )
1264 {
1265 rc = validate_obj ( obj, true );
1266 if ( rc != 0 )
1267 KDbgMsg ( "%s - self is bad on factory exit: %R\n", __func__, rc );
1268 }
1269 #endif
1270
1271 if ( rc == 0 )
1272 {
1273 rslt -> self = obj;
1274 rslt -> whack = free;
1275 rslt -> u . rf = sprintf_func;
1276 rslt -> variant = vftRow;
1277 }
1278
1279 return rc;
1280 }
1281