1 /*===========================================================================
2  *
3  *                            PUBLIC DOMAIN NOTICE
4  *               National Center for Biotechnology Information
5  *
6  *  This software/database is a "United States Government Work" under the
7  *  terms of the United States Copyright Act.  It was written as part of
8  *  the author's official duties as a United States Government employee and
9  *  thus cannot be copyrighted.  This software/database is freely available
10  *  to the public for use. The National Library of Medicine and the U.S.
11  *  Government have not placed any restriction on its use or reproduction.
12  *
13  *  Although all reasonable efforts have been taken to ensure the accuracy
14  *  and reliability of the software and data, the NLM and the U.S.
15  *  Government do not and cannot warrant the performance or results that
16  *  may be obtained by using this software or data. The NLM and the U.S.
17  *  Government disclaim all warranties, express or implied, including
18  *  warranties of performance, merchantability or fitness for any particular
19  *  purpose.
20  *
21  *  Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  */
26 #include <vdb/extern.h>
27 
28 #include <vdb/xform.h>
29 #include <vdb/schema.h>
30 #include <klib/data-buffer.h>
31 #include <klib/text.h>
32 #include <klib/printf.h>
33 #include <klib/debug.h>
34 #include <klib/rc.h>
35 #include <sysalloc.h>
36 
37 #include <stdlib.h>
38 #include <string.h>
39 #include <ctype.h>
40 #include <os-native.h>
41 #include <assert.h>
42 
43 
44 /* the number of PrintFmt, PrintArg and String
45    elements to allocate in function-local storage */
46 #define LOCAL_FMT_COUNT 64
47 
48 
49 typedef struct Sprintf Sprintf;
50 struct Sprintf
51 {
52     const PrintFmt *fmt;
53     PrintArg *args;
54     String *str;
55 };
56 
/* digit-count tables used for output size estimates, one per radix.
   each is indexed by operand width: [0] = 8, [1] = 16, [2] = 32
   and [3] = 64 bits */
static const uint8_t radix16_size [ 4 ] = { 2, 4, 8, 16 };
static const uint8_t radix10_size [ 4 ] = { 3, 5, 10, 20 };
static const uint8_t radix8_size [ 4 ] = { 3, 6, 11, 22 };
static const uint8_t radix2_size [ 4 ] = { 8, 16, 32, 64 };
61 
62 #if _DEBUGGING
63 static
validate_obj(const Sprintf * obj,bool fmt_only)64 rc_t validate_obj ( const Sprintf *obj, bool fmt_only )
65 {
66     uint32_t i;
67 
68     /* validate the format blocks */
69     for ( i = 0; obj -> fmt [ i ] . type != sptTerm; ++ i )
70     {
71         /* at this point, only looking for bad formats */
72         if ( obj -> fmt [ i ] . type == sptLiteral )
73         {
74             if ( obj -> fmt [ i ] . u . l . text == NULL )
75                 return RC ( rcXF, rcFunction, rcConstructing, rcString, rcNull );
76             if ( obj -> fmt [ i ] . u . l . size == 0 )
77                 return RC ( rcXF, rcFunction, rcConstructing, rcString, rcEmpty );
78             if ( string_chr ( obj -> fmt [ i ] . u . l . text, obj -> fmt [ i ] . u . l . size, 0 ) != NULL )
79                 return RC ( rcXF, rcFunction, rcConstructing, rcString, rcInvalid );
80         }
81     }
82 
83     return 0;
84 }
85 #endif
86 
87 static
get_radix_size(const PrintFmt * fmt,uint32_t selector)88 size_t get_radix_size ( const PrintFmt *fmt, uint32_t selector )
89 {
90     const uint8_t *radix_size;
91     switch ( fmt -> radix )
92     {
93     case 0:
94         return 0;
95     case 10:
96         radix_size = radix10_size;
97         break;
98     case 16:
99         radix_size = radix16_size;
100         break;
101     case 8:
102         radix_size = radix8_size;
103         break;
104     default:
105         radix_size = radix2_size;
106     }
107     return radix_size [ selector ];
108 }
109 
110 typedef struct FuncArgs FuncArgs;
111 struct FuncArgs
112 {
113     const VRowData *argv;
114     uint32_t argc;
115     uint32_t idx;
116 };
117 
118 static
access_cell_data(FuncArgs * args,uint64_t * row_len,size_t * size)119 const void *access_cell_data ( FuncArgs *args, uint64_t *row_len, size_t *size )
120 {
121     size_t elem_bytes;
122     const uint8_t *data;
123 
124     if ( args -> idx == args -> argc )
125         return NULL;
126 
127     elem_bytes = args -> argv [ args -> idx ] . u . data . elem_bits / 8;
128     assert ( elem_bytes * 8 == args -> argv [ args -> idx ] . u . data . elem_bits );
129 
130     data = args -> argv [ args -> idx ] . u . data . base;
131     * row_len = args -> argv [ args -> idx ] . u . data . elem_count;
132 
133     data += args -> argv [ args -> idx ] . u . data . first_elem * elem_bytes;
134 
135     if ( size != NULL )
136         * size = ( size_t ) ( * row_len * elem_bytes );
137 
138     ++ args -> idx;
139     return data;
140 }
141 
142 static
capture_uint_scalar(FuncArgs * args,uint64_t * val)143 rc_t capture_uint_scalar ( FuncArgs *args, uint64_t *val )
144 {
145     uint64_t row_len;
146     const void *data = access_cell_data ( args, & row_len, NULL );
147 
148     if ( data == NULL )
149         return RC ( rcXF, rcFunction, rcExecuting, rcData, rcNull );
150 
151     if ( row_len != 1 )
152         return RC ( rcXF, rcFunction, rcExecuting, rcRange, rcExcessive );
153 
154     switch ( args -> argv [ args -> idx - 1 ] . u . data . elem_bits )
155     {
156     case 8:
157         * val = * ( const uint8_t* ) data;
158         break;
159     case 16:
160         * val = * ( const uint16_t* ) data;
161         break;
162     case 32:
163         * val = * ( const uint32_t* ) data;
164         break;
165     case 64:
166         * val = * ( const uint64_t* ) data;
167         break;
168     }
169 
170     return 0;
171 }
172 
173 static
sprintf_func(void * obj,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])174 rc_t CC sprintf_func ( void *obj,
175      const VXformInfo *info, int64_t row_id, VRowResult *rslt,
176      uint32_t argc, const VRowData argv [] )
177 {
178     rc_t rc;
179     size_t cell_size;
180     uint32_t str_idx, fmt_idx, arg_idx;
181 
182     Sprintf *self = obj;
183 
184     const PrintFmt *fmt = self -> fmt;
185     PrintArg *args = self -> args;
186     String *str = self -> str;
187 
188     FuncArgs vargs;
189     vargs . argv = argv;
190     vargs . argc = argc;
191     vargs . idx = 0;
192 
193     str_idx = fmt_idx = arg_idx = 0;
194 
195 #if _DEBUGGING
196     rc = validate_obj ( self, false );
197     if ( rc != 0 )
198         KDbgMsg ( "%s - self is bad on entry: %R\n", __func__, rc );
199 #endif
200 
201     /* initial pass - prepare argument list & estimate cell size */
202     for ( rc = 0, cell_size = 0; fmt [ fmt_idx ] . type != sptTerm; ++ fmt_idx )
203     {
204         size_t arg_size;
205         bool check_index;
206         uint64_t row_len, start_idx, select_len;
207         uint32_t ext_start_index, ext_stop_index;
208 
209         row_len = 0;
210 
211         /* test for external field width, precision, index */
212         if ( fmt [ fmt_idx ] . ext_field_width )
213         {
214             rc = capture_uint_scalar ( & vargs, & args [ arg_idx ++ ] . u );
215             if ( rc != 0 )
216                 return rc;
217         }
218         if ( fmt [ fmt_idx ] . ext_precision )
219         {
220             rc = capture_uint_scalar ( & vargs, & args [ arg_idx ++ ] . u );
221             if ( rc != 0 )
222                 return rc;
223         }
224 
225         if ( ! fmt [ fmt_idx ] . ext_start_index )
226             start_idx = fmt [ fmt_idx ] . u . f . start_idx;
227         else
228         {
229             /* if the start index is the last in vector,
230                this needs to be suspended until later */
231             if ( ! fmt [ fmt_idx ] . inf_start_index )
232             {
233                 rc = capture_uint_scalar ( & vargs, & start_idx );
234                 if ( rc != 0 )
235                     return rc;
236                 args [ arg_idx ] . u = start_idx;
237             }
238 
239             ext_start_index = arg_idx ++;
240         }
241 
242         if ( ! fmt [ fmt_idx ] . ext_stop_index && ! fmt [ fmt_idx ] . ext_select_len )
243             select_len = fmt [ fmt_idx ] . u . f . select_len;
244         else
245         {
246             /* if the stop index is the last in vector,
247                this needs to be suspended until later */
248             if ( ! fmt [ fmt_idx ] . inf_stop_index )
249             {
250                 rc = capture_uint_scalar ( & vargs, & select_len );
251                 if ( rc != 0 )
252                     return rc;
253                 args [ arg_idx ] . u = select_len;
254             }
255             ext_stop_index = arg_idx ++;
256         }
257 
258         /* make an estimate of the bytes needed by sprintf engine for argument
259            get row_len at the same time */
260         check_index = false;
261         switch ( fmt [ fmt_idx ] . type )
262         {
263         case sptLiteral:                     /* char literal        - arg in fmt   */
264             assert ( fmt [ fmt_idx ] . u . l . text != NULL );
265             arg_size = fmt [ fmt_idx ] . u . l . size;
266             row_len = arg_size;
267             -- arg_idx;
268             break;
269         case sptSignedInt8Vect:              /* signed vector int   - arg is d8    */
270             args [ arg_idx ] . d8 = access_cell_data ( & vargs, & row_len, NULL );
271             arg_size = get_radix_size ( & fmt [ fmt_idx ], 0 ) + 1;
272             break;
273         case sptSignedInt16Vect:             /* signed vector int   - arg is d16   */
274             args [ arg_idx ] . d16 = access_cell_data ( & vargs, & row_len, NULL );
275             arg_size = get_radix_size ( & fmt [ fmt_idx ], 1 ) + 1;
276             break;
277         case sptSignedInt32Vect:             /* signed vector int   - arg is d32   */
278             args [ arg_idx ] . d32 = access_cell_data ( & vargs, & row_len, NULL );
279             arg_size = get_radix_size ( & fmt [ fmt_idx ], 2 ) + 1;
280             break;
281         case sptSignedInt64Vect:             /* signed vector int   - arg is d64   */
282             args [ arg_idx ] . d64 = access_cell_data ( & vargs, & row_len, NULL );
283             arg_size = get_radix_size ( & fmt [ fmt_idx ], 3 ) + 1;
284             break;
285         case sptUnsignedInt8Vect:            /* unsigned vector int - arg is u8    */
286             args [ arg_idx ] . u8 = access_cell_data ( & vargs, & row_len, NULL );
287             arg_size = get_radix_size ( & fmt [ fmt_idx ], 0 ) + 2;
288             break;
289         case sptUnsignedInt16Vect:           /* unsigned vector int - arg is u16   */
290             args [ arg_idx ] . u16 = access_cell_data ( & vargs, & row_len, NULL );
291             arg_size = get_radix_size ( & fmt [ fmt_idx ], 1 ) + 2;
292             break;
293         case sptUnsignedInt32Vect:           /* unsigned vector int - arg is u32   */
294             args [ arg_idx ] . u32 = access_cell_data ( & vargs, & row_len, NULL );
295             arg_size = get_radix_size ( & fmt [ fmt_idx ], 2 ) + 2;
296             break;
297         case sptUnsignedInt64Vect:           /* unsigned vector int - arg is u64   */
298             args [ arg_idx ] . u64 = access_cell_data ( & vargs, & row_len, NULL );
299             arg_size = get_radix_size ( & fmt [ fmt_idx ], 3 ) + 2;
300             break;
301         case sptFloat32Vect:                 /* vector float        - arg is f32   */
302             args [ arg_idx ] . f32 = access_cell_data ( & vargs, & row_len, NULL );
303             arg_size = 32;
304             break;
305         case sptFloat64Vect:                 /* vector float        - arg is f64   */
306             args [ arg_idx ] . f64 = access_cell_data ( & vargs, & row_len, NULL );
307             arg_size = 32;
308             break;
309         case sptFloatLongVect:               /* vector float        - arg is flong */
310             args [ arg_idx ] . flong = access_cell_data ( & vargs, & row_len, NULL );
311             arg_size = 32;
312             break;
313         case sptString:                      /* vector character    - arg is S     */
314             str [ str_idx ] . addr = access_cell_data ( & vargs, & row_len, & str [ str_idx ] . size );
315             str [ str_idx ] . len = ( uint32_t ) str [ str_idx ] . size;
316             arg_size = str [ str_idx ] . size;
317             args [ arg_idx ] . S = & str [ str_idx ++ ];
318             check_index = true;
319             break;
320         case sptUCS2String:                  /* vector character    - arg is S     */
321         case sptUTF32String:                 /* vector character    - arg is S     */
322             return RC ( rcXF, rcFunction, rcExecuting, rcType, rcUnsupported );
323         case sptRowId:                       /* current row id      - arg is d     */
324             args [ arg_idx ] . d = row_id;
325             arg_size = get_radix_size ( & fmt [ fmt_idx ], 3 ) + 1;
326             ++ arg_idx;
327             continue;
328         case sptRowLen:                      /* current row length  - arg is u     */
329             access_cell_data ( & vargs, & args [ arg_idx ] . u, NULL );
330             arg_size = get_radix_size ( & fmt [ fmt_idx ], 3 ) + 2;
331             ++ arg_idx;
332             continue;
333         }
334 
335         /* back-fill based upon row-length in case of non-empty rows */
336         if ( row_len != 0 )
337         {
338             /* start index is last in row */
339             if ( fmt [ fmt_idx ] . inf_start_index )
340                 start_idx = args [ ext_start_index ] . u = row_len - 1;
341 
342             /* end index is last in row */
343             if ( fmt [ fmt_idx ] . inf_stop_index )
344                 select_len = args [ ext_stop_index ] . u = row_len - 1;
345         }
346 
347         /* detect runtime error condition */
348         if ( start_idx >= row_len )
349             args [ arg_idx ] . p = NULL;
350         else
351         {
352             /* adjust size by index */
353             if ( check_index )
354             {
355                 /* the select_len should be correct,
356                    unless the end is a stop coordinate */
357                 if ( fmt [ fmt_idx ] . ext_stop_index )
358                 {
359                     if ( select_len <= start_idx )
360                         select_len = 1;
361                     else
362                         select_len -= start_idx - 1;
363                 }
364 
365                 /* select_len of 0 means infinite */
366                 if ( select_len == 0 )
367                     select_len = row_len - start_idx;
368 
369                 /* real size after sub-str */
370                 arg_size = argv [ vargs . idx - 1 ] . u . data . elem_bits * select_len / 8;
371             }
372 
373             /* incorporate size into calcuation */
374             cell_size += arg_size;
375         }
376 
377         /* done with argument */
378         ++ arg_idx;
379     }
380 
381     /* round cell-size up to nearest 4K */
382     cell_size = ( cell_size + 4095 ) & ~ ( size_t ) 4095;
383 
384     /* set output buffer size */
385     rc = KDataBufferResize( rslt -> data, cell_size );
386     if ( rc == 0 )
387     {
388 #if _DEBUGGING
389         rc = validate_obj ( self, false );
390         if ( rc != 0 )
391             KDbgMsg ( "%s - self is bad before invoking structured_sprintf: %R\n", __func__, rc );
392 #endif
393         /* invoke sprintf engine */
394         rc = structured_sprintf ( rslt -> data -> base,
395             rslt -> data -> elem_count, & cell_size, fmt, args );
396 
397         /* recover from buffer insufficient */
398         if ( GetRCState ( rc ) == rcInsufficient && GetRCObject ( rc ) == (enum RCObject)rcBuffer )
399         {
400             rc = KDataBufferResize ( rslt -> data, cell_size + 1 );
401             if ( rc == 0 )
402             {
403                 rc = structured_sprintf ( rslt -> data -> base,
404                     rslt -> data -> elem_count, & cell_size, fmt, args );
405             }
406         }
407 
408         if ( rc == 0 )
409         {
410             /* set the return parameters */
411             rc = KDataBufferResize ( rslt -> data, cell_size );
412             if ( rc == 0 )
413             {
414                 rslt -> elem_count = cell_size;
415                 rslt -> elem_bits = 8;
416             }
417         }
418     }
419 
420     return rc;
421 }
422 
423 typedef struct ParseData ParseData;
424 struct ParseData
425 {
426     KDataBuffer overflow;
427     size_t fmt_size;
428     size_t lit_size;
429     PrintFmt *fmt;
430     uint32_t str_idx;
431     uint32_t fmt_idx;
432     uint32_t arg_idx;
433 };
434 
435 /* create_overflow
436  *  create a buffer so large that it cannot overflow again
437  *  copy in existing structures
438  */
439 static
create_overflow(ParseData * pd,uint32_t fmt_idx)440 rc_t create_overflow ( ParseData *pd, uint32_t fmt_idx )
441 {
442     /* infer that we can never have more format items than characters in the string... */
443 
444     /* our size will create a format for every byte.
445        this should be a total overkill. */
446     size_t buff_size = sizeof pd -> fmt [ 0 ] * pd -> fmt_size;
447 
448     /* make the buffer */
449     rc_t rc = KDataBufferMakeBytes ( & pd -> overflow, buff_size );
450     if ( rc == 0 )
451     {
452         /* capture pointers to stack structures */
453         const PrintFmt *fmt = pd -> fmt;
454 
455         /* destination pointer */
456         pd -> fmt = pd -> overflow . base;
457 
458         /* copy existing data */
459         memmove ( pd -> fmt, fmt, fmt_idx * sizeof pd -> fmt [ 0 ] );
460    }
461 
462     pd -> fmt_idx = fmt_idx;
463 
464     return rc;
465 }
466 
467 typedef struct VAList VAList;
468 struct VAList
469 {
470     const VFunctionParams *dp;
471     uint32_t idx;
472 };
473 
474 static
check_integer_arg(VAList * vargs)475 rc_t check_integer_arg ( VAList *vargs )
476 {
477     if ( vargs -> idx == vargs -> dp -> argc )
478         return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcInsufficient );
479 
480     switch ( vargs -> dp -> argv [ vargs -> idx ] . desc . domain )
481     {
482     case vtdBool:
483     case vtdInt:
484     case vtdUint:
485         break;
486     default:
487         return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
488     }
489 
490     switch ( VTypedescSizeof ( & vargs -> dp -> argv [ vargs -> idx ] . desc ) )
491     {
492     case 8:
493     case 16:
494     case 32:
495     case 64:
496         break;
497     default:
498         return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
499     }
500 
501     ++ vargs -> idx;
502     return 0;
503 }
504 
505 static
extract_size_modifier(VAList * vargs,char * size_modifier)506 rc_t extract_size_modifier ( VAList *vargs, char *size_modifier )
507 {
508     switch ( vargs -> dp -> argv [ vargs -> idx ] . desc . domain )
509     {
510     case vtdBool:
511         * size_modifier = 0;
512         break;
513     case vtdUint:
514     case vtdInt:
515         switch ( VTypedescSizeof ( & vargs -> dp -> argv [ vargs -> idx ] . desc ) )
516         {
517         case 8:
518             * size_modifier = 't';
519             break;
520         case 16:
521             * size_modifier = 'h';
522             break;
523         case 32:
524             * size_modifier = 0;
525             break;
526         case 64:
527             * size_modifier = 'l';
528             break;
529         default:
530             return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
531         }
532         break;
533     case vtdFloat:
534         switch ( VTypedescSizeof ( & vargs -> dp -> argv [ vargs -> idx ] . desc ) )
535         {
536         case 32:
537             * size_modifier = 'h';
538             break;
539         case 64:
540             * size_modifier = 0;
541             break;
542         default:
543             return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
544         }
545         break;
546     case vtdAscii:
547         * size_modifier = 0;
548         break;
549     case vtdUnicode:
550         switch ( vargs -> dp -> argv [ vargs -> idx ] . desc . intrinsic_bits )
551         {
552         case 8:
553             * size_modifier = 0;
554             break;
555         case 16:
556             * size_modifier = 'h';
557             break;
558         case 32:
559             * size_modifier = 'l';
560             break;
561         default:
562             return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
563         }
564         break;
565     default:
566         return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
567     }
568     return 0;
569 }
570 
571 
572 /* parse_format_string
573  *  parse format string and args into structured format
574  */
575 static
parse_format_string(const char * fmt_str,ParseData * pd,VAList * vargs)576 rc_t parse_format_string ( const char *fmt_str, ParseData *pd, VAList *vargs )
577 {
578     rc_t rc;
579     uint32_t i, fmt_idx;
580 
581     PrintFmt *fmt = pd -> fmt;
582 
583     /* initialize returned counters */
584     pd -> lit_size = 0;
585     pd -> str_idx = pd -> fmt_idx = pd -> arg_idx = 0;
586 
587     /* loop over format string */
588     for ( rc = 0, i = fmt_idx = 0; fmt_str [ i ] != 0; ++ i )
589     {
590         uint32_t domain;
591         bool alternate, numeric;
592         char size_modifier, time_modifier;
593         bool has_precision/*, has_width, has_index*/;
594 
595         /* loop to gather literal portions */
596         uint32_t start;
597         for ( start = i; ; ++ i )
598         {
599             /* run until we hit start of substitution token
600                or until we hit end of format string */
601             if ( fmt_str [ i ] != 0 && fmt_str [ i ] != '%' )
602                 continue;
603 
604             /* detect a literal string */
605             if ( i != start )
606             {
607                 /* expand into overflow */
608                 if ( fmt_idx == LOCAL_FMT_COUNT )
609                 {
610                     rc = create_overflow ( pd, fmt_idx );
611                     if ( rc != 0 )
612                         return rc;
613 
614                     fmt = pd -> fmt;
615                 }
616 
617                 /* create a text-literal format */
618                 memset ( & fmt [ fmt_idx ], 0, sizeof fmt [ 0 ] );
619                 fmt [ fmt_idx ] . u . l . text = & fmt_str [ start ];
620                 fmt [ fmt_idx ] . u . l . size = i - start;
621                 pd -> lit_size += i - start;
622                 fmt [ fmt_idx ] . fmt = spfText;
623                 fmt [ fmt_idx ] . type = sptLiteral;
624 
625                 /* increment counter */
626                 ++ fmt_idx;
627             }
628 
629             /* detect escape sequence */
630             if ( fmt_str [ i ] == 0 || fmt_str [ i + 1 ] != '%' )
631                 break;
632 
633             /* skip over escape */
634             start = ++ i;
635         }
636 
637         /* done when NUL byte is seen */
638         if ( fmt_str [ i ] == 0 )
639             break;
640 
641         /* detect overflow */
642         if ( fmt_idx == LOCAL_FMT_COUNT )
643         {
644             rc = create_overflow ( pd, fmt_idx );
645             if ( rc != 0 )
646                 return rc;
647 
648             fmt = pd -> fmt;
649         }
650 
651         /* initial format
652          *  thousands_separate    = false
653          *  add_prefix            = false
654          *  force_decimal_point   = false
655          *  leave_trailing_zeros  = false
656          *  print_time            = false
657          *  print_date            = false
658          *  print_weekday         = false
659          *  print_timezone        = false
660          *  hour_24               = false
661          *  sign                  = 0
662          *  left_fill             = space
663          */
664         memset ( & fmt [ fmt_idx ], 0, sizeof fmt [ 0 ] );
665         fmt [ fmt_idx ] . left_fill = ' ';
666 
667         /* scan flags */
668         alternate = false;
669         while ( 1 )
670         {
671             switch ( fmt_str [ ++ i ] )
672             {
673                 /* plus and space modify application of sign
674                    to signed integer and floating point conversions.
675                    plus overrides space. */
676             case ' ':
677                 if ( fmt [ fmt_idx ] . sign == 0 )
678             case '+':
679                     fmt [ fmt_idx ] . sign = fmt_str [ i ];
680                 continue;
681 
682                 /* dash indicates left-alignment. indicate this
683                    by setting "left_fill" to NUL. */
684             case '-':
685                 fmt [ fmt_idx ] . left_fill = 0;
686                 continue;
687 
688                 /* zero indicates an alternate left-fill for
689                    numeric conversions. the zero is inserted before
690                    any sign character in { '+', '-' or ' ' }.
691                    since "left_fill" is also used to indicate
692                    alignment, only store when right aligning. */
693             case '0':
694                 if ( fmt [ fmt_idx ] . left_fill != 0 )
695                     fmt [ fmt_idx ] . left_fill = '0';
696                 continue;
697 
698                 /* hash indicates that the formatter should use an
699                    "alternate" approach. that approach is specific
700                    to the format. */
701             case '#':
702                 alternate = true;
703                 continue;
704 
705                 /* comma ( or apostrophe outside of US ) indicates
706                    that the integer portion of a numeral should use
707                    a comma as a thousands separator for legibility. */
708             case ',':
709             case '\'':
710                 fmt [ fmt_idx ] . thousands_separate = 1;
711                 continue;
712             }
713 
714             /* we've hit a non-flag character */
715             break;
716         }
717 
718         /* minimum field width */
719         /* has_width = false; */
720         if ( isdigit ( fmt_str [ i ] ) )
721         {
722             /* literal */
723             /*has_width = true;*/
724             fmt [ fmt_idx ] . u . f . min_field_width = fmt_str [ i ] - '0';
725             while ( isdigit ( fmt_str [ ++ i ] ) )
726             {
727                 fmt [ fmt_idx ] . u . f . min_field_width *= 10;
728                 fmt [ fmt_idx ] . u . f . min_field_width += fmt_str [ i ] - '0';
729             }
730         }
731         else if ( fmt_str [ i ] == '*' )
732         {
733             /* external */
734             rc = check_integer_arg ( vargs );
735             if ( rc != 0 )
736                 return rc;
737 
738             /*has_width = true;*/
739             fmt [ fmt_idx ] . ext_field_width = 1;
740             ++ pd -> arg_idx;
741             ++ i;
742         }
743 
744         /* precision */
745         has_precision = false;
746         if ( fmt_str [ i ] == '.' )
747         {
748             /* a single dot implies a precision value of 0 */
749             has_precision = true;
750 
751             if ( isdigit ( fmt_str [ ++ i ] ) )
752             {
753                 /* a literal precision */
754                 fmt [ fmt_idx ] . u . f . precision = fmt_str [ i ] - '0';
755                 while ( isdigit ( fmt_str [ ++ i ] ) )
756                 {
757                     fmt [ fmt_idx ] . u . f . precision *= 10;
758                     fmt [ fmt_idx ] . u . f . precision += fmt_str [ i ] - '0';
759                 }
760             }
761             else if ( fmt_str [ i ] == '*' )
762             {
763                 /* external */
764                 rc = check_integer_arg ( vargs );
765                 if ( rc != 0 )
766                     return rc;
767 
768                 fmt [ fmt_idx ] . ext_precision = 1;
769                 ++ pd -> arg_idx;
770                 ++ i;
771             }
772             else if ( fmt_str [ i ] == '-' )
773             {
774                 /* eat a negative precision - treat as 0 */
775                 while ( isdigit ( fmt_str [ ++ i ] ) )
776                     ( void ) 0;
777             }
778         }
779 
780         /* index - given when parameter is a vector */
781         /*has_index = false;*/
782         if ( fmt_str [ i ] == ':' )
783         {
784             bool has_start, has_len, has_end, end_is_stop;
785             has_start = has_len = has_end = end_is_stop = false;
786 
787             /* parameter is taken as a vector,
788                with a default index starting at 0 */
789             /*has_index = true;*/
790 
791             if ( isdigit ( fmt_str [ ++ i ] ) )
792             {
793                 /* literal index */
794                 fmt [ fmt_idx ] . u . f . start_idx = fmt_str [ i ] - '0';
795                 while ( isdigit ( fmt_str [ ++ i ] ) )
796                 {
797                     fmt [ fmt_idx ] . u . f . start_idx *= 10;
798                     fmt [ fmt_idx ] . u . f . start_idx += fmt_str [ i ] - '0';
799                 }
800                 has_start = true;
801             }
802             else switch ( fmt_str [ i ] )
803             {
804             case '*':
805                 /* external */
806                 rc = check_integer_arg ( vargs );
807                 if ( rc != 0 )
808                     return rc;
809 
810                 fmt [ fmt_idx ] . ext_start_index = 1;
811                 ++ pd -> arg_idx;
812                 ++ i;
813                 has_start = true;
814                 break;
815             case '$':
816                 fmt [ fmt_idx ] . inf_start_index = 1;
817                 fmt [ fmt_idx ] . ext_start_index = 1;
818                 ++ pd -> arg_idx;
819                 ++ i;
820                 has_start = true;
821                 break;
822             }
823 
824             /* detect range */
825             switch ( fmt_str [ i ] )
826             {
827                 /* given as start-stop */
828             case '-':
829                 end_is_stop = true;
830 
831                 /* given as start/len */
832             case '/':
833 
834                 has_len = true;
835 
836                 if ( isdigit ( fmt_str [ ++ i ] ) )
837                 {
838                     /* literal selection length or end */
839                     fmt [ fmt_idx ] . u . f . select_len = fmt_str [ i ] - '0';
840                     while ( isdigit ( fmt_str [ ++ i ] ) )
841                     {
842                         fmt [ fmt_idx ] . u . f . select_len *= 10;
843                         fmt [ fmt_idx ] . u . f . select_len += fmt_str [ i ] - '0';
844                     }
845                     has_end = true;
846                 }
847                 else switch ( fmt_str [ i ] )
848                 {
849                 case '*':
850                     /* external */
851                     rc = check_integer_arg ( vargs );
852                     if ( rc != 0 )
853                         return rc;
854 
855                     /* external selection length or end */
856                     fmt [ fmt_idx ] . ext_stop_index = end_is_stop;
857                     fmt [ fmt_idx ] . ext_select_len = ! end_is_stop;
858                     ++ pd -> arg_idx;
859                     ++ i;
860                     has_end = true;
861                     break;
862                 case '$':
863                     /* ignore index end if start is infinite */
864                     if ( ! fmt [ fmt_idx ] . inf_start_index )
865                     {
866                         fmt [ fmt_idx ] . inf_stop_index = 1;
867                         fmt [ fmt_idx ] . ext_stop_index = 1;
868                         ++ pd -> arg_idx;
869                         end_is_stop = has_end = true;
870                     }
871                     ++ i;
872                     break;
873                 case '-':
874                     /* negatives are garbage */
875                     while ( isdigit ( fmt_str [ ++ i ] ) )
876                         ( void ) 0;
877                     break;
878                 default:
879                     end_is_stop = false;
880                 }
881                 break;
882             }
883 
884             if ( ! has_len && has_start )
885                 fmt [ fmt_idx ] . u . f . select_len = 1;
886         }
887 
888         /* size - accept for brownie-points and for KTime */
889         size_modifier = time_modifier = 0;
890         switch ( fmt_str [ i ] )
891         {
892             /* "Tiny" modifier - like "hh" in C format */
893         case 't':
894             /* "Half" modifier - same as C format */
895         case 'h':
896             /* "Long" modifier - means 64-bit for integers, otherwise like C */
897         case 'l':
898             size_modifier = time_modifier = fmt_str [ i ++ ];
899             break;
900             /* "siZe" modifier - whatever the size of size_t is */
901         case 'z':
902             ++ i;
903             time_modifier = 'z';
904             if ( sizeof ( size_t ) == sizeof ( uint64_t ) )
905                 size_modifier = 'l';
906             break;
907         }
908 
909         /* output format
910            describes the formatting to apply on output
911            if precision has not been set, give it a default value */
912         domain = 0;
913         numeric = false;
914         switch ( fmt_str [ i ] )
915         {
916             /* decimal signed integer */
917         case 'd':
918         case 'i':
919             fmt [ fmt_idx ] . radix = 10;
920             fmt [ fmt_idx ] . fmt = spfSignedInt;
921             numeric = true;
922             if ( ! has_precision )
923                 fmt [ fmt_idx ] . u . f . precision = 1;
924             else if ( fmt [ fmt_idx ] . left_fill == '0' )
925                 fmt [ fmt_idx ] . left_fill = ' ';
926             domain = vtdInt;
927             break;
928 
929             /* decimal unsigned integer */
930         case 'u':
931             fmt [ fmt_idx ] . radix = 10;
932         unsigned_int:
933             fmt [ fmt_idx ] . fmt = spfUnsigned;
934             fmt [ fmt_idx ] . sign = 0;
935             numeric = true;
936             if ( ! has_precision )
937                 fmt [ fmt_idx ] . u . f . precision = 1;
938             else if ( fmt [ fmt_idx ] . left_fill == '0' )
939                 fmt [ fmt_idx ] . left_fill = ' ';
940             domain = vtdUint;
941             break;
942 
943             /* hex unsigned integer */
944         case 'x':
945             fmt [ fmt_idx ] . add_prefix = alternate;
946             fmt [ fmt_idx ] . radix = 16;
947             goto unsigned_int;
948 
949             /* upper-case hex unsigned integer */
950         case 'X':
951             fmt [ fmt_idx ] . upper_case_num = 1;
952             fmt [ fmt_idx ] . add_prefix = alternate;
953             fmt [ fmt_idx ] . radix = 16;
954             goto unsigned_int;
955 
956             /* octal unsigned integer */
957         case 'o':
958             fmt [ fmt_idx ] . add_prefix = alternate;
959             fmt [ fmt_idx ] . radix = 8;
960             goto unsigned_int;
961 
962             /* binary unsigned integer */
963         case 'b':
964             fmt [ fmt_idx ] . add_prefix = alternate;
965             fmt [ fmt_idx ] . radix = 2;
966             goto unsigned_int;
967 
968             /* decimal signed floating point */
969         case 'f':
970             fmt [ fmt_idx ] . fmt = spfStdFloat;
971         fmt_float:
972             fmt [ fmt_idx ] . radix = 10;
973             fmt [ fmt_idx ] . force_decimal_point = alternate;
974             numeric = true;
975             if ( ! has_precision )
976                 fmt [ fmt_idx ] . u . f . precision = 6;
977             domain = vtdFloat;
978             break;
979 
980             /* scientific notation floating point */
981         case 'e':
982             fmt [ fmt_idx ] . fmt = spfSciFloat;
983             goto fmt_float;
984 
985             /* "general" floating point */
986         case 'g':
987             fmt [ fmt_idx ] . leave_trailing_zeros = alternate;
988             fmt [ fmt_idx ] . fmt = spfGenFloat;
989             goto fmt_float;
990 
991             /* character data */
992         case 's':
993             if ( ! has_precision )
994             /* no break */
995         case 'c':
996                 fmt [ fmt_idx ] . u . f . precision = -1;
997             fmt [ fmt_idx ] . fmt = spfText;
998             domain = vtdUnicode;
999             break;
1000 
1001         default:
1002             return RC ( rcXF, rcString, rcFormatting, rcFormat, rcUnrecognized );
1003         }
1004 
1005         /* handle zero padding for non-numeric cases */
1006         if ( ! numeric && fmt [ fmt_idx ] . left_fill == '0' )
1007             fmt [ fmt_idx ] . left_fill = ' ';
1008 
1009         /* take size from actual parameter */
1010         rc = extract_size_modifier ( vargs, & size_modifier );
1011         if ( rc != 0 )
1012             return rc;
1013 
1014         /* determine type from argument */
1015         switch ( vargs -> dp -> argv [ vargs -> idx ] . desc . domain )
1016         {
1017         case vtdBool:
1018         case vtdUint:
1019             switch ( domain )
1020             {
1021             case vtdBool:
1022             case vtdUint:
1023             case vtdInt:
1024                 break;
1025             case vtdFloat:
1026                 fmt [ fmt_idx ] . type_cast = 1;
1027                 break;
1028             default:
1029                 return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
1030             }
1031 
1032             switch ( size_modifier )
1033             {
1034             case 't':
1035                 fmt [ fmt_idx ] . type = sptUnsignedInt8Vect;
1036                 break;
1037             case 'h':
1038                 fmt [ fmt_idx ] . type = sptUnsignedInt16Vect;
1039                 break;
1040             case 0:
1041                 fmt [ fmt_idx ] . type = sptUnsignedInt32Vect;
1042                 break;
1043             case 'l':
1044                 fmt [ fmt_idx ] . type = sptUnsignedInt64Vect;
1045                 break;
1046             }
1047             break;
1048 
1049         case vtdInt:
1050             switch ( domain )
1051             {
1052             case vtdBool:
1053             case vtdUint:
1054             case vtdInt:
1055                 break;
1056             case vtdFloat:
1057                 fmt [ fmt_idx ] . type_cast = 1;
1058                 break;
1059             default:
1060                 return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
1061             }
1062 
1063             switch ( size_modifier )
1064             {
1065             case 't':
1066                 fmt [ fmt_idx ] . type = sptSignedInt8Vect;
1067                 break;
1068             case 'h':
1069                 fmt [ fmt_idx ] . type = sptSignedInt16Vect;
1070                 break;
1071             case 0:
1072                 fmt [ fmt_idx ] . type = sptSignedInt32Vect;
1073                 break;
1074             case 'l':
1075                 fmt [ fmt_idx ] . type = sptSignedInt64Vect;
1076                 break;
1077             }
1078             break;
1079 
1080         case vtdFloat:
1081             switch ( domain )
1082             {
1083             case vtdBool:
1084             case vtdUint:
1085             case vtdInt:
1086                 fmt [ fmt_idx ] . type_cast = 1;
1087                 break;
1088             case vtdFloat:
1089                 break;
1090             default:
1091                 return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
1092             }
1093 
1094             switch ( size_modifier )
1095             {
1096             case 'h':
1097                 fmt [ fmt_idx ] . type = sptFloat32Vect;
1098                 break;
1099             case 0:
1100                 fmt [ fmt_idx ] . type = sptFloat64Vect;
1101                 break;
1102             }
1103             break;
1104 
1105         case vtdAscii:
1106             switch ( domain )
1107             {
1108             case vtdAscii:
1109             case vtdUnicode:
1110                 if ( size_modifier != 0 )
1111                     fmt [ fmt_idx ] . type_cast = 1;
1112                 break;
1113             default:
1114                 return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
1115             }
1116 
1117             fmt [ fmt_idx ] . type = sptString;
1118 
1119             ++ pd -> str_idx;
1120             break;
1121 
1122         case vtdUnicode:
1123             switch ( domain )
1124             {
1125             case vtdAscii:
1126             case vtdUnicode:
1127                 switch ( size_modifier )
1128                 {
1129                 case 0:
1130                     fmt [ fmt_idx ] . type = sptString;
1131                     break;
1132                 case 'h':
1133                     fmt [ fmt_idx ] . type_cast = 1;
1134                     fmt [ fmt_idx ] . type = sptUCS2String;
1135                     break;
1136                 case 'l':
1137                     fmt [ fmt_idx ] . type_cast = 1;
1138                     fmt [ fmt_idx ] . type = sptUTF32String;
1139                     break;
1140                 }
1141                 break;
1142             default:
1143                 return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
1144             }
1145 
1146             ++ pd -> str_idx;
1147             break;
1148         }
1149 
1150         /* account for format argument */
1151         ++ fmt_idx;
1152         ++ pd -> arg_idx;
1153         ++ vargs -> idx;
1154     }
1155 
1156     /* record final fmt */
1157     if ( rc == 0 )
1158     {
1159         if ( fmt_idx == LOCAL_FMT_COUNT )
1160         {
1161             rc = create_overflow ( pd, fmt_idx );
1162             if ( rc != 0 )
1163                 return rc;
1164 
1165             fmt = pd -> fmt;
1166         }
1167 
1168         memset ( & fmt [ fmt_idx ++ ], 0, sizeof fmt [ 0 ] );
1169 
1170         /* if not all arguments were consumed, should this be an error? */
1171         if ( vargs -> idx != vargs -> dp -> argc )
1172         {
1173             /* produce warning */
1174         }
1175     }
1176 
1177     pd -> fmt_idx = fmt_idx;
1178 
1179     return rc;
1180 }
1181 
1182 VTRANSFACT_IMPL ( vdb_sprintf, 1, 0, 0 ) ( const void *self, const VXfactInfo *info,
1183     VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
1184 {
1185     rc_t rc;
1186     Sprintf *obj;
1187 
1188     /* const format string */
1189     VAList vargs;
1190     const char *fmt_str = cp -> argv [ 0 ] . data . ascii;
1191 
1192     /* local formatting storage */
1193     PrintFmt fmt [ LOCAL_FMT_COUNT ];
1194 
1195     /* data block for parse */
1196     ParseData pd;
1197     pd . fmt_size = cp -> argv [ 0 ] . count;
1198     pd . fmt = fmt;
1199 
1200     /* packaged va_list */
1201     vargs . dp = dp;
1202     vargs . idx = 0;
1203 
1204     /* parse the format string */
1205     rc = parse_format_string ( fmt_str, & pd, & vargs );
1206     if ( rc == 0 )
1207     {
1208         /* the object size:
1209            literal data bytes +
1210            space for PrintFmt +
1211            space for PrintArg */
1212         size_t obj_extra = pd . lit_size +
1213             pd . fmt_idx * sizeof ( PrintFmt ) +
1214             pd . arg_idx * sizeof ( PrintArg ) +
1215             pd . str_idx * sizeof ( String );
1216         obj = malloc ( sizeof * obj + 1 + obj_extra );
1217         if ( obj == NULL )
1218             rc = RC ( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );
1219         else
1220         {
1221             uint32_t i;
1222 
1223             char *lit;
1224             PrintFmt *dfmt;
1225             size_t lit_size;
1226 
1227             obj -> args = ( void* ) ( obj + 1 );
1228             dfmt = ( void* ) & obj -> args [ pd . arg_idx ];
1229             obj -> str = ( void* ) & dfmt [ pd . fmt_idx ];
1230             lit = ( void* ) & obj -> str [ pd . str_idx ];
1231             obj -> fmt = dfmt;
1232 
1233             /* not necessary or even helpful, but doesn't cost much */
1234             memset ( obj -> args, 0, pd . arg_idx * sizeof obj -> args [ 0 ] );
1235             memset ( obj -> str, 0, pd . str_idx * sizeof obj -> str [ 0 ] );
1236 
1237             /* copy format and literals */
1238             for ( lit_size = 0, i = 0; i < pd . fmt_idx; ++ i )
1239             {
1240                 /* simple copy */
1241                 dfmt [ i ] = pd . fmt [ i ];
1242                 if ( pd . fmt [ i ] . type == sptLiteral )
1243                 {
1244                     /* copy over literal data */
1245                     dfmt [ i ] . u . l . text = & lit [ lit_size ];
1246                     memmove ( & lit [ lit_size ],
1247                         pd . fmt [ i ] . u . l . text,
1248                         pd . fmt [ i ] . u . l . size );
1249                     lit_size += pd . fmt [ i ] . u . l . size;
1250                 }
1251             }
1252 
1253             /* NUL-terminate the literal text - again, doesn't help but doesn't hurt */
1254             lit [ lit_size ] = 0;
1255         }
1256     }
1257 
1258     /* douse any overflow memory used */
1259     if ( pd . fmt != fmt )
1260         KDataBufferWhack ( & pd . overflow );
1261 
1262 #if _DEBUGGING
1263     if ( rc == 0 )
1264     {
1265         rc = validate_obj ( obj, true );
1266         if ( rc != 0 )
1267             KDbgMsg ( "%s - self is bad on factory exit: %R\n", __func__, rc );
1268     }
1269 #endif
1270 
1271     if ( rc == 0 )
1272     {
1273         rslt -> self = obj;
1274         rslt -> whack = free;
1275         rslt -> u . rf = sprintf_func;
1276         rslt -> variant = vftRow;
1277     }
1278 
1279     return rc;
1280 }
1281