1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include <kapp/extern.h>
28 #include <sysalloc.h>
29 
30 #include <kapp/args.h>
31 
32 #include <klib/rc.h>
33 #include <klib/log.h>
34 #include <klib/text.h>
35 
36 #include <vfs/path.h>
37 #include <vfs/manager.h>
38 #include <kfs/file.h>
39 
40 #include <os-native.h>
41 
42 #include <string.h>
43 #include <stdlib.h>
44 
45 
/* number of argv slots allocated initially and added on every growth step */
#define ARGV_INC 10
/* initial size of the token buffer in bytes, and the slack added when it grows */
#define TOKEN_INC 512

/*
   strtok() is not used, because we tokenize the content of a file!
   The file is read and tokenized in chunks of 4096 chars,
   and a token can span multiple chunks!
   We also support quoting and escaping ( a la bash ).
*/
55 
/* tokenizer state: carries the partially built token and the growing
   argc/argv pair across successive calls to tokenize_buffer() */
typedef struct tokenzr
{
    int * argc;         /* caller's argument counter, incremented per stored token */
    char *** argv;      /* caller's argument vector, allocated and grown on demand */
    char *token;        /* bytes of the token currently being assembled ( not NUL-terminated ) */
    size_t allocated;   /* size of the 'token' buffer in bytes */
    size_t used;        /* number of bytes of 'token' currently in use */
    uint32_t state;     /* 0 = normal, 1 = inside double quotes, 2 = directly after a backslash */
    uint32_t nargs;     /* number of slots currently allocated in *argv */
    char escape[ 3 ];   /* never read by the code in this file
                           NOTE(review): presumably reserved for numeric escapes - confirm */
    char n_escape;      /* zeroed on construction, never read by the code in this file */
    char m_escape;      /* zeroed on construction, never read by the code in this file */
} tokenzr;
69 
70 
make_tokenzr(tokenzr ** t,int * argc,char *** argv)71 static rc_t make_tokenzr( tokenzr **t, int * argc, char *** argv )
72 {
73     rc_t rc = 0;
74     if ( t == NULL )
75     {
76         rc = RC( rcApp, rcNoTarg, rcConstructing, rcSelf, rcNull );
77     }
78     else
79     {
80         tokenzr *t1 = malloc( sizeof *t1 );
81         if ( t1 == NULL )
82         {
83             rc = RC( rcApp, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
84         }
85         else
86         {
87             t1->token = malloc( TOKEN_INC );
88             if ( t1->token == NULL )
89             {
90                 rc = RC( rcApp, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
91             }
92             else
93             {
94                 t1->allocated = TOKEN_INC;
95                 t1->used = 0;
96                 t1->state = 0;
97                 t1->nargs = 0;
98                 t1->argc = argc;
99                 t1->argv = argv;
100                 t1->n_escape = 0;
101                 t1->m_escape = 0;
102             }
103         }
104         if ( rc == 0 )
105         {
106             *t = t1;
107         }
108     }
109     return rc;
110 }
111 
112 
/* free_tokenzr
 *  releases the token buffer and the tokenizer itself
 *  a NULL tokenizer is ignored
 *  the argc/argv pair the tokenizer points to is NOT touched
 */
static void free_tokenzr( tokenzr *t )
{
    if ( t == NULL )
        return;

    /* free() accepts NULL, so the buffer needs no separate guard */
    free( t->token );
    free( t );
}
122 
123 
expand_argv(tokenzr * t)124 static rc_t expand_argv( tokenzr *t )
125 {
126     rc_t rc = 0;
127     char ** pargv = *(t->argv);
128 
129     if ( pargv == NULL )
130     {
131         pargv = malloc( ARGV_INC * ( sizeof pargv[ 0 ] ) );
132         if ( pargv != NULL )
133         {
134             t->nargs = ARGV_INC;
135             ( *(t->argc) ) = 0;
136         }
137         else
138             rc = RC( rcApp, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
139     }
140     else
141     {
142         if ( (uint32_t) ( *(t->argc) ) >= t->nargs )
143         {
144             char ** pargv1 = realloc( pargv, ( ( t->nargs + ARGV_INC ) * ( sizeof pargv[ 0 ] ) ) );
145             if ( pargv1 != NULL )
146             {
147                 t->nargs += ARGV_INC;
148                 pargv = pargv1;
149             }
150             else
151             {
152                 rc = RC( rcApp, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
153                 free( pargv );
154                 pargv = NULL;
155             }
156         }
157     }
158     if ( rc == 0 )
159     {
160         *(t->argv) =  pargv;
161     }
162     return rc;
163 }
164 
165 
add_string_to_argv(tokenzr * t,const char * str,size_t len)166 static rc_t add_string_to_argv( tokenzr *t, const char * str, size_t len )
167 {
168     rc_t rc = expand_argv( t );
169     if ( rc == 0 )
170     {
171         int argc = *(t->argc);
172         char ** pargv = *(t->argv);
173 
174         pargv[ argc ] = string_dup ( str, len );
175         if ( pargv[ argc ] == NULL )
176             rc = RC( rcApp, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
177         else
178         {
179             ( *(t->argc) )++;
180             *(t->argv) =  pargv;
181             t->used = 0;
182         }
183     }
184     return rc;
185 }
186 
187 
add_token_to_argv(tokenzr * t)188 static rc_t add_token_to_argv( tokenzr *t )
189 {
190     return add_string_to_argv( t, t->token, t->used );
191 }
192 
193 
add_buffer_to_token(tokenzr * t,const char * buffer,size_t buflen)194 static rc_t add_buffer_to_token( tokenzr *t, const char *buffer, size_t buflen )
195 {
196     rc_t rc = 0;
197     if ( t->used + buflen > t->allocated )
198     {
199         size_t new_size = t->used + buflen + TOKEN_INC;
200         char * temp = realloc( t->token, new_size );
201         if ( temp == NULL )
202         {
203             rc = RC( rcApp, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
204         }
205         else
206         {
207             t->token = temp;
208             t->allocated = new_size;
209         }
210     }
211     if ( rc == 0 )
212     {
213         memmove( t->token + t->used, buffer, buflen );
214         t->used += buflen;
215     }
216     return rc;
217 }
218 
219 
220 /* this is the 'normal' state, that the buffer is divided into tokens by white-space */
221 const static char delim0[] = " \r\n\f\t\v\\\"";
tokenize_state0(tokenzr * t,bool * done,char ** ptr,const char * buffer,size_t buflen)222 static rc_t tokenize_state0( tokenzr *t, bool *done, char ** ptr, const char *buffer, size_t buflen )
223 {
224     rc_t rc;
225     char * s = strpbrk ( *ptr, delim0 );
226     if ( s == NULL )
227     {
228         /* delimiting char not found!
229             ---> add everything to the tokenbuffer */
230         size_t to_add = buflen - ( *ptr - buffer );
231         rc = add_buffer_to_token( t, *ptr, to_add );
232         *done = true;
233     }
234     else
235     {
236         /* delimiting char found!
237             ---> add everything from ptr to s to the tokenbuffer
238             ---> then add token to argv
239             ---> then clear token */
240         rc = add_buffer_to_token( t, *ptr, s - *ptr );
241         if ( rc == 0 )
242         {
243             if ( t->used > 0 && *s != '\\' )
244                 rc = add_token_to_argv( t );
245             *ptr = s + 1;
246             *done = ( *ptr >= buffer + buflen );
247             switch( *s )
248             {
249                 case '"'  : t->state =  1; break;
250                 case '\\' : t->state =  2; break;
251             }
252         }
253     }
254     return rc;
255 }
256 
257 
258 /* this state, provides 'quoting' ( prevents tokenizing by putting quotes "" around strings
259     which would be otherwise separated by whitespace ) */
260 const static char delim1[] = "\"";
tokenize_state1(tokenzr * t,bool * done,char ** ptr,const char * buffer,size_t buflen)261 static rc_t tokenize_state1( tokenzr *t, bool *done, char ** ptr, const char *buffer, size_t buflen )
262 {
263     rc_t rc;
264     char * s = strpbrk ( *ptr, delim1 );
265     if ( s == NULL )
266     {
267         /* delimiting char not found!
268             ---> add everything to the tokenbuffer */
269         size_t to_add = buflen - ( *ptr - buffer );
270         rc = add_buffer_to_token( t, *ptr, to_add );
271         *done = true;
272     }
273     else
274     {
275         /* delimiting char found!
276             ---> add everything from ptr to s to the tokenbuffer
277             ---> then add token to argv
278             ---> then clear token */
279         rc = add_buffer_to_token( t, *ptr, s - *ptr );
280         if ( rc == 0 )
281         {
282             if ( t->used > 0 )
283                 rc = add_token_to_argv( t );
284             *ptr = s + 1;
285             *done = ( *ptr >= buffer + buflen );
286             if ( *s == '"' )
287                 t->state = 0;
288         }
289     }
290     return rc;
291 }
292 
293 
294 /* this state, provides 'escaping' ( \nnn or \xHH or \' or \" ) */
tokenize_state2(tokenzr * t,bool * done,char ** ptr,const char * buffer,size_t buflen)295 static rc_t tokenize_state2( tokenzr *t, bool *done, char ** ptr, const char *buffer, size_t buflen )
296 {
297     rc_t rc = 0;
298     char c = **ptr;
299     switch( c )
300     {
301         case '\\' : /* no break intended ! */
302         case '\'' :
303         case '\"' : rc = add_buffer_to_token( t, (*ptr)++, 1 );
304                     break;
305 
306         case 't'  : rc = add_buffer_to_token( t, "\t", 1 );
307                     (*ptr)++;
308                     break;
309         case 'n'  : rc = add_buffer_to_token( t, "\n", 1 );
310                     (*ptr)++;
311                     break;
312         case 'r'  : rc = add_buffer_to_token( t, "\r", 1 );
313                     (*ptr)++;
314                     break;
315 
316     }
317     t->state = 0;
318     *done = ( *ptr >= buffer + buflen );
319     return rc;
320 }
321 
322 
tokenize_buffer(tokenzr * t,const char * buffer,size_t buflen)323 static rc_t tokenize_buffer( tokenzr *t, const char *buffer, size_t buflen )
324 {
325     rc_t rc = 0;
326     char * ptr = ( char * ) buffer;
327     bool done = false;
328     while ( rc == 0 && !done )
329         switch( t->state )
330         {
331             case 0 : rc = tokenize_state0( t, &done, &ptr, buffer, buflen ); break;
332             case 1 : rc = tokenize_state1( t, &done, &ptr, buffer, buflen ); break;
333             case 2 : rc = tokenize_state2( t, &done, &ptr, buffer, buflen ); break;
334         }
335     return rc;
336 }
337 
338 
tokenize_file_and_progname_into_argv(const char * filename,const char * progname,int * argc,char *** argv)339 static rc_t tokenize_file_and_progname_into_argv( const char * filename, const char * progname,
340                                                   int * argc, char *** argv )
341 {
342     rc_t rc2, rc = 0;
343     VFSManager *vfs_mgr;
344 
345     ( *argv ) = NULL;
346     ( *argc ) = 0;
347     rc = VFSManagerMake ( &vfs_mgr );
348     if ( rc != 0 )
349         LOGERR( klogInt, rc, "VFSManagerMake() failed" );
350     else
351     {
352         VPath * vfs_path;
353         rc = VFSManagerMakePath ( vfs_mgr, &vfs_path, "%s", filename );
354         if ( rc != 0 )
355             LOGERR( klogInt, rc, "VPathMake() failed" );
356         else
357         {
358             struct KFile const *my_file;
359             rc = VFSManagerOpenFileRead ( vfs_mgr, &my_file, vfs_path );
360             if ( rc != 0 )
361                 LOGERR( klogInt, rc, "VFSManagerOpenFileRead() failed" );
362             else
363             {
364                 tokenzr *t;
365                 uint64_t pos = 0;
366                 char buffer[ 4096 + 1 ];
367                 size_t num_read;
368 
369                 rc = make_tokenzr( &t, argc, argv );
370                 if ( rc != 0 )
371                     LOGERR( klogInt, rc, "make_tokenzr() failed" );
372                 else
373                 {
374                     if ( progname != NULL )
375                         rc = add_string_to_argv( t, progname, string_size( progname ) );
376 
377                     if ( rc == 0 )
378                     {
379                         do
380                         {
381                             rc = KFileRead ( my_file, pos, buffer, ( sizeof buffer ) - 1, &num_read );
382                             if ( rc != 0 )
383                                 LOGERR( klogInt, rc, "KFileRead() failed" );
384                             else if ( num_read > 0 )
385                             {
386                                 buffer[ num_read ]  = 0;
387                                 rc = tokenize_buffer( t, buffer, num_read );
388                                 if ( rc != 0 )
389                                     LOGERR( klogInt, rc, "tokenize_buffer() failed" );
390                                 pos += num_read;
391                             }
392                         } while ( rc == 0 && num_read > 0 );
393                     }
394 
395                     if ( rc == 0 && t->used > 0 )
396                     {
397                         rc = add_token_to_argv( t );
398                         if ( rc != 0 )
399                             LOGERR( klogInt, rc, "add_token_to_argv() failed" );
400                     }
401                     free_tokenzr( t );
402                 }
403                 rc2 = KFileRelease ( my_file );
404                 if ( rc2 != 0 )
405                     LOGERR( klogInt, rc2, "KFileRelease() failed" );
406             }
407             rc2 = VPathRelease ( vfs_path );
408             if ( rc2 != 0 )
409                 LOGERR( klogInt, rc2, "VPathRelease() failed" );
410         }
411         rc2 = VFSManagerRelease ( vfs_mgr );
412         if ( rc2 != 0 )
413             LOGERR( klogInt, rc2, "VFSManagerRelease() failed" );
414     }
415     return rc;
416 }
417 
418 
Args_tokenize_file_into_argv(const char * filename,int * argc,char *** argv)419 rc_t CC Args_tokenize_file_into_argv( const char * filename, int * argc, char *** argv )
420 {
421     return tokenize_file_and_progname_into_argv( filename, NULL, argc, argv );
422 }
423 
424 
Args_tokenize_file_and_progname_into_argv(const char * filename,const char * progname,int * argc,char *** argv)425 rc_t CC Args_tokenize_file_and_progname_into_argv( const char * filename, const char * progname,
426                                                                int * argc, char *** argv )
427 {
428     return tokenize_file_and_progname_into_argv( filename, progname, argc, argv );
429 }
430 
431 
Args_free_token_argv(int argc,char * argv[])432 void CC Args_free_token_argv( int argc, char * argv[] )
433 {
434     if ( argv != NULL )
435     {
436         int i;
437         for ( i = 0; i < argc; ++i )
438             free( argv[ i ] );
439         free( argv );
440     }
441 }
442 
443 
/* sized_str_cmp
 *  compares two strings with strcase_cmp(), passing the full size of each
 *  string and the larger of the two sizes as the character limit
 *  returns the result of strcase_cmp() unchanged
 */
static int sized_str_cmp( const char *a, const char *b )
{
    const size_t len_a = string_size ( a );
    const size_t len_b = string_size ( b );
    size_t longest = len_a;

    if ( len_b > longest )
        longest = len_b;

    return strcase_cmp ( a, len_a, b, len_b, (uint32_t) longest );
}
450 
451 
452 /* get's the filename following a file_option - parameter into the file-name-buffer */
Args_find_option_in_argv(int argc,char * argv[],const char * option_name,char * option,size_t option_len)453 rc_t CC Args_find_option_in_argv( int argc, char * argv[],
454                                               const char * option_name,
455                                               char * option, size_t option_len )
456 {
457     rc_t rc = 0;
458     int i, opt_idx;
459 
460     if ( option == NULL )
461         return RC( rcRuntime, rcArgv, rcConstructing, rcParam, rcNull );
462 
463     option[ 0 ] = 0;
464 
465     if ( argv == NULL || argc < 1 )
466         return RC( rcRuntime, rcArgv, rcConstructing, rcSelf, rcNull );
467     if ( option_name == NULL || option_len == 0 )
468         return RC( rcRuntime, rcArgv, rcConstructing, rcParam, rcNull );
469 
470     opt_idx = -1;
471     for ( i = 0; i < argc && ( option[ 0 ] == 0 ); ++i )
472     {
473         if ( sized_str_cmp( (const char *)argv[i], option_name ) == 0 )
474         {
475                 opt_idx = ( i + 1 );
476         }
477         else if ( i == opt_idx )
478         {
479             string_copy( option, option_len, argv[i], string_size( argv[i] ) );
480         }
481     }
482     if ( option[ 0 ] == 0 )
483         rc = RC( rcRuntime, rcArgv, rcConstructing, rcParam, rcNotFound );
484     return rc;
485 }
486 
487 
Args_parse_inf_file(Args * args,const char * file_option)488 rc_t CC Args_parse_inf_file( Args * args, const char * file_option )
489 {
490     uint32_t count;
491     rc_t rc = ArgsOptionCount( args, file_option, &count );
492     if ( rc != 0 )
493         LOGERR( klogInt, rc, "ArgsOptionCount() failed" );
494     else if ( count > 0 )
495     {
496         uint32_t count2 = 0;
497         do
498         {
499             uint32_t idx;
500             for ( idx = count2; idx < count && rc == 0; ++idx )
501             {
502                 const char *filename;
503                 rc = ArgsOptionValue( args, file_option, idx, (const void **)&filename );
504                 if ( rc != 0 )
505                     LOGERR( klogInt, rc, "ArgsOptionValue() failed" );
506                 else if ( filename != NULL )
507                 {
508                     int argc;
509                     char ** argv;
510                     rc = Args_tokenize_file_into_argv( filename, &argc, &argv );
511                     if ( rc == 0 && argv != NULL && argc > 0 )
512                     {
513                         rc = ArgsParse ( args, argc, argv );
514                         Args_free_token_argv( argc, argv );
515                     }
516                 }
517             }
518             count2 = count;
519             rc = ArgsOptionCount( args, file_option, &count );
520             if ( rc != 0 )
521                 LOGERR( klogInt, rc, "ArgsOptionCount() failed" );
522         } while ( rc == 0 && count > count2 );
523     }
524     return rc;
525 }
526