1 /*
2 
3     MPDM - Minimum Profit Data Manager
4     mpdm_s.c - String management
5 
6     ttcdt <dev@triptico.com> et al.
7 
8     This software is released into the public domain.
9     NO WARRANTY. See file LICENSE for details.
10 
11 */
12 
13 #include "config.h"
14 
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <wchar.h>
19 #include <locale.h>
20 #include <wctype.h>
21 #include <time.h>
22 
23 #ifdef CONFOPT_GETTEXT
24 #include <libintl.h>
25 #endif
26 
27 #ifdef CONFOPT_WIN32
28 #include <windows.h>
29 #endif
30 
31 #include "mpdm.h"
32 
33 
34 /** code **/
35 
void *mpdm_poke_2(void *dst, int *dsize, int *offset, const void *org,
                  int osize, int esize)
/* copies osize elements of esize bytes each from org into the dynamic
   buffer dst, starting at element *offset. If the elements do not fit
   in the *dsize elements currently allocated, the buffer is enlarged by
   osize elements. *offset is advanced by osize. Returns the (possibly
   moved) buffer. If memory cannot be obtained, the old buffer is freed,
   *dsize and *offset are reset to 0 and NULL is returned. Nothing is
   done if org is NULL or osize is not positive. */
{
    if (org != NULL && osize > 0) {
        /* enough room? */
        if (*offset + osize > *dsize) {
            /* no; enlarge, keeping the old pointer until realloc succeeds */
            int nsize = *dsize + osize;
            void *tmp = realloc(dst, (size_t) nsize * (size_t) esize);

            if (tmp == NULL) {
                /* callers overwrite their pointer with the return value,
                   so release the old block here instead of leaking it */
                free(dst);
                *dsize = 0;
                *offset = 0;

                return NULL;
            }

            dst = tmp;
            *dsize = nsize;
        }

        memcpy((char *) dst + ((size_t) *offset * (size_t) esize), org,
               (size_t) osize * (size_t) esize);
        *offset += osize;
    }

    return dst;
}
54 
55 
void *mpdm_poke(void *dst, int *dsize, const void *org, int osize, int esize)
/* appends osize elements (esize bytes each) from org to the end of the
   dynamic buffer dst, making it grow; returns the new buffer pointer */
{
    /* the write position is the current end of the buffer */
    int at = *dsize;
    void *res = mpdm_poke_2(dst, dsize, &at, org, osize, esize);

    return res;
}
63 
64 
wchar_t *mpdm_pokewsn(wchar_t *dst, int *size, const wchar_t *str, int slen)
/* appends the first slen wide chars of str to the dynamic string dst,
   whose current length (without terminator) is *size. *size is updated
   and the result is always null-terminated. Returns the (possibly moved)
   buffer. Nothing is done if str is NULL or slen is not positive. If
   memory cannot be obtained, the old buffer is freed, *size is reset
   to 0 and NULL is returned. */
{
    if (str != NULL && slen > 0) {
        /* open space, keeping the old pointer until realloc succeeds */
        wchar_t *tmp = realloc(dst,
                               ((size_t) *size + (size_t) slen + 1) * sizeof(wchar_t));

        if (tmp == NULL) {
            /* callers overwrite their pointer with the return value,
               so release the old block here instead of leaking it */
            free(dst);
            *size = 0;

            return NULL;
        }

        dst = tmp;

        /* copy */
        memcpy(&dst[*size], str, (size_t) slen * sizeof(wchar_t));

        /* increment counter */
        *size += slen;

        /* null-terminate */
        dst[*size] = L'\0';
    }

    return dst;
}
84 
85 
wchar_t *mpdm_pokews(wchar_t *dst, int *dsize, const wchar_t *str)
/* appends the whole null-terminated wide string str to the dynamic
   string dst; a NULL str leaves dst untouched */
{
    /* wcslen() must never see a NULL pointer */
    if (str == NULL)
        return dst;

    return mpdm_pokewsn(dst, dsize, str, (int) wcslen(str));
}
91 
92 
wchar_t *mpdm_pokev(wchar_t *dst, int *dsize, const mpdm_t v)
/* appends the string representation of v (as given by mpdm_string())
   to the dynamic string dst; a NULL v leaves dst untouched */
{
    if (v == NULL)
        return dst;

    /* keep v referenced while its string is being read */
    mpdm_ref(v);
    dst = mpdm_pokews(dst, dsize, mpdm_string(v));
    mpdm_unref(v);

    return dst;
}
104 
105 
wchar_t *mpdm_mbstowcs(const char *str, int *s, int l)
/* converts a multibyte string to a newly allocated wide string. Byte
   sequences that cannot be converted are replaced, one byte at a time,
   by the Unicode replacement char (U+FFFD) instead of just failing.
   If l is >= 0, at most the first l bytes of str are converted. The
   number of wide chars is stored in *s (s can be NULL). Returns NULL
   (with *s set to 0) if str is NULL or memory cannot be obtained.
   The caller owns the returned buffer. */
{
    wchar_t *ptr = NULL;
    char *dup = NULL;
    const char *cstr = str;
    size_t len;
    int t = 0;

    /* allow NULL values for s */
    if (s == NULL)
        s = &t;

    *s = 0;

    if (str == NULL)
        return NULL;

    /* if there is a limit, work on a truncated private copy; the copy
       never reads nor writes beyond the real end of the string */
    if (l >= 0) {
        const char *nul = memchr(str, '\0', (size_t) l);
        size_t n = nul != NULL ? (size_t) (nul - str) : (size_t) l;

        if ((dup = malloc(n + 1)) == NULL)
            return NULL;

        memcpy(dup, str, n);
        dup[n] = '\0';
        cstr = dup;
    }

    /* try first a direct conversion with mbstowcs */
    if ((len = mbstowcs(NULL, cstr, 0)) != (size_t) -1) {
        /* direct conversion is possible; do it */
        if ((ptr = calloc(len + 1, sizeof(wchar_t))) != NULL) {
            mbstowcs(ptr, cstr, len);
            *s = (int) len;
        }
    }
    else
    /* every wide char stored below consumes at least one input byte,
       so one wide char per byte (plus terminator) is always enough */
    if ((ptr = calloc(strlen(cstr) + 1, sizeof(wchar_t))) != NULL) {
        char tmp[64];           /* really MB_CUR_MAX + 2 */
        wchar_t wc;
        int n = 0, i = 0, c;

        for (;;) {
            /* no more characters to process? */
            if ((c = cstr[n + i]) == '\0' && i == 0)
                break;

            tmp[i++] = c;
            tmp[i] = '\0';

            /* try to convert */
            if (mbstowcs(&wc, tmp, 1) == (size_t) -1) {
                /* can still be an incomplete multibyte char? */
                if (c != '\0' && i <= (int) MB_CUR_MAX)
                    continue;

                /* too many failing bytes; skip 1 byte
                   and use the Unicode replacement char */
                wc = L'\xfffd';
                i = 1;
            }

            /* skip used bytes and back again */
            n += i;
            i = 0;

            /* store new char (the buffer is already zero-filled) */
            ptr[(*s)++] = wc;
        }
    }

    /* free the duplicate, if any */
    free(dup);

    return ptr;
}
176 
177 
char *mpdm_wcstombs(const wchar_t *str, int *s)
/* converts a wide string to a newly allocated multibyte string. If the
   string cannot be converted in the current locale, the string "?" is
   returned instead of just failing. The length in bytes of the result
   is stored in *s (s can be NULL). Returns NULL (with *s set to 0) if
   str is NULL or memory cannot be obtained. The caller owns the
   returned buffer. */
{
    char *ptr = NULL;
    size_t len;
    int t = 0;

    /* allow NULL values for s */
    if (s == NULL)
        s = &t;

    *s = 0;

    if (str == NULL)
        return NULL;

    /* try first a direct conversion with wcstombs */
    if ((len = wcstombs(NULL, str, 0)) != (size_t) -1) {
        /* direct conversion is possible; do it */
        if ((ptr = calloc(len + 1, 1)) != NULL) {
            wcstombs(ptr, str, len);
            *s = (int) len;
        }
    }
    else
    if ((ptr = calloc(2, 1)) != NULL) {
        /* if it fails, return "?" and report its real length
           (the failed conversion must not leak out as a size of -1) */
        ptr[0] = '?';
        *s = 1;
    }

    return ptr;
}
204 
205 
mpdm_new_wcs(const wchar_t * str,int size,int cpy)206 mpdm_t mpdm_new_wcs(const wchar_t *str, int size, int cpy)
207 /* creates a new string value from a wcs */
208 {
209     wchar_t *ptr = NULL;
210 
211     /* a size of -1 means 'calculate it' */
212     if (size == -1 && str != NULL)
213         size = wcslen(str);
214 
215     /* create a copy? */
216     if (size >= 0 && cpy) {
217         ptr = calloc(size + 1, sizeof(wchar_t));
218 
219         /* if there is a source, copy it */
220         if (str != NULL)
221             wcsncpy(ptr, str, size);
222     }
223 
224     return mpdm_new(MPDM_TYPE_STRING, ptr ? ptr : str, size);
225 }
226 
227 
mpdm_new_mbstowcs(const char * str,int l)228 mpdm_t mpdm_new_mbstowcs(const char *str, int l)
229 /* creates a new string value from an mbs */
230 {
231     wchar_t *ptr;
232     int size;
233 
234     ptr = mpdm_mbstowcs(str, &size, l);
235 
236     return mpdm_new(MPDM_TYPE_STRING, ptr, size);
237 }
238 
239 
mpdm_new_wcstombs(const wchar_t * str)240 mpdm_t mpdm_new_wcstombs(const wchar_t *str)
241 /* creates a new mbs value from a wbs */
242 {
243     char *ptr;
244     int size;
245 
246     ptr = mpdm_wcstombs(str, &size);
247 
248     return mpdm_new(MPDM_TYPE_MBS, ptr, size);
249 }
250 
251 
mpdm_new_i(int ival)252 mpdm_t mpdm_new_i(int ival)
253 /* creates a new string value from an integer */
254 {
255     return MPDM_C(MPDM_TYPE_INTEGER, &ival, sizeof(ival));
256 }
257 
258 
mpdm_new_r(double rval)259 mpdm_t mpdm_new_r(double rval)
260 /* creates a new string value from a real number */
261 {
262     return MPDM_C(MPDM_TYPE_REAL, &rval, sizeof(rval));
263 }
264 
265 
string_persist(wchar_t * str)266 static wchar_t *string_persist(wchar_t *str)
267 /* makes the allocated string persistent */
268 {
269     mpdm_t c, v;
270 
271     /* create the cache, if it does not exist yet */
272     if ((c = mpdm_get_wcs(mpdm_root(), L"__STRINGIFY__")) == NULL)
273         c = mpdm_set_wcs(mpdm_root(), MPDM_O(), L"__STRINGIFY__");
274 
275     if ((v = mpdm_get_wcs(c, str)) == NULL) {
276         /* string not cached; do it */
277         v = MPDM_ENS(str, -1);
278         mpdm_set(c, v, v);
279     }
280 
281     return (wchar_t *) v->data;
282 }
283 
284 
285 /* interface */
286 
287 /**
288  * mpdm_string - Returns a string representation of a value.
289  * @v: the value
290  *
291  * Returns a string representation of a value. For strings, it's
292  * the value data itself; for any other type, a conversion to string
293  * is returned instead. These values can be used for printing but
294  * they are mostly oriented to being used as indexes in objects.
295  *
296  * The reference count value in @v is not touched.
297  * [Strings]
298  */
mpdm_string(const mpdm_t v)299 wchar_t *mpdm_string(const mpdm_t v)
300 {
301     wchar_t *str;
302 
303     mpdm_ref(v);
304     str = mpdm_type_vc(v)->string(v);
305     mpdm_unrefnd(v);
306 
307     return str;
308 }
309 
310 
311 /**
312  * mpdm_cmp_wcs - Compares two values (string version).
313  * @v1: the first value
314  * @v2: the second value
315  *
316  * Compares the @v2 string against the stringified view of @v1.
317  */
mpdm_cmp_wcs(const mpdm_t v1,const wchar_t * v2)318 int mpdm_cmp_wcs(const mpdm_t v1, const wchar_t *v2)
319 {
320     int r;
321 
322     mpdm_ref(v1);
323     r = v2 == NULL ? 1 : wcscoll(mpdm_string(v1), v2);
324     mpdm_unref(v1);
325 
326     return r;
327 }
328 
329 
330 /**
331  * mpdm_splice_s - Creates a new string value from another.
332  * @v: the original value
333  * @i: the value to be inserted
334  * @offset: offset where the substring is to be inserted
335  * @del: number of characters to delete
336  * @n: an optional pointer to the new string
337  * @d: an optional pointer to the deleted string
338  *
339  * Creates a new string value from @v, deleting @del chars at @offset
340  * and substituting them by @i. If @del is 0, no deletion is done.
341  * both @offset and @del can be negative; if this is the case, it's
342  * assumed as counting from the end of @v. If @v is NULL, @i will become
343  * the new string, and both @offset and @del will be ignored. If @v is
344  * not NULL and @i is, no insertion process is done (only deletion, if
345  * applicable).
346  *
347  * Fills @n (in not NULL) with the new string, and @d (if not NULL)
348  * with the deleted portion.
349  *
350  * Returns the new value (if created) or the deleted value (if created).
351  * [Strings]
352  */
mpdm_splice_s(const mpdm_t v,const mpdm_t i,int offset,int del,mpdm_t * n,mpdm_t * d)353 mpdm_t mpdm_splice_s(const mpdm_t v, const mpdm_t i,
354                      int offset, int del, mpdm_t *n, mpdm_t *d)
355 /* do not use this; use mpdm_splice() */
356 {
357     mpdm_ref(v);
358     mpdm_ref(i);
359 
360     if (n) *n = NULL;
361     if (d) *d = NULL;
362 
363     if (v != NULL) {
364         wchar_t *str = mpdm_string(v);
365 
366         offset = mpdm_wrap_pointers(v, offset, &del);
367 
368         if (offset > mpdm_size(v))
369             offset = mpdm_size(v);
370 
371         if (d) {
372             /* deleted string */
373             *d = MPDM_NS(str + offset, del);
374         }
375 
376         if (n) {
377             wchar_t *ptr = NULL;
378             int s = 0;
379 
380             /* copy the start of the string */
381             ptr = mpdm_pokewsn(ptr, &s, str, offset);
382 
383             /* copy the inserted string */
384             ptr = mpdm_pokev(ptr, &s, i);
385 
386             /* copy the reminder */
387             ptr = mpdm_pokews(ptr, &s, str + offset + del);
388 
389             *n = ptr ? MPDM_ENS(ptr, s) : MPDM_S(L"");
390         }
391     }
392 
393     mpdm_unref(i);
394     mpdm_unref(v);
395 
396     /* returns the new value or the deleted value */
397     return n ? *n : (d ? *d : NULL);
398 }
399 
400 
401 /**
402  * mpdm_strcat_wcsn - Concatenates two strings (string with size version).
403  * @s1: the first string
404  * @s2: the second string
405  * @size: the size of the second string
406  *
407  * Returns a new string formed by the concatenation of @s1 and @s2.
408  * [Strings]
409  */
mpdm_strcat_wcsn(const mpdm_t s1,const wchar_t * s2,int size)410 mpdm_t mpdm_strcat_wcsn(const mpdm_t s1, const wchar_t *s2, int size)
411 {
412     mpdm_t r = NULL;
413 
414     if (s1 != NULL || s2 != NULL) {
415         wchar_t *ptr = NULL;
416         int s = 0;
417 
418         ptr = mpdm_pokev(ptr, &s, s1);
419         ptr = mpdm_pokewsn(ptr, &s, s2, size);
420 
421         r = ptr == NULL ? MPDM_S(L"") : MPDM_ENS(ptr, s);
422     }
423 
424     return r;
425 }
426 
427 
428 /**
429  * mpdm_strcat_wcs - Concatenates two strings (string version).
430  * @s1: the first string
431  * @s2: the second string
432  *
433  * Returns a new string formed by the concatenation of @s1 and @s2.
434  * [Strings]
435  */
mpdm_strcat_wcs(const mpdm_t s1,const wchar_t * s2)436 mpdm_t mpdm_strcat_wcs(const mpdm_t s1, const wchar_t *s2)
437 {
438     return mpdm_strcat_wcsn(s1, s2, s2 ? wcslen(s2) : 0);
439 }
440 
441 
442 /**
443  * mpdm_strcat - Concatenates two strings.
444  * @s1: the first string
445  * @s2: the second string
446  *
447  * Returns a new string formed by the concatenation of @s1 and @s2.
448  * [Strings]
449  */
mpdm_strcat(const mpdm_t s1,const mpdm_t s2)450 mpdm_t mpdm_strcat(const mpdm_t s1, const mpdm_t s2)
451 {
452     mpdm_t r;
453 
454     mpdm_ref(s2);
455     r = mpdm_strcat_wcs(s1, s2 ? mpdm_string(s2) : NULL);
456     mpdm_unref(s2);
457 
458     return r;
459 }
460 
461 
int mpdm_ival_mbs(char *str)
/* converts str to integer. Strings starting with 0b / 0B are parsed
   as binary, 0x / 0X as hexadecimal and any other leading 0 as octal;
   everything else is left to the %i conversion of sscanf(). Returns 0
   if str is NULL or no number can be read */
{
    int i = 0;

    if (str == NULL)
        return 0;

    /* workaround for mingw32: as it doesn't
       correctly parse octal and hexadecimal
       numbers, they are tried as special cases */
    if (str[0] == '0') {
        /* these bases are read as unsigned: it is what %x and %o
           expect, and shifting never overflows a signed value */
        unsigned int u = 0;

        if (str[1] == 'b' || str[1] == 'B') {
            /* binary number */
            const char *ptr;

            for (ptr = &str[2]; *ptr == '0' || *ptr == '1'; ptr++)
                u = (u << 1) | (unsigned int) (*ptr - '0');
        }
        else
        if (str[1] == 'x' || str[1] == 'X')
            sscanf(str, "%x", &u);
        else
            sscanf(str, "%o", &u);

        i = (int) u;
    }
    else
        sscanf(str, "%i", &i);

    return i;
}
498 
499 
500 /**
501  * mpdm_ival - Returns a value's data as an integer.
502  * @v: the value
503  *
504  * Returns a value's data as an integer.
505  * [Strings]
506  * [Value Management]
507  */
mpdm_ival(mpdm_t v)508 int mpdm_ival(mpdm_t v)
509 {
510     int i = 0;
511 
512     mpdm_ref(v);
513 
514     switch (mpdm_type(v)) {
515     case MPDM_TYPE_NULL:
516         break;
517 
518     case MPDM_TYPE_INTEGER:
519         i = *((int *)v->data);
520         break;
521 
522     case MPDM_TYPE_REAL:
523         i = (int) mpdm_rval(v);
524         break;
525 
526     case MPDM_TYPE_STRING:
527         {
528             char *mbs = mpdm_wcstombs(mpdm_string(v), NULL);
529             i = mpdm_ival_mbs(mbs);
530             free(mbs);
531         }
532 
533         break;
534 
535     case MPDM_TYPE_ARRAY:
536     case MPDM_TYPE_OBJECT:
537         i = mpdm_count(v);
538         break;
539 
540     default:
541         break;
542     }
543 
544     mpdm_unref(v);
545 
546     return i;
547 }
548 
549 
double mpdm_rval_mbs(char *str)
/* converts str to a real number. The conversion is always done with
   the "C" numeric locale (i.e. with a dot as the decimal separator);
   the numeric locale that was active is restored afterwards. Returns
   0.0 if str is NULL or no number can be read */
{
    double r = 0.0;
    const char *cur;
    char *saved = NULL;

    if (str == NULL)
        return r;

    /* remember the current numeric locale. setlocale(cat, "C") returns
       the NEW locale, so the old one must be queried beforehand, and
       copied, as later setlocale() calls may overwrite the string */
    if ((cur = setlocale(LC_NUMERIC, NULL)) != NULL) {
        size_t sz = strlen(cur) + 1;

        if ((saved = malloc(sz)) != NULL)
            memcpy(saved, cur, sz);
    }

    /* set locale to C for non locale-dependent
       floating point conversion */
    setlocale(LC_NUMERIC, "C");

    /* read */
    sscanf(str, "%lf", &r);

    /* set previous locale */
    if (saved != NULL) {
        setlocale(LC_NUMERIC, saved);
        free(saved);
    }

    return r;
}
567 
568 
569 /**
570  * mpdm_rval - Returns a value's data as a real number (double).
571  * @v: the value
572  *
573  * Returns a value's data as a real number (double float).
574  * [Strings]
575  * [Value Management]
576  */
mpdm_rval(mpdm_t v)577 double mpdm_rval(mpdm_t v)
578 {
579     double r = 0.0;
580 
581     mpdm_ref(v);
582 
583     switch (mpdm_type(v)) {
584     case MPDM_TYPE_NULL:
585         break;
586 
587     case MPDM_TYPE_REAL:
588         r = *((double *)v->data);
589         break;
590 
591     case MPDM_TYPE_INTEGER:
592         r = (double) mpdm_ival(v);
593         break;
594 
595     case MPDM_TYPE_STRING:
596         {
597             /* otherwise, calculate it */
598             char *mbs = mpdm_wcstombs(mpdm_string(v), NULL);
599             r = mpdm_rval_mbs(mbs);
600             free(mbs);
601         }
602 
603         break;
604 
605     case MPDM_TYPE_ARRAY:
606     case MPDM_TYPE_OBJECT:
607         r = (double) mpdm_count(v);
608         break;
609 
610     default:
611         break;
612     }
613 
614     mpdm_unref(v);
615 
616     return r;
617 }
618 
619 
620 /**
621  * mpdm_gettext - Translates a string to the current language.
622  * @str: the string
623  *
624  * Translates the @str string to the current language.
625  *
626  * This function can still be used even if there is no real gettext
627  * support() by manually filling the __I18N__ hash.
628  *
629  * If the string is found in the current table, the translation is
630  * returned; otherwise, the same @str value is returned.
631  * [Strings]
632  * [Localization]
633  */
mpdm_gettext(const mpdm_t str)634 mpdm_t mpdm_gettext(const mpdm_t str)
635 {
636     mpdm_t v = NULL;
637 
638     if (str) {
639         mpdm_t i18n = NULL;
640 
641         /* gets the cache */
642         if ((i18n = mpdm_get_wcs(mpdm_root(), L"__I18N__")) == NULL)
643             i18n = mpdm_set_wcs(mpdm_root(), MPDM_O(), L"__I18N__");
644 
645         mpdm_ref(str);
646 
647         /* try first the cache */
648         if ((v = mpdm_get(i18n, str)) == NULL) {
649 #ifdef CONFOPT_GETTEXT
650             char *s;
651             mpdm_t t;
652 
653             /* convert to mbs */
654             t = mpdm_ref(MPDM_2MBS(str->data));
655 
656             /* ask gettext for it */
657             s = gettext((char *) t->data);
658 
659             if (s != t->data)
660                 v = MPDM_MBS(s);
661             else
662                 v = str;
663 
664             mpdm_unref(t);
665 
666 #else                           /* CONFOPT_GETTEXT */
667 
668             v = str;
669 
670 #endif                          /* CONFOPT_GETTEXT */
671 
672             /* store in the cache */
673             mpdm_set(i18n, v, str);
674         }
675 
676         mpdm_unref(str);
677     }
678 
679     return v;
680 }
681 
682 
683 /**
684  * mpdm_gettext_domain - Sets domain and data directory for translations.
685  * @dom: the domain (application name)
686  * @data: directory contaning the .mo files
687  *
688  * Sets the domain (application name) and translation data for translating
689  * strings that will be returned by mpdm_gettext().@data must point to a
690  * directory containing the .mo (compiled .po) files.
691  *
692  * If there is no gettext support, returns 0, or 1 otherwise.
693  * [Strings]
694  * [Localization]
695  */
mpdm_gettext_domain(const mpdm_t dom,const mpdm_t data)696 int mpdm_gettext_domain(const mpdm_t dom, const mpdm_t data)
697 {
698     int ret = 0;
699 
700     mpdm_ref(dom);
701     mpdm_ref(data);
702 
703 #ifdef CONFOPT_GETTEXT
704 
705     mpdm_t dm;
706     mpdm_t dt;
707 
708     /* convert both to mbs,s */
709     dm = mpdm_ref(MPDM_2MBS(dom->data));
710     dt = mpdm_ref(MPDM_2MBS(data->data));
711 
712     /* bind and set domain */
713     bindtextdomain((char *) dm->data, (char *) dt->data);
714     textdomain((char *) dm->data);
715 
716     mpdm_set_wcs(mpdm_root(), MPDM_O(), L"__I18N__");
717 
718     mpdm_unref(dt);
719     mpdm_unref(dm);
720 
721     ret = 1;
722 
723 #endif                          /* CONFOPT_GETTEXT */
724 
725 #ifdef CONFOPT_WIN32
726 
727     mpdm_t v;
728 
729     if ((v = mpdm_get_wcs(mpdm_root(), L"ENV")) != NULL &&
730              mpdm_get_wcs(v, L"LANG") == NULL) {
731         const char *ptr = "en";
732         int c;
733         const char *win_langs[] = {
734             "en", "ar", "bg", "ca", "zh", "cz", "da", "de", "el", "en", /* 00-09 */
735             "es", "fi", "fr", "he", "hu", "is", "it", "jp", "ko", "nl", /* 0a-13 */
736             "no", "po", "pt", "rm", "ro", "ru", "sr", "sk", "sq", "sv", /* 14-1d */
737             "th", "tr", "ur", "id", "uk", "be", "sl", "et", "lv", "lt", /* 1e-27 */
738             "tg", "fa", "vi", "hy", "az"                                /* 28-2d */
739         };
740 
741         c = GetSystemDefaultLangID() & 0x00ff;
742 
743         /* MS Windows crappy language constants... */
744         if (c < sizeof(win_langs) / sizeof(char *))
745             ptr = win_langs[c];
746 
747         mpdm_set_wcs(v, MPDM_MBS(ptr), L"LANG");
748     }
749 
750 #endif                          /* CONFOPT_WIN32 */
751 
752     mpdm_unref(data);
753     mpdm_unref(dom);
754 
755     return ret;
756 }
757 
758 
759 #ifdef CONFOPT_WCWIDTH
760 
mpdm_wcwidth(wchar_t c)761 int mpdm_wcwidth(wchar_t c)
762 {
763     return wcwidth(c);
764 }
765 
766 #else                           /* CONFOPT_WCWIDTH */
767 
768 #include "wcwidth.c"
769 
mpdm_wcwidth(wchar_t c)770 int mpdm_wcwidth(wchar_t c)
771 {
772     return mk_wcwidth(c);
773 }
774 
775 #endif                          /* CONFOPT_WCWIDTH */
776 
static wchar_t *s_mbstowcs(char *mbs, wchar_t *wcs)
/* widens the char string mbs into wcs, one wide char per byte (no
   locale conversion is done). wcs must have room for strlen(mbs) + 1
   wide chars, as the result is null-terminated. Returns wcs */
{
    int n;

    for (n = 0; mbs[n] != '\0'; n++)
        wcs[n] = (wchar_t) mbs[n];

    /* the result is used as a string, so it must be terminated */
    wcs[n] = L'\0';

    return wcs;
}
786 
787 
static wchar_t *json_s(wchar_t *o, int *l, mpdm_t v)
/* appends the string representation of v to the dynamic string o
   (of length *l), escaped as the content of a JSON string: newlines,
   backslashes and double quotes are backslash-escaped and any other
   char below 32 is written as \uXXXX. The surrounding quotes are
   not added. Returns the new buffer */
{
    wchar_t *p = mpdm_string(v);

    while (*p) {
        if (*p == L'\n')
            o = mpdm_pokews(o, l, L"\\n");
        else
        if (*p == L'\\')
            o = mpdm_pokews(o, l, L"\\\\");
        else
        if (*p == L'"')
            o = mpdm_pokews(o, l, L"\\\"");
        else
        if (*p < 32) {
            /* room for \u plus up to 8 hex digits: wchar_t can be a
               signed type, and a negative value prints more than 4 */
            char tmp[16];
            /* zero-filled, so it is a terminated string on its own */
            wchar_t wtmp[16] = { 0 };

            snprintf(tmp, sizeof(tmp), "\\u%04x", (unsigned int) *p);
            o = mpdm_pokews(o, l, s_mbstowcs(tmp, wtmp));
        }
        else
            o = mpdm_pokewsn(o, l, p, 1);

        p++;
    }

    return o;
}
817 
818 
static wchar_t *json_f(wchar_t *o, int *z, mpdm_t v, int l)
/* fills a %j JSON format: appends the JSON representation of v to
   the dynamic string o (of length *z). l is the nesting level; at
   level 0 only arrays and objects are accepted (anything else
   appends nothing). Returns the new buffer */
{
    mpdm_t item, key;
    int iter = 0, count;

    /* special test: upper level can only be array or object */
    if (l == 0 && mpdm_type(v) != MPDM_TYPE_ARRAY &&
        mpdm_type(v) != MPDM_TYPE_OBJECT)
        return o;

    switch (mpdm_type(v)) {
    case MPDM_TYPE_NULL:
        o = mpdm_pokews(o, z, L"null");
        break;

    case MPDM_TYPE_OBJECT:
        o = mpdm_pokews(o, z, L"{");

        for (count = 0; mpdm_iterator(v, &iter, &item, &key); count++) {
            /* comma-separate all members but the first */
            if (count)
                o = mpdm_pokews(o, z, L",");

            /* "key": */
            o = mpdm_pokews(o, z, L"\"");
            o = json_s(o, z, key);
            o = mpdm_pokews(o, z, L"\":");

            /* value */
            o = json_f(o, z, item, l + 1);
        }

        o = mpdm_pokews(o, z, L"}");

        break;

    case MPDM_TYPE_ARRAY:
        o = mpdm_pokews(o, z, L"[");

        for (count = 0; mpdm_iterator(v, &iter, &item, NULL); count++) {
            /* comma-separate all elements but the first */
            if (count)
                o = mpdm_pokews(o, z, L",");

            o = json_f(o, z, item, l + 1);
        }

        o = mpdm_pokews(o, z, L"]");

        break;

    case MPDM_TYPE_INTEGER:
    case MPDM_TYPE_REAL:
        /* numbers are written bare */
        o = mpdm_pokev(o, z, v);
        break;

    case MPDM_TYPE_STRING:
        /* strings are quoted and escaped */
        o = mpdm_pokews(o, z, L"\"");
        o = json_s(o, z, v);
        o = mpdm_pokews(o, z, L"\"");

        break;

    default:
        /* anything else is quoted, but not escaped */
        o = mpdm_pokews(o, z, L"\"");
        o = mpdm_pokev(o, z, v);
        o = mpdm_pokews(o, z, L"\"");
        break;
    }

    return o;
}
892 
893 
mpdm_fmt(const mpdm_t fmt,const mpdm_t arg)894 mpdm_t mpdm_fmt(const mpdm_t fmt, const mpdm_t arg)
895 {
896     const wchar_t *i = fmt->data;
897     wchar_t c, *o = NULL;
898     int l = 0;
899     int n = 0;
900 
901     mpdm_ref(fmt);
902     mpdm_ref(arg);
903 
904     /* find first mark */
905     while ((c = i[n]) != L'\0' && c != L'%')
906         n++;
907 
908     o = mpdm_pokewsn(o, &l, i, n);
909     i = &i[n];
910 
911     /* format directive */
912     if (c == L'%') {
913         char t_fmt[128];
914         char tmp[1024];
915         char *ptr = NULL;
916         wchar_t *wptr = NULL;
917         int m = 0;
918 
919         /* transfer the % */
920         t_fmt[m++] = '%';
921         i++;
922 
923         /* transform the format to mbs */
924         while (*i != L'\0' &&
925                m < (int) (sizeof(t_fmt) - MB_CUR_MAX - 1) &&
926                wcschr(L"-.0123456789", *i) != NULL)
927             m += wctomb(&t_fmt[m], *i++);
928 
929         /* transfer the directive */
930         m += wctomb(&t_fmt[m], *i++);
931 
932         t_fmt[m] = '\0';
933 
934         /* by default, copies the format */
935         strcpy(tmp, t_fmt);
936 
937         switch (t_fmt[m - 1]) {
938         case 'd':
939         case 'i':
940         case 'u':
941         case 'x':
942         case 'X':
943         case 'o':
944 
945             /* integer value */
946             snprintf(tmp, sizeof(tmp) - 1, t_fmt, mpdm_ival(arg));
947             wptr = mpdm_mbstowcs(tmp, &m, -1);
948             break;
949 
950         case 'f':
951 
952             /* float (real) value */
953             snprintf(tmp, sizeof(tmp) - 1, t_fmt, mpdm_rval(arg));
954             wptr = mpdm_mbstowcs(tmp, &m, -1);
955             break;
956 
957         case 's':
958 
959             /* string value */
960             ptr = mpdm_wcstombs(mpdm_string(arg), NULL);
961             snprintf(tmp, sizeof(tmp) - 1, t_fmt, ptr);
962             free(ptr);
963             wptr = mpdm_mbstowcs(tmp, &m, -1);
964             break;
965 
966         case 'b':
967             /* binary dump */
968             ptr = tmp;
969             unsigned int mask;
970             int p = 0;
971             int bits = 0;
972 
973             /* zero pad? */
974             if (t_fmt[1] == '0') {
975                 p = 1;
976                 sscanf(&t_fmt[2], "%d", &bits);
977             }
978             else
979                 sscanf(&t_fmt[1], "%d", &bits);
980 
981             if (bits == 0)
982                 bits = sizeof(int) * 8;
983 
984             mask = 1 << (bits - 1);
985             while (mask) {
986                 if (mask & (unsigned int) mpdm_ival(arg)) {
987                     *ptr++ = '1';
988                     p = 1;
989                 }
990                 else
991                 if (p)
992                     *ptr++ = '0';
993 
994                 mask >>= 1;
995             }
996 
997             if (ptr == tmp)
998                 *ptr++ = '0';
999 
1000             *ptr = '\0';
1001             wptr = mpdm_mbstowcs(tmp, &m, -1);
1002             break;
1003 
1004         case 'j':
1005             o = json_f(o, &l, arg, 0);
1006             break;
1007 
1008         case 'J':
1009             /* 'lax' JSON: can be literal */
1010             o = json_f(o, &l, arg, 1);
1011             break;
1012 
1013         case 't':
1014             /* time: brace-enclosed strftime mask */
1015             if (*i == L'{') {
1016                 char tmp2[4096];
1017                 int j = 0;
1018                 struct tm *tm;
1019                 time_t t = mpdm_ival(arg);
1020 
1021                 i++;
1022                 while (*i != L'\0' && *i != L'}')
1023                     wctomb(&tmp2[j++], *i++);
1024                 tmp2[j] = '\0';
1025                 if (*i)
1026                     i++;
1027 
1028                 tm = localtime(&t);
1029                 strftime(tmp, sizeof(tmp), tmp2, tm);
1030                 wptr = mpdm_mbstowcs(tmp, &m, -1);
1031             }
1032             break;
1033 
1034         case 'c':
1035 
1036             /* char */
1037             c = mpdm_ival(arg);
1038             /* fallthrough ... */
1039 
1040         case '%':
1041 
1042             /* percent sign */
1043             o = mpdm_pokewsn(o, &l, &c, 1);
1044             break;
1045         }
1046 
1047         /* transfer */
1048         if (wptr != NULL) {
1049             o = mpdm_pokewsn(o, &l, wptr, m);
1050             free(wptr);
1051         }
1052     }
1053 
1054     /* fill the rest up to the end */
1055     n = 0;
1056     while (i[n] != L'\0')
1057         n++;
1058 
1059     o = mpdm_pokewsn(o, &l, i, n);
1060 
1061     mpdm_unref(arg);
1062     mpdm_unref(fmt);
1063 
1064     return o ? MPDM_ENS(o, l) : MPDM_S(L"");
1065 }
1066 
1067 
1068 /**
1069  * mpdm_sprintf - Formats a sprintf()-like string.
1070  * @fmt: the string format
1071  * @args: an array of values
1072  *
1073  * Formats a string using the sprintf() format taking the values from @args.
1074  * [Strings]
1075  */
mpdm_sprintf(const mpdm_t fmt,const mpdm_t args)1076 mpdm_t mpdm_sprintf(const mpdm_t fmt, const mpdm_t args)
1077 {
1078     int n;
1079     mpdm_t v;
1080 
1081     mpdm_ref(args);
1082 
1083     v = fmt;
1084     for (n = 0; n < mpdm_size(args); n++)
1085         v = mpdm_fmt(v, mpdm_get_i(args, n));
1086 
1087     mpdm_unref(args);
1088 
1089     return v;
1090 }
1091 
1092 
1093 /**
1094  * mpdm_ulc - Converts a string to uppercase or lowecase.
1095  * @s: the string
1096  * @u: convert to uppercase (1) or to lowercase (0).
1097  *
1098  * Converts @s to uppercase (for @u == 1) or to lowercase (@u == 0).
1099  * [Strings]
1100  */
mpdm_ulc(const mpdm_t s,int u)1101 mpdm_t mpdm_ulc(const mpdm_t s, int u)
1102 {
1103     mpdm_t r = NULL;
1104     wchar_t *iptr, *optr;
1105     int i, n;
1106 
1107     mpdm_ref(s);
1108 
1109     i = mpdm_size(s);
1110 
1111     optr = calloc((i + 1), sizeof(wchar_t));
1112     iptr = mpdm_string(s);
1113 
1114     for (n = 0; n < i; n++)
1115         optr[n] = u ? towupper(iptr[n]) : towlower(iptr[n]);
1116 
1117     r = MPDM_ENS(optr, i);
1118 
1119     mpdm_unref(s);
1120 
1121     return r;
1122 }
1123 
1124 
/* token types used by the JSON lexer and parser */
enum {
    JS_ERROR      = -1,     /* unrecognized input */
    JS_INCOMPLETE = 0,
    JS_OCURLY     = 1,      /* { */
    JS_OBRACK     = 2,      /* [ */
    JS_CCURLY     = 3,      /* } */
    JS_CBRACK     = 4,      /* ] */
    JS_COMMA      = 5,      /* , */
    JS_COLON      = 6,      /* : */
    JS_VALUE      = 7,
    JS_STRING     = 8,      /* "..." */
    JS_INTEGER    = 9,      /* number without a decimal point */
    JS_REAL       = 10,     /* number with a decimal point */
    JS_TRUE       = 11,     /* true */
    JS_FALSE      = 12,     /* false */
    JS_NULL       = 13,     /* null */
    JS_ARRAY      = 14,
    JS_OBJECT     = 15
};
1144 
json_lexer(wchar_t ** sp,int * t)1145 static mpdm_t json_lexer(wchar_t **sp, int *t)
1146 {
1147     wchar_t c;
1148     wchar_t *ptr = NULL;
1149     int size = 0;
1150     mpdm_t v = NULL;
1151     wchar_t *s = *sp;
1152 
1153     /* skip blanks */
1154     while (*s == L' ' || *s == L'\t' || *s == L'\n' || *s == L'\r')
1155         s++;
1156 
1157     c = *s++;
1158 
1159     if (c == L'{')
1160         *t = JS_OCURLY;
1161     else
1162     if (c == L'}')
1163         *t = JS_CCURLY;
1164     else
1165     if (c == L'[')
1166         *t = JS_OBRACK;
1167     else
1168     if (c == L']')
1169         *t = JS_CBRACK;
1170     else
1171     if (c == L',')
1172         *t = JS_COMMA;
1173     else
1174     if (c == L':')
1175         *t = JS_COLON;
1176     else
1177     if (c == L'"') {
1178         *t = JS_STRING;
1179 
1180         while ((c = *s) != L'"' && c != L'\0') {
1181             char tmp[5];
1182             int i;
1183 
1184             if (c == L'\\') {
1185                 s++;
1186                 c = *s;
1187                 switch (c) {
1188                 case 'n': c = L'\n'; break;
1189                 case 'r': c = L'\r'; break;
1190                 case 't': c = L'\t'; break;
1191                 case 'u': /* hex char */
1192                     s++;
1193                     tmp[0] = (char)*s; s++;
1194                     tmp[1] = (char)*s; s++;
1195                     tmp[2] = (char)*s; s++;
1196                     tmp[3] = (char)*s;
1197                     tmp[4] = '\0';
1198 
1199                     sscanf(tmp, "%04x", &i);
1200                     c = (wchar_t) i;
1201                     break;
1202                 }
1203             }
1204 
1205             ptr = mpdm_pokewsn(ptr, &size, &c, 1);
1206             s++;
1207         }
1208 
1209         if (c != L'\0')
1210             s++;
1211 
1212         if (ptr == NULL)
1213             v = MPDM_S(L"");
1214         else
1215             v = MPDM_ENS(ptr, size);
1216     }
1217     else
1218     if (c == L'-' || (c >= L'0' && c <= L'9') || c == L'.') {
1219         *t = JS_INTEGER;
1220 
1221         ptr = mpdm_pokewsn(ptr, &size, &c, 1);
1222 
1223         while (((c = *s) >= L'0' && c <= L'9') || c == L'.') {
1224             if (c == L'.')
1225                 *t = JS_REAL;
1226 
1227             ptr = mpdm_pokewsn(ptr, &size, &c, 1);
1228             s++;
1229         }
1230 
1231         v = MPDM_ENS(ptr, size);
1232 
1233         if (*t == JS_REAL)
1234             v = MPDM_R(mpdm_rval(v));
1235         else
1236             v = MPDM_I(mpdm_ival(v));
1237     }
1238     else
1239     if (c == 't' && wcsncmp(s, L"rue", 3) == 0) {
1240         s += 3;
1241         *t = JS_TRUE;
1242         v = mpdm_bool(1);
1243     }
1244     else
1245     if (c == 'f' && wcsncmp(s, L"alse", 4) == 0) {
1246         s += 4;
1247         *t = JS_FALSE;
1248         v = mpdm_bool(0);
1249     }
1250     else
1251     if (c == 'n' && wcsncmp(s, L"ull", 3) == 0) {
1252         s += 3;
1253         *t = JS_NULL;
1254     }
1255     else
1256         *t = JS_ERROR;
1257 
1258     *sp = s;
1259 
1260     return v;
1261 }
1262 
1263 
1264 static mpdm_t json_parse_array(wchar_t **s, int *t);
1265 static mpdm_t json_parse_object(wchar_t **s, int *t);
1266 
/* Completes a JSON value whose first token (code in *t, value in v)
   has already been read: opening brackets trigger the parsing of the
   whole array or object. On exit *t is JS_VALUE and the value is
   returned, or *t is JS_ERROR and NULL is returned */
static mpdm_t json_value(wchar_t **s, int *t, mpdm_t v)
{
    switch (*t) {
    case JS_OBRACK:
        v = json_parse_array(s, t);
        break;

    case JS_OCURLY:
        v = json_parse_object(s, t);
        break;

    default:
        break;
    }

    /* anything below JS_VALUE is punctuation or a failure */
    if (*t < JS_VALUE) {
        *t = JS_ERROR;
        return mpdm_void(v);
    }

    *t = JS_VALUE;

    return v;
}
1284 
1285 
/* Parses the `: value' part of an object member. On entry *t is the
   code of the token that produced the key k, which must be a string.
   Returns the member value with *t set to JS_VALUE; on failure both
   k and the partial value are discarded, *t is set to JS_ERROR and
   NULL is returned */
static mpdm_t json_pair(wchar_t **s, int *t, mpdm_t k)
{
    mpdm_t v = NULL;

    if (*t != JS_STRING)
        *t = JS_ERROR;
    else {
        /* the key must be followed by a colon */
        v = json_lexer(s, t);

        if (*t != JS_COLON)
            *t = JS_ERROR;
        else {
            v = json_lexer(s, t);
            v = json_value(s, t, v);
        }
    }

    if (*t < JS_VALUE) {
        k = mpdm_void(k);
        v = mpdm_void(v);
        *t = JS_ERROR;
    }
    else
        *t = JS_VALUE;

    return v;
}
1313 
1314 
json_parse_object(wchar_t ** s,int * t)1315 static mpdm_t json_parse_object(wchar_t **s, int *t)
1316 {
1317     mpdm_t h = MPDM_O();
1318     mpdm_t k = NULL;
1319     int tt;
1320 
1321     *t = JS_INCOMPLETE;
1322 
1323     k = json_lexer(s, &tt);
1324 
1325     if (tt == JS_CCURLY)
1326         *t = JS_OBJECT;
1327     else {
1328         mpdm_t w = NULL;
1329 
1330         w = json_pair(s, &tt, k);
1331 
1332         if (tt == JS_VALUE) {
1333             mpdm_set(h, w, k);
1334 
1335             while (*t == JS_INCOMPLETE) {
1336                 k = json_lexer(s, &tt);
1337 
1338                 if (tt == JS_CCURLY)
1339                     *t = JS_OBJECT;
1340                 else
1341                 if (tt == JS_COMMA) {
1342                     k = json_lexer(s, &tt);
1343                     w = json_pair(s, &tt, k);
1344 
1345                     if (tt == JS_VALUE)
1346                         mpdm_set(h, w, k);
1347                     else
1348                         *t = JS_ERROR;
1349                 }
1350                 else
1351                     *t = JS_ERROR;
1352             }
1353         }
1354         else
1355             *t = JS_ERROR;
1356     }
1357 
1358     if (*t == JS_ERROR)
1359         h = mpdm_void(h);
1360 
1361     return h;
1362 }
1363 
1364 
json_parse_array(wchar_t ** s,int * t)1365 static mpdm_t json_parse_array(wchar_t **s, int *t)
1366 {
1367     mpdm_t a = MPDM_A(0);
1368     mpdm_t w = NULL;
1369     int tt;
1370 
1371     *t = JS_INCOMPLETE;
1372 
1373     w = json_lexer(s, &tt);
1374 
1375     if (tt == JS_CBRACK)
1376         *t = JS_ARRAY;
1377     else {
1378         w = json_value(s, &tt, w);
1379 
1380         if (tt == JS_VALUE) {
1381             mpdm_push(a, w);
1382 
1383             while (*t == JS_INCOMPLETE) {
1384                 w = json_lexer(s, &tt);
1385 
1386                 if (tt == JS_CBRACK)
1387                     *t = JS_ARRAY;
1388                 else
1389                 if (tt == JS_COMMA) {
1390                     w = json_lexer(s, &tt);
1391                     w = json_value(s, &tt, w);
1392 
1393                     if (tt == JS_VALUE)
1394                         mpdm_push(a, w);
1395                     else
1396                         *t = JS_ERROR;
1397                 }
1398                 else
1399                     *t = JS_ERROR;
1400             }
1401         }
1402         else
1403             *t = JS_ERROR;
1404     }
1405 
1406     if (*t == JS_ERROR)
1407         a = mpdm_void(a);
1408 
1409     return a;
1410 }
1411 
1412 
json_parser(wchar_t ** s)1413 mpdm_t json_parser(wchar_t **s)
1414 {
1415     mpdm_t v = NULL;
1416     int t;
1417 
1418     v = json_lexer(s, &t);
1419 
1420     if (t == JS_OCURLY)
1421         v = json_parse_object(s, &t);
1422     else
1423     if (t == JS_OBRACK)
1424         v = json_parse_array(s, &t);
1425     else
1426         t = JS_ERROR;
1427 
1428     if (t != JS_ARRAY && t != JS_OBJECT)
1429         v = mpdm_void(v);
1430 
1431     return v;
1432 }
1433 
1434 
json_parser_lax(wchar_t ** s)1435 mpdm_t json_parser_lax(wchar_t **s)
1436 {
1437     mpdm_t v = NULL;
1438     int t;
1439 
1440     v = json_lexer(s, &t);
1441 
1442     if (t == JS_OCURLY)
1443         v = json_parse_object(s, &t);
1444     else
1445     if (t == JS_OBRACK)
1446         v = json_parse_array(s, &t);
1447 
1448     if (t <= JS_VALUE)
1449         v = mpdm_void(v);
1450 
1451     return v;
1452 }
1453 
1454 
/* scanf working buffers: file-level state that mpdm_sscanf() resets
   for each % directive it processes */
#define SCANF_BUF_SIZE 1024
/* characters a directive accepts (empty: any character) */
static wchar_t scanf_yset[SCANF_BUF_SIZE];
/* characters that stop a directive */
static wchar_t scanf_nset[SCANF_BUF_SIZE];
/* literal text that stops a %S directive */
static wchar_t scanf_mark[SCANF_BUF_SIZE];
/* optional per-character predicate (used by %w and %W) */
static int (*scanf_cb)(wint_t);

/* predefined character sets for the standard directives: cmd is the
   directive letter, yset the accepted characters and nset the stopping
   ones. The entry with a L'\0' cmd ends the table */
struct {
    wchar_t cmd;
    wchar_t *yset;
    wchar_t *nset;
} scanf_sets[] = {
    { L's',  L"",                         L" \t"},
    { L'u',  L"0123456789",               L""},
    { L'd',  L"-0123456789",              L""},
    { L'i',  L"-0123456789",              L""},
    { L'f',  L"-0123456789.",             L""},
    { L'x',  L"-0123456789xabcdefABCDEF", L""},
    { L'\0', NULL,                        NULL},
};

/* explicit prototype for strptime()
   NOTE(review): presumably here because <time.h> only declares it
   under some feature test macros -- confirm */
char *strptime(const char *s, const char *format, struct tm *tm);
1477 
/* negated iswalpha(): returns 1 if i is NOT an alphabetic character
   and 0 if it is */
static int niswalpha(wint_t i)
{
    if (iswalpha(i))
        return 0;

    return 1;
}
1479 
1480 /**
1481  * mpdm_sscanf - Extracts data like sscanf().
1482  * @str: the string to be parsed
1483  * @fmt: the string format
1484  * @offset: the character offset to start scanning
1485  *
1486  * Extracts data from a string using a special format pattern, very
1487  * much like the scanf() series of functions in the C library. Apart
1488  * from the standard percent-sign-commands (s, u, d, i, f, x,
1489  * n, [; with optional size and * to ignore), it implements S,
1490  * to match a string of characters upto what follows in the format
1491  * string; w, to match an alphabetic word (taking locale
1492  * into account); W, to match the inverse; and r, to return the rest
1493  * of the string. Also, the [ set of characters can include
1494  * other % formats.
1495  *
1496  * Returns an array with the extracted values. If %n is used, the
1497  * position in the scanned string is returned as the value.
1498  * [Strings]
1499  */
mpdm_sscanf(const mpdm_t str,const mpdm_t fmt,int offset)1500 mpdm_t mpdm_sscanf(const mpdm_t str, const mpdm_t fmt, int offset)
1501 {
1502     wchar_t *i = mpdm_string(str);
1503     wchar_t *f = mpdm_string(fmt);
1504     mpdm_t r;
1505 
1506     mpdm_ref(fmt);
1507     mpdm_ref(str);
1508 
1509     i += offset;
1510     r = MPDM_A(0);
1511 
1512     while (str && fmt && *f) {
1513         if (*f == L'%') {
1514             wchar_t *ptr = NULL;
1515             int size = 0;
1516             wchar_t cmd;
1517             int vsize = 0;
1518             int ignore = 0;
1519             int msize = 0;
1520 
1521             /* empty all buffers */
1522             scanf_yset[0] = scanf_nset[0] = scanf_mark[0] = L'\0';
1523             scanf_cb = NULL;
1524 
1525             f++;
1526 
1527             /* an asterisk? don't return next value */
1528             if (*f == L'*') {
1529                 ignore = 1;
1530                 f++;
1531             }
1532 
1533             /* does it have a size? */
1534             while (wcschr(L"0123456789", *f)) {
1535                 vsize *= 10;
1536                 vsize += *f - L'0';
1537                 f++;
1538             }
1539 
1540             /* if no size, set it to an arbitrary big limit */
1541             if (!vsize)
1542                 vsize = 0xfffffff;
1543 
1544             /* now *f should contain a command */
1545             cmd = *f;
1546             f++;
1547 
1548             /* is it a verbatim percent sign? */
1549             if (cmd == L'%') {
1550                 vsize = 1;
1551                 ignore = 1;
1552                 wcscpy(scanf_yset, L"%");
1553             }
1554             else
1555                 /* a position? */
1556             if (cmd == L'n') {
1557                 vsize = 0;
1558                 ignore = 1;
1559                 mpdm_push(r, MPDM_I(i - mpdm_string(str)));
1560             }
1561             else
1562                 /* string upto a mark */
1563             if (cmd == L'S') {
1564                 wchar_t *tmp = f;
1565 
1566                 /* fill the mark upto another command */
1567                 while (*tmp) {
1568                     if (*tmp == L'%') {
1569                         tmp++;
1570 
1571                         /* is it an 'n'? ignore and go on */
1572                         if (*tmp == L'n') {
1573                             tmp++;
1574                             continue;
1575                         }
1576                         else
1577                         if (*tmp == L'%')
1578                             scanf_mark[msize++] = *tmp;
1579                         else
1580                             break;
1581                     }
1582                     else
1583                         scanf_mark[msize++] = *tmp;
1584 
1585                     tmp++;
1586                 }
1587 
1588                 scanf_mark[msize] = L'\0';
1589             }
1590             else
1591                 /* alphanumeric words */
1592             if (cmd == L'w') {
1593                 scanf_cb = iswalpha;
1594             }
1595             else
1596                 /* not alphanumeric words */
1597             if (cmd == L'W') {
1598                 scanf_cb = niswalpha;
1599             }
1600             else
1601                 /* rest of the string */
1602             if (cmd == L'r') {
1603                 /* do nothing; there are no filters,
1604                    therefore all is matched */
1605             }
1606             else
1607                 /* raw set */
1608             if (cmd == L'[') {
1609                 int n = 0;
1610                 wchar_t *set = scanf_yset;
1611 
1612                 /* is it an inverse set? */
1613                 if (*f == L'^') {
1614                     set = scanf_nset;
1615                     f++;
1616                 }
1617 
1618                 /* first one is a ]? add it */
1619                 if (*f == L']') {
1620                     set[n++] = *f;
1621                     f++;
1622                 }
1623 
1624                 /* now build the set */
1625                 for (; n < SCANF_BUF_SIZE - 1 && *f && *f != L']'; f++) {
1626                     /* is it a range? */
1627                     if (*f == L'-') {
1628                         f++;
1629 
1630                         /* start or end? hyphen itself */
1631                         if (n == 0 || *f == L']')
1632                             set[n++] = L'-';
1633                         else {
1634                             /* pick previous char */
1635                             wchar_t c = set[n - 1];
1636 
1637                             /* fill */
1638                             while (n < SCANF_BUF_SIZE - 1 && c < *f)
1639                                 set[n++] = ++c;
1640                         }
1641                     }
1642                     else
1643                         /* is it another command? */
1644                     if (*f == L'%') {
1645                         int i;
1646 
1647                         f++;
1648                         for (i = 0; scanf_sets[i].cmd; i++) {
1649                             if (*f == scanf_sets[i].cmd) {
1650                                 set[n] = L'\0';
1651                                 wcscat(set, scanf_sets[i].yset);
1652                                 n += wcslen(scanf_sets[i].yset);
1653                                 break;
1654                             }
1655                         }
1656                     }
1657                     else
1658                         set[n++] = *f;
1659                 }
1660 
1661                 /* skip the ] */
1662                 f++;
1663 
1664                 set[n] = L'\0';
1665             }
1666             else
1667                 /* strptime() format */
1668             if (cmd == L't') {
1669                 if (*f == L'{') {
1670                     char tmp_f[2048];
1671                     int n = 0;
1672                     struct tm tm;
1673                     char *cptr, *cptr2;
1674 
1675                     f++;
1676                     while (*f != L'\0' && *f != L'}')
1677                         wctomb(&tmp_f[n++], *f++);
1678                     tmp_f[n] = '\0';
1679 
1680                     if (*f)
1681                         f++;
1682 
1683                     cptr = mpdm_wcstombs(i, NULL);
1684                     memset(&tm, '\0', sizeof(tm));
1685 
1686 #ifdef CONFOPT_STRPTIME
1687                     cptr2 = strptime(cptr, tmp_f, &tm);
1688 #else
1689                     cptr2 = NULL;
1690 #endif
1691 
1692                     if (cptr2 != NULL) {
1693                         time_t t = mktime(&tm);
1694 
1695                         i += (cptr2 - cptr);
1696                         mpdm_push(r, MPDM_I(t));
1697                     }
1698 
1699                     free(cptr);
1700                     continue;
1701                 }
1702             }
1703             else
1704                 /* JSON parsing */
1705             if (cmd == L'j') {
1706                 mpdm_push(r, json_parser(&i));
1707             }
1708                 /* 'lax' JSON parsing */
1709             if (cmd == L'J') {
1710                 mpdm_push(r, json_parser_lax(&i));
1711             }
1712             else
1713                 /* a standard set? */
1714             {
1715                 int n;
1716 
1717                 for (n = 0; scanf_sets[n].cmd != L'\0'; n++) {
1718                     if (cmd == scanf_sets[n].cmd) {
1719                         wcscpy(scanf_yset, scanf_sets[n].yset);
1720                         wcscpy(scanf_nset, scanf_sets[n].nset);
1721                         break;
1722                     }
1723                 }
1724             }
1725 
1726             /* now fill the dynamic string */
1727             while (vsize &&
1728                    !wcschr(scanf_nset, *i) &&
1729                    (scanf_cb == NULL || scanf_cb(*i)) &&
1730                    (scanf_yset[0] == L'\0' || wcschr(scanf_yset, *i)) &&
1731                    (msize == 0 || wcsncmp(i, scanf_mark, msize) != 0)) {
1732 
1733                 /* only add if not being ignored */
1734                 if (!ignore)
1735                     ptr = mpdm_pokewsn(ptr, &size, i, 1);
1736 
1737                 i++;
1738                 vsize--;
1739             }
1740 
1741             if (!ignore && size) {
1742                 mpdm_push(r, MPDM_ENS(ptr, size));
1743             }
1744         }
1745         else
1746         if (*f == L' ' || *f == L'\t') {
1747             /* if it's a blank, sync to next non-blank */
1748             f++;
1749 
1750             while (*i == L' ' || *i == L'\t')
1751                 i++;
1752         }
1753         else
1754             /* test for literals in the format string */
1755         if (*i == *f) {
1756             i++;
1757             f++;
1758         }
1759         else
1760             break;
1761     }
1762 
1763     mpdm_unref(str);
1764     mpdm_unref(fmt);
1765 
1766     return r;
1767 }
1768 
1769 
1770 /**
1771  * mpdm_tr - Transliterates a string.
1772  * @str: the strnig
1773  * @s1: characters to be changed
1774  * @s2: characters to replace those in s1
1775  *
1776  * Creates a copy of @str, which will have all characters in @s1
1777  * replaced by those in @s2 matching their position.
1778  */
mpdm_tr(mpdm_t str,mpdm_t s1,mpdm_t s2)1779 mpdm_t mpdm_tr(mpdm_t str, mpdm_t s1, mpdm_t s2)
1780 {
1781     mpdm_t r;
1782     wchar_t *ptr;
1783     wchar_t *cs1;
1784     wchar_t *cs2;
1785     wchar_t c;
1786 
1787     mpdm_ref(str);
1788     mpdm_ref(s1);
1789     mpdm_ref(s2);
1790 
1791     /* create a copy of the string */
1792     r = MPDM_S(mpdm_string(str));
1793 
1794     ptr = mpdm_string(r);
1795     cs1 = mpdm_string(s1);
1796     cs2 = mpdm_string(s2);
1797 
1798     while ((c = *ptr)) {
1799         int n;
1800 
1801         for (n = 0; cs1[n] && cs2[n]; n++) {
1802             if (c == cs1[n]) {
1803                 *ptr = cs2[n];
1804                 break;
1805             }
1806         }
1807 
1808         ptr++;
1809     }
1810 
1811     mpdm_unref(s2);
1812     mpdm_unref(s1);
1813     mpdm_unref(str);
1814 
1815     return r;
1816 }
1817 
1818 
1819 /**
1820  * mpdm_escape - Escapes sets of characters in a string.
1821  * @v: the string
1822  * @low: lower character limit
1823  * @high: higher character limit
1824  * @f: format to apply
1825  *
1826  * Escapes characters from the @v string that are lower than
1827  * @low or higher than @high, applying the @f format, that can
1828  * be a string for a fmt() / sprintf() format or an executable
1829  * value.
1830  */
mpdm_escape(mpdm_t v,wchar_t low,wchar_t high,mpdm_t f)1831 mpdm_t mpdm_escape(mpdm_t v, wchar_t low, wchar_t high, mpdm_t f)
1832 {
1833     wchar_t *iptr, *optr;
1834     int z = 0;
1835     int n = 0;
1836 
1837     mpdm_ref(v);
1838     mpdm_ref(f);
1839 
1840     iptr = mpdm_string(v);
1841     optr = NULL;
1842 
1843     while (iptr[n]) {
1844         int m;
1845 
1846         /* skip characters inside range */
1847         for (m = n; iptr[m] && iptr[m] >= low && iptr[m] <= high; m++);
1848 
1849         /* copy them */
1850         optr = mpdm_pokewsn(optr, &z, &iptr[n], m - n);
1851 
1852         /* now apply format to all characters outside the range */
1853         while (iptr[m] && (iptr[m] < low || iptr[m] > high)) {
1854             mpdm_t w;
1855             wchar_t wc = iptr[m];
1856 
1857             switch (mpdm_type(f)) {
1858             case MPDM_TYPE_STRING:
1859                 w = mpdm_fmt(f, MPDM_I((int) wc));
1860                 optr = mpdm_pokev(optr, &z, w);
1861                 break;
1862 
1863             default:
1864                 break;
1865             }
1866 
1867             m++;
1868         }
1869 
1870         n = m;
1871     }
1872 
1873     mpdm_unref(f);
1874     mpdm_unref(v);
1875 
1876     return optr ? MPDM_NS(optr, z) : MPDM_S(L"");
1877 }
1878 
1879 
/**
 * mpdm_utf8_to_wc - Converts from utf8 to wchar (streaming).
 * @w: the wide char being composed
 * @s: conversion state
 * @c: byte read from the stream
 *
 * Decodes a utf8 stream one byte at a time. Each call feeds the
 * byte @c, which is accumulated into @w. The @s integer holds the
 * number of continuation bytes still expected; it must be set to 0
 * before the first byte is fed. On an encoding error, @w is set to
 * the special Unicode char U+FFFD and the state is reset.
 *
 * Returns the new state: when it is 0, a wide char is available
 * in @w. If the stream ends in the middle of a multibyte character,
 * the @s state variable will not be 0.
 */
int mpdm_utf8_to_wc(wchar_t *w, int *s, char c)
{
    int pending = *s;

    if (pending == 0) {
        /* start of a character: the leading bits tell its length */
        if ((c & 0x80) == 0)
            *w = c;
        else
        if ((c & 0xe0) == 0xc0) {
            *w = (c & 0x1f) << 6;
            pending = 1;
        }
        else
        if ((c & 0xf0) == 0xe0) {
            *w = (c & 0x0f) << 12;
            pending = 2;
        }
        else
        if ((c & 0xf8) == 0xf0) {
            *w = (c & 0x07) << 18;
            pending = 3;
        }
        else {
            /* not a valid first byte */
            *w = L'\xfffd';
            pending = 0;
        }
    }
    else
    if ((c & 0xc0) == 0x80) {
        /* continuation byte: its 6 bits go to a position that
           depends on how many bytes are still to come */
        if (pending == 3)
            *w |= (c & 0x3f) << 12;
        else
        if (pending == 2)
            *w |= (c & 0x3f) << 6;
        else
        if (pending == 1)
            *w |= (c & 0x3f);

        pending--;
    }
    else {
        /* a continuation byte was expected */
        *w = L'\xfffd';
        pending = 0;
    }

    *s = pending;

    return pending;
}
1931 
1932 
1933 /**
1934  * mpdm_chomp - Deletes end of line characters at the end of a string.
1935  * @s: the value
1936  *
1937  * Returns a new string value like the original one but without any
1938  * possible \n or \r at the end.
1939  */
mpdm_chomp(mpdm_t s)1940 mpdm_t mpdm_chomp(mpdm_t s)
1941 {
1942     mpdm_t r = NULL;
1943     wchar_t *ptr;
1944     int n;
1945 
1946     mpdm_ref(s);
1947     ptr = mpdm_string(s);
1948     n = mpdm_size(s);
1949 
1950     /* discards end of lines at the end */
1951     for (; n && (ptr[n - 1] == L'\n' || ptr[n - 1] == L'\r'); n--);
1952 
1953     r = n ? MPDM_NS(ptr, n) : MPDM_S(L"");
1954 
1955     mpdm_unref(s);
1956 
1957     return r;
1958 }
1959 
1960 
1961 /** type vc **/
1962 
/* default string conversion: the printed form of the address of v */
wchar_t *vc_default_string(mpdm_t v)
{
    char tmp[64];

    /* %p requires a void pointer argument; the write is bounded
       to the size of the buffer */
    snprintf(tmp, sizeof(tmp), "%p", (void *) v);

    return string_persist(mpdm_mbstowcs(tmp, NULL, -1));
}
1970 
/* a string is false if it's "" or "0", and true otherwise */
static int vc_string_is_true(mpdm_t v)
{
    const wchar_t *text = mpdm_string(v);

    if (text[0] == L'\0')
        return 0;

    return wcscmp(text, L"0") != 0;
}
1978 
/* the count of a string is its length in characters */
static int vc_string_count(mpdm_t v)
{
    const wchar_t *text = (wchar_t *)v->data;

    return wcslen(text);
}
1983 
static mpdm_t vc_string_get_i(mpdm_t v, int i)
/* returns a new one-character string with the character at offset i;
   negative offsets count from the end. Returns NULL if out of range */
{
    wchar_t *text = mpdm_string(v);
    int len = wcslen(text);

    /* negative values are relative to the end */
    if (i < 0)
        i += len;

    if (i < 0 || i >= len)
        return NULL;

    return MPDM_NS(&text[i], 1);
}
2000 
/* like vc_string_get_i(), with the offset given as a value */
static mpdm_t vc_string_get(mpdm_t v, mpdm_t i)
{
    int offset = mpdm_ival(i);

    return vc_string_get_i(v, offset);
}
2005 
2006 
/* the string form of a string value is its own data */
static wchar_t *vc_string_string(mpdm_t v)
{
    wchar_t *text = (wchar_t *)v->data;

    return text;
}
2011 
2012 
/* an integer is true if it's not zero */
static int vc_integer_is_true(mpdm_t v)
{
    int n = mpdm_ival(v);

    return n;
}
2017 
/* string form of an integer: its decimal representation */
static wchar_t *vc_integer_string(mpdm_t v)
{
    char digits[64];

    sprintf(digits, "%d", mpdm_ival(v));

    /* convert to wide chars and hand the result to string_persist() */
    return string_persist(mpdm_mbstowcs(digits, NULL, -1));
}
2028 
2029 
/* a real number is true if it's not 0.0 */
static int vc_real_is_true(mpdm_t v)
{
    if (mpdm_rval(v) == 0.0)
        return 0;

    return 1;
}
2034 
/* string form of a real number: up to 15 decimals, formatted in
   the "C" numeric locale (dot as decimal separator) and with the
   useless trailing zeroes removed */
static wchar_t *vc_real_string(mpdm_t v)
{
    /* room for the biggest double: 309 integer digits plus sign,
       dot, 15 decimals and the terminator */
    char tmp[512];
    wchar_t *str;
    char *cur_locale;
    char *prev_locale = NULL;

    /* when setting, setlocale() returns the NEW locale, so the
       current one must be queried first; it's also copied, because
       later calls may overwrite the returned string */
    cur_locale = setlocale(LC_NUMERIC, NULL);

    if (cur_locale != NULL) {
        prev_locale = malloc(strlen(cur_locale) + 1);

        if (prev_locale != NULL)
            strcpy(prev_locale, cur_locale);
    }

    setlocale(LC_NUMERIC, "C");

    snprintf(tmp, sizeof(tmp), "%.15lf", mpdm_rval(v));

    /* back to the locale that was active on entry */
    if (prev_locale != NULL) {
        setlocale(LC_NUMERIC, prev_locale);
        free(prev_locale);
    }

    /* manually strip useless zeroes */
    if (strchr(tmp, '.') != NULL) {
        char *ptr;

        /* the dot found above stops this backwards scan */
        for (ptr = tmp + strlen(tmp) - 1; *ptr == '0'; ptr--);

        /* if it's over the ., strip it also */
        if (*ptr != '.')
            ptr++;

        *ptr = '\0';
    }

    str = mpdm_mbstowcs(tmp, NULL, -1);

    return string_persist(str);
}
2062 
2063 
/* Iterator over the characters of a string. *context is the offset of
   the next character; each step stores a new one-character string in
   *v and the offset in *i (both optional) and moves *context forward.
   Returns the new *context, which is 0 when there are no more
   characters */
static int vc_string_iterator(mpdm_t set, int *context, mpdm_t *v, mpdm_t *i)
{
    wchar_t *text = (wchar_t *)set->data;
    int pos = *context;

    /* beyond the last character? the iteration is over */
    if ((size_t) pos >= wcslen(text)) {
        *context = 0;
        return 0;
    }

    if (v != NULL)
        *v = MPDM_NS(&text[pos], 1);

    if (i != NULL)
        *i = MPDM_I(pos);

    pos++;
    *context = pos;

    return pos;
}
2081 
2082 
/* Iterator over a number: counts from 0 up to (not including) the
   integer value of set. Each step stores the current count in both
   *v and *i (both optional) and moves *context forward. Returns the
   new *context, which is 0 when the count is over */
static int vc_number_iterator(mpdm_t set, int *context, mpdm_t *v, mpdm_t *i)
{
    int cur = *context;

    if (cur < mpdm_ival(set)) {
        if (v != NULL)
            *v = MPDM_I(cur);

        if (i != NULL)
            *i = MPDM_I(cur);

        cur++;
    }
    else
        cur = 0;

    *context = cur;

    return cur;
}
2098 
2099 
/* map() for strings: if the filter is a regex, returns an array with
   all its consecutive matches in the string; any other filter is
   passed on to the default map */
static mpdm_t vc_string_map(mpdm_t set, mpdm_t filter, mpdm_t ctxt)
{
    mpdm_t found;
    mpdm_t match;
    int from;

    /* not a regex? do the default */
    if (mpdm_type(filter) != MPDM_TYPE_REGEX)
        return vc_default_map(set, filter, ctxt);

    found = MPDM_A(0);
    from  = 0;

    /* match the regex sequentially until there are no more matches */
    for (;;) {
        match = mpdm_regex(set, filter, from);

        if (!match)
            break;

        mpdm_push(found, match);

        /* continue just after the end of the last match */
        from = mpdm_regex_offset + mpdm_regex_size;
    }

    return found;
}
2123 
2124 
/* operations table for string values: truth test, count, access by
   offset, string conversion, iteration and map have string-specific
   implementations; the rest are the defaults */
struct mpdm_type_vc mpdm_vc_string = { /* VC */
    L"string",              /* name */
    vc_default_destroy,     /* destroy */
    vc_string_is_true,      /* is_true */
    vc_string_count,        /* count */
    vc_string_get_i,        /* get_i */
    vc_string_get,          /* get */
    vc_string_string,       /* string */
    vc_default_del_i,       /* del_i */
    vc_default_del,         /* del */
    vc_default_set_i,       /* set_i */
    vc_default_set,         /* set */
    vc_default_exec,        /* exec */
    vc_string_iterator,     /* iterator */
    vc_string_map           /* map */
};
2141 
/* operations table for "mbs" values: every operation is the default one */
struct mpdm_type_vc mpdm_vc_mbs = { /* VC */
    L"mbs",                 /* name */
    vc_default_destroy,     /* destroy */
    vc_default_is_true,     /* is_true */
    vc_default_count,       /* count */
    vc_default_get_i,       /* get_i */
    vc_default_get,         /* get */
    vc_default_string,      /* string */
    vc_default_del_i,       /* del_i */
    vc_default_del,         /* del */
    vc_default_set_i,       /* set_i */
    vc_default_set,         /* set */
    vc_default_exec,        /* exec */
    vc_default_iterator,    /* iterator */
    vc_default_map          /* map */
};
2158 
/* operations table for integer values: truth test, string conversion
   and iteration (a count from 0) are specific; the rest are the
   defaults */
struct mpdm_type_vc mpdm_vc_integer = { /* VC */
    L"integer",             /* name */
    vc_default_destroy,     /* destroy */
    vc_integer_is_true,     /* is_true */
    vc_default_count,       /* count */
    vc_default_get_i,       /* get_i */
    vc_default_get,         /* get */
    vc_integer_string,      /* string */
    vc_default_del_i,       /* del_i */
    vc_default_del,         /* del */
    vc_default_set_i,       /* set_i */
    vc_default_set,         /* set */
    vc_default_exec,        /* exec */
    vc_number_iterator,     /* iterator */
    vc_default_map          /* map */
};
2175 
/* operations table for real number values: truth test and string
   conversion are specific, iteration is shared with integers; the
   rest are the defaults */
struct mpdm_type_vc mpdm_vc_real = { /* VC */
    L"real",                /* name */
    vc_default_destroy,     /* destroy */
    vc_real_is_true,        /* is_true */
    vc_default_count,       /* count */
    vc_default_get_i,       /* get_i */
    vc_default_get,         /* get */
    vc_real_string,         /* string */
    vc_default_del_i,       /* del_i */
    vc_default_del,         /* del */
    vc_default_set_i,       /* set_i */
    vc_default_set,         /* set */
    vc_default_exec,        /* exec */
    vc_number_iterator,     /* iterator */
    vc_default_map          /* map */
};
2192