1 /*
2
3 MPDM - Minimum Profit Data Manager
4 mpdm_s.c - String management
5
6 ttcdt <dev@triptico.com> et al.
7
8 This software is released into the public domain.
9 NO WARRANTY. See file LICENSE for details.
10
11 */
12
13 #include "config.h"
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <wchar.h>
19 #include <locale.h>
20 #include <wctype.h>
21 #include <time.h>
22
23 #ifdef CONFOPT_GETTEXT
24 #include <libintl.h>
25 #endif
26
27 #ifdef CONFOPT_WIN32
28 #include <windows.h>
29 #endif
30
31 #include "mpdm.h"
32
33
34 /** code **/
35
void *mpdm_poke_2(void *dst, int *dsize, int *offset, const void *org,
                  int osize, int esize)
/* copies osize elements of esize bytes from org into the dynamic
   buffer dst at element position *offset, enlarging dst (and *dsize)
   when there is not enough room, and advances *offset. Returns the
   (possibly moved) buffer. If memory cannot be obtained, the old
   buffer is freed, *dsize and *offset are reset to 0 and NULL is
   returned, so a caller doing dst = mpdm_poke_2(dst, ...) neither
   leaks nor keeps a dangling pointer */
{
    /* nothing to copy (or nonsensical sizes): leave everything as is */
    if (org == NULL || osize <= 0 || esize <= 0)
        return dst;

    /* enough room? */
    if (*offset + osize > *dsize) {
        /* no; enlarge by the size of the new data */
        int nsize = *dsize + osize;
        void *tmp = realloc(dst, (size_t) nsize * (size_t) esize);

        if (tmp == NULL) {
            free(dst);
            *dsize = 0;
            *offset = 0;

            return NULL;
        }

        dst = tmp;
        *dsize = nsize;
    }

    memcpy((char *) dst + ((size_t) *offset * (size_t) esize), org,
           (size_t) osize * (size_t) esize);
    *offset += osize;

    return dst;
}
54
55
void *mpdm_poke(void *dst, int *dsize, const void *org, int osize, int esize)
/* appends org at the end of the dynamic buffer dst, making it grow */
{
    /* the write position is the current end of the buffer */
    int tail = *dsize;

    dst = mpdm_poke_2(dst, dsize, &tail, org, osize, esize);

    return dst;
}
63
64
wchar_t *mpdm_pokewsn(wchar_t *dst, int *size, const wchar_t *str, int slen)
/* appends slen wide characters from str to the dynamic wide string
   dst, whose current length (in characters, without the terminator)
   is *size. The result is always NUL-terminated and *size is updated.
   A NULL str or a non-positive slen is a no-op. If memory cannot be
   obtained, the old buffer is freed, *size is reset to 0 and NULL
   is returned */
{
    if (str != NULL && slen > 0) {
        /* open space for the new characters plus the terminator */
        wchar_t *tmp = realloc(dst,
                    ((size_t) *size + (size_t) slen + 1) * sizeof(wchar_t));

        if (tmp == NULL) {
            free(dst);
            *size = 0;

            return NULL;
        }

        dst = tmp;

        /* copy */
        memcpy(dst + *size, str, (size_t) slen * sizeof(wchar_t));

        /* increment counter */
        *size += slen;

        /* NUL-terminate */
        dst[*size] = L'\0';
    }

    return dst;
}
84
85
wchar_t *mpdm_pokews(wchar_t *dst, int *dsize, const wchar_t *str)
/* appends the NUL-terminated wide string str to dst; a NULL str
   is a no-op, matching what mpdm_pokewsn() does */
{
    /* wcslen() must never see a NULL pointer */
    if (str == NULL)
        return dst;

    return mpdm_pokewsn(dst, dsize, str, (int) wcslen(str));
}
91
92
wchar_t *mpdm_pokev(wchar_t *dst, int *dsize, const mpdm_t v)
/* appends the string representation of v to dst; a NULL v adds nothing */
{
    if (v == NULL)
        return dst;

    /* keep v referenced while its string view is being copied */
    mpdm_ref(v);
    dst = mpdm_pokews(dst, dsize, mpdm_string(v));
    mpdm_unref(v);

    return dst;
}
104
105
wchar_t *mpdm_mbstowcs(const char *str, int *s, int l)
/* converts an mbs to a newly allocated wcs, replacing invalid byte
   sequences with the Unicode replacement character instead of
   failing. If l is >= 0, at most l bytes of str are converted.
   The number of wide characters is stored in *s (s can be NULL).
   Returns NULL (and *s set to 0) if str is NULL or there is no memory */
{
    wchar_t *ptr = NULL;
    char tmp[64];               /* really MB_CUR_MAX + 1 */
    wchar_t wc;
    int n, i, c;
    int t = 0;
    char *dup = NULL;
    const char *cstr = str;
    size_t r;

    /* allow NULL values for s */
    if (s == NULL)
        s = &t;

    if (str == NULL) {
        *s = 0;
        return NULL;
    }

    /* if there is a limit, work on a truncated private copy; the
       copy never reads nor writes beyond min(l, strlen(str)) bytes */
    if (l >= 0) {
        const char *end = memchr(str, '\0', (size_t) l);
        size_t len = end != NULL ? (size_t) (end - str) : (size_t) l;

        if ((dup = malloc(len + 1)) == NULL) {
            *s = 0;
            return NULL;
        }

        memcpy(dup, str, len);
        dup[len] = '\0';
        cstr = dup;
    }

    /* try first a direct conversion with mbstowcs */
    if ((r = mbstowcs(NULL, cstr, 0)) != (size_t) -1) {
        /* direct conversion is possible; do it */
        *s = (int) r;

        if ((ptr = calloc(r + 1, sizeof(wchar_t))) != NULL)
            mbstowcs(ptr, cstr, r);
        else
            *s = 0;
    }
    else {
        /* zero everything */
        *s = n = i = 0;

        for (;;) {
            /* no more characters to process? */
            if ((c = cstr[n + i]) == '\0' && i == 0)
                break;

            tmp[i++] = c;
            tmp[i] = '\0';

            /* try to convert */
            if (mbstowcs(&wc, tmp, 1) == (size_t) -1) {
                /* can still be an incomplete multibyte char? */
                if (c != '\0' && i <= (int) MB_CUR_MAX &&
                    i < (int) sizeof(tmp) - 1)
                    continue;

                /* too many failing bytes; skip 1 byte
                   and use the Unicode replacement char */
                wc = L'\xfffd';
                i = 1;
            }

            /* skip used bytes and back again */
            n += i;
            i = 0;

            /* store new char */
            if ((ptr = mpdm_pokewsn(ptr, s, &wc, 1)) == NULL)
                break;
        }
    }

    /* free the duplicate (a no-op if none was made) */
    free(dup);

    return ptr;
}
176
177
char *mpdm_wcstombs(const wchar_t *str, int *s)
/* converts a wcs to a newly allocated mbs. If the string cannot be
   converted in the current locale, the single-character string "?"
   is returned instead of failing. The size in bytes of the result
   (without the terminator) is stored in *s (s can be NULL).
   Returns NULL (and *s set to 0) if there is no memory */
{
    char *ptr;
    int t = 0;
    size_t len;

    /* allow NULL values for s */
    if (s == NULL)
        s = &t;

    /* try first a direct conversion with wcstombs */
    if ((len = wcstombs(NULL, str, 0)) != (size_t) -1) {
        /* direct conversion is possible; do it */
        if ((ptr = calloc(len + 1, 1)) == NULL) {
            *s = 0;
            return NULL;
        }

        wcstombs(ptr, str, len);
        *s = (int) len;
    }
    else {
        /* if it fails, return "?" and report its real size */
        if ((ptr = calloc(2, 1)) == NULL) {
            *s = 0;
            return NULL;
        }

        ptr[0] = '?';
        *s = 1;
    }

    return ptr;
}
204
205
mpdm_new_wcs(const wchar_t * str,int size,int cpy)206 mpdm_t mpdm_new_wcs(const wchar_t *str, int size, int cpy)
207 /* creates a new string value from a wcs */
208 {
209 wchar_t *ptr = NULL;
210
211 /* a size of -1 means 'calculate it' */
212 if (size == -1 && str != NULL)
213 size = wcslen(str);
214
215 /* create a copy? */
216 if (size >= 0 && cpy) {
217 ptr = calloc(size + 1, sizeof(wchar_t));
218
219 /* if there is a source, copy it */
220 if (str != NULL)
221 wcsncpy(ptr, str, size);
222 }
223
224 return mpdm_new(MPDM_TYPE_STRING, ptr ? ptr : str, size);
225 }
226
227
mpdm_new_mbstowcs(const char * str,int l)228 mpdm_t mpdm_new_mbstowcs(const char *str, int l)
229 /* creates a new string value from an mbs */
230 {
231 wchar_t *ptr;
232 int size;
233
234 ptr = mpdm_mbstowcs(str, &size, l);
235
236 return mpdm_new(MPDM_TYPE_STRING, ptr, size);
237 }
238
239
mpdm_new_wcstombs(const wchar_t * str)240 mpdm_t mpdm_new_wcstombs(const wchar_t *str)
241 /* creates a new mbs value from a wbs */
242 {
243 char *ptr;
244 int size;
245
246 ptr = mpdm_wcstombs(str, &size);
247
248 return mpdm_new(MPDM_TYPE_MBS, ptr, size);
249 }
250
251
mpdm_new_i(int ival)252 mpdm_t mpdm_new_i(int ival)
253 /* creates a new string value from an integer */
254 {
255 return MPDM_C(MPDM_TYPE_INTEGER, &ival, sizeof(ival));
256 }
257
258
mpdm_new_r(double rval)259 mpdm_t mpdm_new_r(double rval)
260 /* creates a new string value from a real number */
261 {
262 return MPDM_C(MPDM_TYPE_REAL, &rval, sizeof(rval));
263 }
264
265
string_persist(wchar_t * str)266 static wchar_t *string_persist(wchar_t *str)
267 /* makes the allocated string persistent */
268 {
269 mpdm_t c, v;
270
271 /* create the cache, if it does not exist yet */
272 if ((c = mpdm_get_wcs(mpdm_root(), L"__STRINGIFY__")) == NULL)
273 c = mpdm_set_wcs(mpdm_root(), MPDM_O(), L"__STRINGIFY__");
274
275 if ((v = mpdm_get_wcs(c, str)) == NULL) {
276 /* string not cached; do it */
277 v = MPDM_ENS(str, -1);
278 mpdm_set(c, v, v);
279 }
280
281 return (wchar_t *) v->data;
282 }
283
284
285 /* interface */
286
287 /**
288 * mpdm_string - Returns a string representation of a value.
289 * @v: the value
290 *
291 * Returns a string representation of a value. For strings, it's
292 * the value data itself; for any other type, a conversion to string
293 * is returned instead. These values can be used for printing but
294 * they are mostly oriented to being used as indexes in objects.
295 *
296 * The reference count value in @v is not touched.
297 * [Strings]
298 */
mpdm_string(const mpdm_t v)299 wchar_t *mpdm_string(const mpdm_t v)
300 {
301 wchar_t *str;
302
303 mpdm_ref(v);
304 str = mpdm_type_vc(v)->string(v);
305 mpdm_unrefnd(v);
306
307 return str;
308 }
309
310
311 /**
312 * mpdm_cmp_wcs - Compares two values (string version).
313 * @v1: the first value
314 * @v2: the second value
315 *
316 * Compares the @v2 string against the stringified view of @v1.
317 */
int mpdm_cmp_wcs(const mpdm_t v1, const wchar_t *v2)
{
    /* a NULL v2 always compares as 'v1 is greater' */
    int cmp = 1;

    mpdm_ref(v1);

    if (v2 != NULL)
        cmp = wcscoll(mpdm_string(v1), v2);

    mpdm_unref(v1);

    return cmp;
}
328
329
330 /**
331 * mpdm_splice_s - Creates a new string value from another.
332 * @v: the original value
333 * @i: the value to be inserted
334 * @offset: offset where the substring is to be inserted
335 * @del: number of characters to delete
336 * @n: an optional pointer to the new string
337 * @d: an optional pointer to the deleted string
338 *
339 * Creates a new string value from @v, deleting @del chars at @offset
340 * and substituting them by @i. If @del is 0, no deletion is done.
341 * both @offset and @del can be negative; if this is the case, it's
342 * assumed as counting from the end of @v. If @v is NULL, @i will become
343 * the new string, and both @offset and @del will be ignored. If @v is
344 * not NULL and @i is, no insertion process is done (only deletion, if
345 * applicable).
346 *
347 * Fills @n (if not NULL) with the new string, and @d (if not NULL)
348 * with the deleted portion.
349 *
350 * Returns the new value (if created) or the deleted value (if created).
351 * [Strings]
352 */
mpdm_t mpdm_splice_s(const mpdm_t v, const mpdm_t i,
                     int offset, int del, mpdm_t *n, mpdm_t *d)
/* do not use this; use mpdm_splice() */
{
    mpdm_t result = NULL;

    mpdm_ref(v);
    mpdm_ref(i);

    /* both outputs start empty */
    if (n != NULL)
        *n = NULL;
    if (d != NULL)
        *d = NULL;

    if (v != NULL) {
        wchar_t *src = mpdm_string(v);

        /* normalize offset and del, then clamp offset to the end of v */
        offset = mpdm_wrap_pointers(v, offset, &del);

        if (offset > mpdm_size(v))
            offset = mpdm_size(v);

        /* deleted string */
        if (d != NULL)
            *d = MPDM_NS(src + offset, del);

        if (n != NULL) {
            wchar_t *buf = NULL;
            int len = 0;

            /* copy the start of the string */
            buf = mpdm_pokewsn(buf, &len, src, offset);

            /* copy the inserted string */
            buf = mpdm_pokev(buf, &len, i);

            /* copy the remainder */
            buf = mpdm_pokews(buf, &len, src + offset + del);

            /* nothing was written: the result is the empty string */
            if (buf != NULL)
                *n = MPDM_ENS(buf, len);
            else
                *n = MPDM_S(L"");
        }
    }

    mpdm_unref(i);
    mpdm_unref(v);

    /* returns the new value or the deleted value */
    if (n != NULL)
        result = *n;
    else
    if (d != NULL)
        result = *d;

    return result;
}
399
400
401 /**
402 * mpdm_strcat_wcsn - Concatenates two strings (string with size version).
403 * @s1: the first string
404 * @s2: the second string
405 * @size: the size of the second string
406 *
407 * Returns a new string formed by the concatenation of @s1 and @s2.
408 * [Strings]
409 */
mpdm_t mpdm_strcat_wcsn(const mpdm_t s1, const wchar_t *s2, int size)
{
    wchar_t *buf = NULL;
    int len = 0;

    /* nothing on either side: no value at all */
    if (s1 == NULL && s2 == NULL)
        return NULL;

    buf = mpdm_pokev(buf, &len, s1);
    buf = mpdm_pokewsn(buf, &len, s2, size);

    /* both parts were empty: return the empty string */
    if (buf == NULL)
        return MPDM_S(L"");

    return MPDM_ENS(buf, len);
}
426
427
428 /**
429 * mpdm_strcat_wcs - Concatenates two strings (string version).
430 * @s1: the first string
431 * @s2: the second string
432 *
433 * Returns a new string formed by the concatenation of @s1 and @s2.
434 * [Strings]
435 */
mpdm_t mpdm_strcat_wcs(const mpdm_t s1, const wchar_t *s2)
{
    int len = 0;

    /* a NULL s2 contributes zero characters */
    if (s2 != NULL)
        len = wcslen(s2);

    return mpdm_strcat_wcsn(s1, s2, len);
}
440
441
442 /**
443 * mpdm_strcat - Concatenates two strings.
444 * @s1: the first string
445 * @s2: the second string
446 *
447 * Returns a new string formed by the concatenation of @s1 and @s2.
448 * [Strings]
449 */
mpdm_t mpdm_strcat(const mpdm_t s1, const mpdm_t s2)
{
    mpdm_t joined;
    const wchar_t *tail = NULL;

    /* keep s2 referenced while its string view is in use */
    mpdm_ref(s2);

    if (s2 != NULL)
        tail = mpdm_string(s2);

    joined = mpdm_strcat_wcs(s1, tail);

    mpdm_unref(s2);

    return joined;
}
460
461
int mpdm_ival_mbs(char *str)
/* converts str to integer. Strings starting with 0b / 0B are taken
   as binary, 0x / 0X as hexadecimal, any other leading 0 as octal,
   and the rest is parsed with %i. Returns 0 for a NULL or
   unparseable string */
{
    int i = 0;

    if (str == NULL)
        return 0;

    /* workaround for mingw32: as it doesn't
       correctly parse octal and hexadecimal
       numbers, they are tried as special cases */
    if (str[0] == '0') {
        /* these bases are accumulated as unsigned: %x and %o
           require an unsigned destination, and unsigned shifts
           cannot overflow into undefined behaviour */
        unsigned int u = 0;

        if (str[1] == 'b' || str[1] == 'B') {
            /* binary number */
            const char *ptr = &str[2];

            while (*ptr == '0' || *ptr == '1') {
                u = (u << 1) | (unsigned int) (*ptr == '1');
                ptr++;
            }
        }
        else
        if (str[1] == 'x' || str[1] == 'X')
            sscanf(str, "%x", &u);
        else
            sscanf(str, "%o", &u);

        i = (int) u;
    }
    else
        sscanf(str, "%i", &i);

    return i;
}
498
499
500 /**
501 * mpdm_ival - Returns a value's data as an integer.
502 * @v: the value
503 *
504 * Returns a value's data as an integer.
505 * [Strings]
506 * [Value Management]
507 */
int mpdm_ival(mpdm_t v)
{
    int result = 0;

    mpdm_ref(v);

    switch (mpdm_type(v)) {
    case MPDM_TYPE_INTEGER:
        /* stored natively */
        result = *((int *) v->data);
        break;

    case MPDM_TYPE_REAL:
        /* truncate the real value */
        result = (int) mpdm_rval(v);
        break;

    case MPDM_TYPE_STRING:
        {
            /* parse the multibyte form of the string */
            char *text = mpdm_wcstombs(mpdm_string(v), NULL);

            result = mpdm_ival_mbs(text);
            free(text);
        }

        break;

    case MPDM_TYPE_ARRAY:
    case MPDM_TYPE_OBJECT:
        /* containers evaluate to their number of elements */
        result = mpdm_count(v);
        break;

    case MPDM_TYPE_NULL:
    default:
        /* anything else is 0 */
        break;
    }

    mpdm_unref(v);

    return result;
}
548
549
double mpdm_rval_mbs(char *str)
/* converts str to a real number, always using '.' as the decimal
   separator regardless of the current locale. Returns 0.0 for a
   NULL or unparseable string */
{
    double r = 0.0;
    char *cur;
    char *saved = NULL;

    if (str == NULL)
        return r;

    /* setlocale(cat, "C") returns the NEW locale name, so the
       current one must be queried (and copied, as the returned
       storage can be overwritten by later calls) before switching */
    if ((cur = setlocale(LC_NUMERIC, NULL)) != NULL) {
        size_t len = strlen(cur) + 1;

        if ((saved = malloc(len)) != NULL)
            memcpy(saved, cur, len);
    }

    /* set locale to C for non locale-dependent floating point
       conversion (only if it will be possible to restore it) */
    if (saved != NULL)
        setlocale(LC_NUMERIC, "C");

    /* read */
    sscanf(str, "%lf", &r);

    /* set previous locale */
    if (saved != NULL) {
        setlocale(LC_NUMERIC, saved);
        free(saved);
    }

    return r;
}
567
568
569 /**
570 * mpdm_rval - Returns a value's data as a real number (double).
571 * @v: the value
572 *
573 * Returns a value's data as a real number (double float).
574 * [Strings]
575 * [Value Management]
576 */
double mpdm_rval(mpdm_t v)
{
    double result = 0.0;

    mpdm_ref(v);

    switch (mpdm_type(v)) {
    case MPDM_TYPE_REAL:
        /* stored natively */
        result = *((double *) v->data);
        break;

    case MPDM_TYPE_INTEGER:
        /* widen the integer value */
        result = (double) mpdm_ival(v);
        break;

    case MPDM_TYPE_STRING:
        {
            /* parse the multibyte form of the string */
            char *text = mpdm_wcstombs(mpdm_string(v), NULL);

            result = mpdm_rval_mbs(text);
            free(text);
        }

        break;

    case MPDM_TYPE_ARRAY:
    case MPDM_TYPE_OBJECT:
        /* containers evaluate to their number of elements */
        result = (double) mpdm_count(v);
        break;

    case MPDM_TYPE_NULL:
    default:
        /* anything else is 0.0 */
        break;
    }

    mpdm_unref(v);

    return result;
}
618
619
620 /**
621 * mpdm_gettext - Translates a string to the current language.
622 * @str: the string
623 *
624 * Translates the @str string to the current language.
625 *
626 * This function can still be used even if there is no real gettext()
627 * support, by manually filling the __I18N__ hash.
628 *
629 * If the string is found in the current table, the translation is
630 * returned; otherwise, the same @str value is returned.
631 * [Strings]
632 * [Localization]
633 */
mpdm_t mpdm_gettext(const mpdm_t str)
{
    mpdm_t tr = NULL;
    mpdm_t cache;

    if (str == NULL)
        return NULL;

    /* gets the cache, creating it on first use */
    cache = mpdm_get_wcs(mpdm_root(), L"__I18N__");

    if (cache == NULL)
        cache = mpdm_set_wcs(mpdm_root(), MPDM_O(), L"__I18N__");

    mpdm_ref(str);

    /* try first the cache */
    tr = mpdm_get(cache, str);

    if (tr == NULL) {
#ifdef CONFOPT_GETTEXT
        /* convert to mbs and ask gettext for it */
        mpdm_t mbs = mpdm_ref(MPDM_2MBS(str->data));
        char *msg = gettext((char *) mbs->data);

        /* a different pointer means gettext returned something
           other than the string it was given */
        if (msg != mbs->data)
            tr = MPDM_MBS(msg);
        else
            tr = str;

        mpdm_unref(mbs);

#else /* CONFOPT_GETTEXT */

        /* no gettext: the string translates to itself */
        tr = str;

#endif /* CONFOPT_GETTEXT */

        /* store in the cache */
        mpdm_set(cache, tr, str);
    }

    mpdm_unref(str);

    return tr;
}
681
682
683 /**
684 * mpdm_gettext_domain - Sets domain and data directory for translations.
685 * @dom: the domain (application name)
686 * @data: directory containing the .mo files
687 *
688 * Sets the domain (application name) and translation data for translating
689 * strings that will be returned by mpdm_gettext(). @data must point to a
690 * directory containing the .mo (compiled .po) files.
691 *
692 * If there is no gettext support, returns 0, or 1 otherwise.
693 * [Strings]
694 * [Localization]
695 */
int mpdm_gettext_domain(const mpdm_t dom, const mpdm_t data)
{
    int ret = 0;

    mpdm_ref(dom);
    mpdm_ref(data);

#ifdef CONFOPT_GETTEXT
    {
        /* convert both to mbs */
        mpdm_t mb_dom = mpdm_ref(MPDM_2MBS(dom->data));
        mpdm_t mb_dir = mpdm_ref(MPDM_2MBS(data->data));

        /* bind and set domain */
        bindtextdomain((char *) mb_dom->data, (char *) mb_dir->data);
        textdomain((char *) mb_dom->data);

        /* replace the translation cache with a fresh, empty one */
        mpdm_set_wcs(mpdm_root(), MPDM_O(), L"__I18N__");

        mpdm_unref(mb_dir);
        mpdm_unref(mb_dom);

        ret = 1;
    }
#endif /* CONFOPT_GETTEXT */

#ifdef CONFOPT_WIN32
    {
        mpdm_t env = mpdm_get_wcs(mpdm_root(), L"ENV");

        /* only fill ENV.LANG if there is an ENV and it has no LANG */
        if (env != NULL && mpdm_get_wcs(env, L"LANG") == NULL) {
            /* language codes indexed by the low byte of the
               system default language id */
            const char *win_langs[] = {
                "en", "ar", "bg", "ca", "zh", "cz", "da", "de", "el", "en", /* 00-09 */
                "es", "fi", "fr", "he", "hu", "is", "it", "jp", "ko", "nl", /* 0a-13 */
                "no", "po", "pt", "rm", "ro", "ru", "sr", "sk", "sq", "sv", /* 14-1d */
                "th", "tr", "ur", "id", "uk", "be", "sl", "et", "lv", "lt", /* 1e-27 */
                "tg", "fa", "vi", "hy", "az"                                /* 28-2c */
            };
            const char *lang = "en";
            int id = GetSystemDefaultLangID() & 0x00ff;

            /* MS Windows language constants; unknown ids stay "en" */
            if ((size_t) id < sizeof(win_langs) / sizeof(win_langs[0]))
                lang = win_langs[id];

            mpdm_set_wcs(env, MPDM_MBS(lang), L"LANG");
        }
    }
#endif /* CONFOPT_WIN32 */

    mpdm_unref(data);
    mpdm_unref(dom);

    return ret;
}
757
758
759 #ifdef CONFOPT_WCWIDTH
760
mpdm_wcwidth(wchar_t c)761 int mpdm_wcwidth(wchar_t c)
762 {
763 return wcwidth(c);
764 }
765
766 #else /* CONFOPT_WCWIDTH */
767
768 #include "wcwidth.c"
769
mpdm_wcwidth(wchar_t c)770 int mpdm_wcwidth(wchar_t c)
771 {
772 return mk_wcwidth(c);
773 }
774
775 #endif /* CONFOPT_WCWIDTH */
776
static wchar_t *s_mbstowcs(char *mbs, wchar_t *wcs)
/* widens mbs byte by byte into wcs (no locale conversion) and
   NUL-terminates it; wcs must have room for strlen(mbs) + 1
   wide characters. Returns wcs */
{
    int n;

    /* go through unsigned char so bytes >= 0x80 are not sign-extended */
    for (n = 0; mbs[n] != '\0'; n++)
        wcs[n] = (wchar_t) (unsigned char) mbs[n];

    /* callers treat the result as a C wide string */
    wcs[n] = L'\0';

    return wcs;
}
786
787
static wchar_t *json_s(wchar_t *o, int *l, mpdm_t v)
/* appends the string form of v to o with JSON escaping: newline,
   backslash and double quote get their short escapes and any other
   character below 32 is written as \uXXXX */
{
    wchar_t *p = mpdm_string(v);

    for (; *p; p++) {
        if (*p == L'\n')
            o = mpdm_pokews(o, l, L"\\n");
        else
        if (*p == L'\\')
            o = mpdm_pokews(o, l, L"\\\\");
        else
        if (*p == L'"')
            o = mpdm_pokews(o, l, L"\\\"");
        else
        if (*p < 32) {
            /* room for "\u" plus up to 8 hex digits, so the escape
               can never overflow; wtmp starts zeroed so it is a
               valid wide string whatever the helper writes */
            char tmp[16];
            wchar_t wtmp[16] = { 0 };

            snprintf(tmp, sizeof(tmp), "\\u%04x", (unsigned int) *p);
            o = mpdm_pokews(o, l, s_mbstowcs(tmp, wtmp));
        }
        else
            o = mpdm_pokewsn(o, l, p, 1);
    }

    return o;
}
817
818
static wchar_t *json_f(wchar_t *o, int *z, mpdm_t v, int l)
/* fills a %j JSON format: appends the JSON form of v to o;
   l is the nesting level (0 for the outermost value) */
{
    mpdm_t val, key;
    int iter = 0, count = 0;

    /* special test: upper level can only be array or object */
    if (l == 0 && mpdm_type(v) != MPDM_TYPE_ARRAY &&
        mpdm_type(v) != MPDM_TYPE_OBJECT)
        return o;

    switch (mpdm_type(v)) {
    case MPDM_TYPE_NULL:
        o = mpdm_pokews(o, z, L"null");
        break;

    case MPDM_TYPE_OBJECT:
        o = mpdm_pokews(o, z, L"{");

        /* "key":value pairs, comma-separated */
        for (count = 0; mpdm_iterator(v, &iter, &val, &key); count++) {
            if (count > 0)
                o = mpdm_pokews(o, z, L",");

            o = mpdm_pokews(o, z, L"\"");
            o = json_s(o, z, key);
            o = mpdm_pokews(o, z, L"\":");

            o = json_f(o, z, val, l + 1);
        }

        o = mpdm_pokews(o, z, L"}");
        break;

    case MPDM_TYPE_ARRAY:
        o = mpdm_pokews(o, z, L"[");

        /* elements, comma-separated */
        for (count = 0; mpdm_iterator(v, &iter, &val, NULL); count++) {
            if (count > 0)
                o = mpdm_pokews(o, z, L",");

            o = json_f(o, z, val, l + 1);
        }

        o = mpdm_pokews(o, z, L"]");
        break;

    case MPDM_TYPE_INTEGER:
    case MPDM_TYPE_REAL:
        /* numbers go unquoted */
        o = mpdm_pokev(o, z, v);
        break;

    case MPDM_TYPE_STRING:
        /* strings go quoted and escaped */
        o = mpdm_pokews(o, z, L"\"");
        o = json_s(o, z, v);
        o = mpdm_pokews(o, z, L"\"");
        break;

    default:
        /* any other type: its string form, quoted but not escaped */
        o = mpdm_pokews(o, z, L"\"");
        o = mpdm_pokev(o, z, v);
        o = mpdm_pokews(o, z, L"\"");
        break;
    }

    return o;
}
892
893
mpdm_fmt(const mpdm_t fmt,const mpdm_t arg)894 mpdm_t mpdm_fmt(const mpdm_t fmt, const mpdm_t arg)
895 {
896 const wchar_t *i = fmt->data;
897 wchar_t c, *o = NULL;
898 int l = 0;
899 int n = 0;
900
901 mpdm_ref(fmt);
902 mpdm_ref(arg);
903
904 /* find first mark */
905 while ((c = i[n]) != L'\0' && c != L'%')
906 n++;
907
908 o = mpdm_pokewsn(o, &l, i, n);
909 i = &i[n];
910
911 /* format directive */
912 if (c == L'%') {
913 char t_fmt[128];
914 char tmp[1024];
915 char *ptr = NULL;
916 wchar_t *wptr = NULL;
917 int m = 0;
918
919 /* transfer the % */
920 t_fmt[m++] = '%';
921 i++;
922
923 /* transform the format to mbs */
924 while (*i != L'\0' &&
925 m < (int) (sizeof(t_fmt) - MB_CUR_MAX - 1) &&
926 wcschr(L"-.0123456789", *i) != NULL)
927 m += wctomb(&t_fmt[m], *i++);
928
929 /* transfer the directive */
930 m += wctomb(&t_fmt[m], *i++);
931
932 t_fmt[m] = '\0';
933
934 /* by default, copies the format */
935 strcpy(tmp, t_fmt);
936
937 switch (t_fmt[m - 1]) {
938 case 'd':
939 case 'i':
940 case 'u':
941 case 'x':
942 case 'X':
943 case 'o':
944
945 /* integer value */
946 snprintf(tmp, sizeof(tmp) - 1, t_fmt, mpdm_ival(arg));
947 wptr = mpdm_mbstowcs(tmp, &m, -1);
948 break;
949
950 case 'f':
951
952 /* float (real) value */
953 snprintf(tmp, sizeof(tmp) - 1, t_fmt, mpdm_rval(arg));
954 wptr = mpdm_mbstowcs(tmp, &m, -1);
955 break;
956
957 case 's':
958
959 /* string value */
960 ptr = mpdm_wcstombs(mpdm_string(arg), NULL);
961 snprintf(tmp, sizeof(tmp) - 1, t_fmt, ptr);
962 free(ptr);
963 wptr = mpdm_mbstowcs(tmp, &m, -1);
964 break;
965
966 case 'b':
967 /* binary dump */
968 ptr = tmp;
969 unsigned int mask;
970 int p = 0;
971 int bits = 0;
972
973 /* zero pad? */
974 if (t_fmt[1] == '0') {
975 p = 1;
976 sscanf(&t_fmt[2], "%d", &bits);
977 }
978 else
979 sscanf(&t_fmt[1], "%d", &bits);
980
981 if (bits == 0)
982 bits = sizeof(int) * 8;
983
984 mask = 1 << (bits - 1);
985 while (mask) {
986 if (mask & (unsigned int) mpdm_ival(arg)) {
987 *ptr++ = '1';
988 p = 1;
989 }
990 else
991 if (p)
992 *ptr++ = '0';
993
994 mask >>= 1;
995 }
996
997 if (ptr == tmp)
998 *ptr++ = '0';
999
1000 *ptr = '\0';
1001 wptr = mpdm_mbstowcs(tmp, &m, -1);
1002 break;
1003
1004 case 'j':
1005 o = json_f(o, &l, arg, 0);
1006 break;
1007
1008 case 'J':
1009 /* 'lax' JSON: can be literal */
1010 o = json_f(o, &l, arg, 1);
1011 break;
1012
1013 case 't':
1014 /* time: brace-enclosed strftime mask */
1015 if (*i == L'{') {
1016 char tmp2[4096];
1017 int j = 0;
1018 struct tm *tm;
1019 time_t t = mpdm_ival(arg);
1020
1021 i++;
1022 while (*i != L'\0' && *i != L'}')
1023 wctomb(&tmp2[j++], *i++);
1024 tmp2[j] = '\0';
1025 if (*i)
1026 i++;
1027
1028 tm = localtime(&t);
1029 strftime(tmp, sizeof(tmp), tmp2, tm);
1030 wptr = mpdm_mbstowcs(tmp, &m, -1);
1031 }
1032 break;
1033
1034 case 'c':
1035
1036 /* char */
1037 c = mpdm_ival(arg);
1038 /* fallthrough ... */
1039
1040 case '%':
1041
1042 /* percent sign */
1043 o = mpdm_pokewsn(o, &l, &c, 1);
1044 break;
1045 }
1046
1047 /* transfer */
1048 if (wptr != NULL) {
1049 o = mpdm_pokewsn(o, &l, wptr, m);
1050 free(wptr);
1051 }
1052 }
1053
1054 /* fill the rest up to the end */
1055 n = 0;
1056 while (i[n] != L'\0')
1057 n++;
1058
1059 o = mpdm_pokewsn(o, &l, i, n);
1060
1061 mpdm_unref(arg);
1062 mpdm_unref(fmt);
1063
1064 return o ? MPDM_ENS(o, l) : MPDM_S(L"");
1065 }
1066
1067
1068 /**
1069 * mpdm_sprintf - Formats a sprintf()-like string.
1070 * @fmt: the string format
1071 * @args: an array of values
1072 *
1073 * Formats a string using the sprintf() format taking the values from @args.
1074 * [Strings]
1075 */
mpdm_t mpdm_sprintf(const mpdm_t fmt, const mpdm_t args)
{
    mpdm_t out = fmt;
    int idx = 0;

    mpdm_ref(args);

    /* each argument consumes the first pending directive of the
       string produced by the previous step */
    while (idx < mpdm_size(args)) {
        out = mpdm_fmt(out, mpdm_get_i(args, idx));
        idx++;
    }

    mpdm_unref(args);

    return out;
}
1091
1092
1093 /**
1094 * mpdm_ulc - Converts a string to uppercase or lowercase.
1095 * @s: the string
1096 * @u: convert to uppercase (1) or to lowercase (0).
1097 *
1098 * Converts @s to uppercase (for @u == 1) or to lowercase (@u == 0).
1099 * [Strings]
1100 */
mpdm_t mpdm_ulc(const mpdm_t s, int u)
{
    mpdm_t r = NULL;
    wchar_t *src, *dst;
    int len, k;

    mpdm_ref(s);

    len = mpdm_size(s);

    /* zeroed buffer: the result is already NUL-terminated */
    dst = calloc((len + 1), sizeof(wchar_t));
    src = mpdm_string(s);

    if (u) {
        for (k = 0; k < len; k++)
            dst[k] = towupper(src[k]);
    }
    else {
        for (k = 0; k < len; k++)
            dst[k] = towlower(src[k]);
    }

    r = MPDM_ENS(dst, len);

    mpdm_unref(s);

    return r;
}
1123
1124
/* JSON token / parse state codes. The order matters: the parser
   tests 'code >= JS_VALUE' to recognize anything that is a value */
enum {
    JS_ERROR = -1,      /* invalid input */
    JS_INCOMPLETE = 0,  /* array / object still being parsed */
    JS_OCURLY,          /* { */
    JS_OBRACK,          /* [ */
    JS_CCURLY,          /* } */
    JS_CBRACK,          /* ] */
    JS_COMMA,           /* , */
    JS_COLON,           /* : */
    JS_VALUE,           /* first of the value codes */
    JS_STRING,
    JS_INTEGER,
    JS_REAL,
    JS_TRUE,
    JS_FALSE,
    JS_NULL,
    JS_ARRAY,           /* a complete array */
    JS_OBJECT           /* a complete object */
};
1144
json_lexer(wchar_t ** sp,int * t)1145 static mpdm_t json_lexer(wchar_t **sp, int *t)
1146 {
1147 wchar_t c;
1148 wchar_t *ptr = NULL;
1149 int size = 0;
1150 mpdm_t v = NULL;
1151 wchar_t *s = *sp;
1152
1153 /* skip blanks */
1154 while (*s == L' ' || *s == L'\t' || *s == L'\n' || *s == L'\r')
1155 s++;
1156
1157 c = *s++;
1158
1159 if (c == L'{')
1160 *t = JS_OCURLY;
1161 else
1162 if (c == L'}')
1163 *t = JS_CCURLY;
1164 else
1165 if (c == L'[')
1166 *t = JS_OBRACK;
1167 else
1168 if (c == L']')
1169 *t = JS_CBRACK;
1170 else
1171 if (c == L',')
1172 *t = JS_COMMA;
1173 else
1174 if (c == L':')
1175 *t = JS_COLON;
1176 else
1177 if (c == L'"') {
1178 *t = JS_STRING;
1179
1180 while ((c = *s) != L'"' && c != L'\0') {
1181 char tmp[5];
1182 int i;
1183
1184 if (c == L'\\') {
1185 s++;
1186 c = *s;
1187 switch (c) {
1188 case 'n': c = L'\n'; break;
1189 case 'r': c = L'\r'; break;
1190 case 't': c = L'\t'; break;
1191 case 'u': /* hex char */
1192 s++;
1193 tmp[0] = (char)*s; s++;
1194 tmp[1] = (char)*s; s++;
1195 tmp[2] = (char)*s; s++;
1196 tmp[3] = (char)*s;
1197 tmp[4] = '\0';
1198
1199 sscanf(tmp, "%04x", &i);
1200 c = (wchar_t) i;
1201 break;
1202 }
1203 }
1204
1205 ptr = mpdm_pokewsn(ptr, &size, &c, 1);
1206 s++;
1207 }
1208
1209 if (c != L'\0')
1210 s++;
1211
1212 if (ptr == NULL)
1213 v = MPDM_S(L"");
1214 else
1215 v = MPDM_ENS(ptr, size);
1216 }
1217 else
1218 if (c == L'-' || (c >= L'0' && c <= L'9') || c == L'.') {
1219 *t = JS_INTEGER;
1220
1221 ptr = mpdm_pokewsn(ptr, &size, &c, 1);
1222
1223 while (((c = *s) >= L'0' && c <= L'9') || c == L'.') {
1224 if (c == L'.')
1225 *t = JS_REAL;
1226
1227 ptr = mpdm_pokewsn(ptr, &size, &c, 1);
1228 s++;
1229 }
1230
1231 v = MPDM_ENS(ptr, size);
1232
1233 if (*t == JS_REAL)
1234 v = MPDM_R(mpdm_rval(v));
1235 else
1236 v = MPDM_I(mpdm_ival(v));
1237 }
1238 else
1239 if (c == 't' && wcsncmp(s, L"rue", 3) == 0) {
1240 s += 3;
1241 *t = JS_TRUE;
1242 v = mpdm_bool(1);
1243 }
1244 else
1245 if (c == 'f' && wcsncmp(s, L"alse", 4) == 0) {
1246 s += 4;
1247 *t = JS_FALSE;
1248 v = mpdm_bool(0);
1249 }
1250 else
1251 if (c == 'n' && wcsncmp(s, L"ull", 3) == 0) {
1252 s += 3;
1253 *t = JS_NULL;
1254 }
1255 else
1256 *t = JS_ERROR;
1257
1258 *sp = s;
1259
1260 return v;
1261 }
1262
1263
1264 static mpdm_t json_parse_array(wchar_t **s, int *t);
1265 static mpdm_t json_parse_object(wchar_t **s, int *t);
1266
static mpdm_t json_value(wchar_t **s, int *t, mpdm_t v)
/* completes a value whose first token (*t, v) has already been read:
   opening brackets trigger the array / object parsers. On exit *t is
   JS_VALUE, or JS_ERROR (and v is voided) */
{
    switch (*t) {
    case JS_OBRACK:
        v = json_parse_array(s, t);
        break;

    case JS_OCURLY:
        v = json_parse_object(s, t);
        break;

    default:
        break;
    }

    if (*t < JS_VALUE) {
        /* not a value: discard whatever was read */
        v = mpdm_void(v);
        *t = JS_ERROR;
    }
    else
        *t = JS_VALUE;

    return v;
}
1284
1285
static mpdm_t json_pair(wchar_t **s, int *t, mpdm_t k)
/* parses the ': value' part of an object member whose key token
   (*t, k) has already been read. Returns the value with *t set to
   JS_VALUE; on error, voids both k and the value and sets *t to
   JS_ERROR */
{
    mpdm_t val = NULL;

    if (*t != JS_STRING)
        /* keys must be strings */
        *t = JS_ERROR;
    else {
        val = json_lexer(s, t);

        if (*t != JS_COLON)
            *t = JS_ERROR;
        else {
            val = json_lexer(s, t);
            val = json_value(s, t, val);
        }
    }

    if (*t < JS_VALUE) {
        k = mpdm_void(k);
        val = mpdm_void(val);
        *t = JS_ERROR;
    }
    else
        *t = JS_VALUE;

    return val;
}
1313
1314
json_parse_object(wchar_t ** s,int * t)1315 static mpdm_t json_parse_object(wchar_t **s, int *t)
1316 {
1317 mpdm_t h = MPDM_O();
1318 mpdm_t k = NULL;
1319 int tt;
1320
1321 *t = JS_INCOMPLETE;
1322
1323 k = json_lexer(s, &tt);
1324
1325 if (tt == JS_CCURLY)
1326 *t = JS_OBJECT;
1327 else {
1328 mpdm_t w = NULL;
1329
1330 w = json_pair(s, &tt, k);
1331
1332 if (tt == JS_VALUE) {
1333 mpdm_set(h, w, k);
1334
1335 while (*t == JS_INCOMPLETE) {
1336 k = json_lexer(s, &tt);
1337
1338 if (tt == JS_CCURLY)
1339 *t = JS_OBJECT;
1340 else
1341 if (tt == JS_COMMA) {
1342 k = json_lexer(s, &tt);
1343 w = json_pair(s, &tt, k);
1344
1345 if (tt == JS_VALUE)
1346 mpdm_set(h, w, k);
1347 else
1348 *t = JS_ERROR;
1349 }
1350 else
1351 *t = JS_ERROR;
1352 }
1353 }
1354 else
1355 *t = JS_ERROR;
1356 }
1357
1358 if (*t == JS_ERROR)
1359 h = mpdm_void(h);
1360
1361 return h;
1362 }
1363
1364
json_parse_array(wchar_t ** s,int * t)1365 static mpdm_t json_parse_array(wchar_t **s, int *t)
1366 {
1367 mpdm_t a = MPDM_A(0);
1368 mpdm_t w = NULL;
1369 int tt;
1370
1371 *t = JS_INCOMPLETE;
1372
1373 w = json_lexer(s, &tt);
1374
1375 if (tt == JS_CBRACK)
1376 *t = JS_ARRAY;
1377 else {
1378 w = json_value(s, &tt, w);
1379
1380 if (tt == JS_VALUE) {
1381 mpdm_push(a, w);
1382
1383 while (*t == JS_INCOMPLETE) {
1384 w = json_lexer(s, &tt);
1385
1386 if (tt == JS_CBRACK)
1387 *t = JS_ARRAY;
1388 else
1389 if (tt == JS_COMMA) {
1390 w = json_lexer(s, &tt);
1391 w = json_value(s, &tt, w);
1392
1393 if (tt == JS_VALUE)
1394 mpdm_push(a, w);
1395 else
1396 *t = JS_ERROR;
1397 }
1398 else
1399 *t = JS_ERROR;
1400 }
1401 }
1402 else
1403 *t = JS_ERROR;
1404 }
1405
1406 if (*t == JS_ERROR)
1407 a = mpdm_void(a);
1408
1409 return a;
1410 }
1411
1412
json_parser(wchar_t ** s)1413 mpdm_t json_parser(wchar_t **s)
1414 {
1415 mpdm_t v = NULL;
1416 int t;
1417
1418 v = json_lexer(s, &t);
1419
1420 if (t == JS_OCURLY)
1421 v = json_parse_object(s, &t);
1422 else
1423 if (t == JS_OBRACK)
1424 v = json_parse_array(s, &t);
1425 else
1426 t = JS_ERROR;
1427
1428 if (t != JS_ARRAY && t != JS_OBJECT)
1429 v = mpdm_void(v);
1430
1431 return v;
1432 }
1433
1434
json_parser_lax(wchar_t ** s)1435 mpdm_t json_parser_lax(wchar_t **s)
1436 {
1437 mpdm_t v = NULL;
1438 int t;
1439
1440 v = json_lexer(s, &t);
1441
1442 if (t == JS_OCURLY)
1443 v = json_parse_object(s, &t);
1444 else
1445 if (t == JS_OBRACK)
1446 v = json_parse_array(s, &t);
1447
1448 if (t <= JS_VALUE)
1449 v = mpdm_void(v);
1450
1451 return v;
1452 }
1453
1454
/* scanf working buffers (file-scope, shared between calls) */
#define SCANF_BUF_SIZE 1024
static wchar_t scanf_yset[SCANF_BUF_SIZE];
static wchar_t scanf_nset[SCANF_BUF_SIZE];
static wchar_t scanf_mark[SCANF_BUF_SIZE];
static int (*scanf_cb)(wint_t);

/* per-command character sets, ended by an entry whose cmd is L'\0' */
struct {
    wchar_t cmd;    /* the command character */
    wchar_t *yset;  /* first character set */
    wchar_t *nset;  /* second character set */
} scanf_sets[] = {
    { L's', L"",                         L" \t" },
    { L'u', L"0123456789",               L"" },
    { L'd', L"-0123456789",              L"" },
    { L'i', L"-0123456789",              L"" },
    { L'f', L"-0123456789.",             L"" },
    { L'x', L"-0123456789xabcdefABCDEF", L"" },
    { L'\0', NULL,                       NULL },
};

/* declared here explicitly */
char *strptime(const char *s, const char *format, struct tm *tm);
1477
static int niswalpha(wint_t i)
/* the inverse of iswalpha(): 1 if i is NOT alphabetic, 0 if it is */
{
    return iswalpha(i) == 0;
}
1479
1480 /**
1481 * mpdm_sscanf - Extracts data like sscanf().
1482 * @str: the string to be parsed
1483 * @fmt: the string format
1484 * @offset: the character offset to start scanning
1485 *
1486 * Extracts data from a string using a special format pattern, very
1487 * much like the scanf() series of functions in the C library. Apart
1488 * from the standard percent-sign-commands (s, u, d, i, f, x,
1489 * n, [; with optional size and * to ignore), it implements S,
1490 * to match a string of characters upto what follows in the format
1491 * string; w, to match an alphabetic word (taking locale
1492 * into account); W, to match the inverse; and r, to return the rest
1493 * of the string. Also, the [ set of characters can include
1494 * other % formats.
1495 *
1496 * Returns an array with the extracted values. If %n is used, the
1497 * position in the scanned string is returned as the value.
1498 * [Strings]
1499 */
mpdm_sscanf(const mpdm_t str,const mpdm_t fmt,int offset)1500 mpdm_t mpdm_sscanf(const mpdm_t str, const mpdm_t fmt, int offset)
1501 {
1502 wchar_t *i = mpdm_string(str);
1503 wchar_t *f = mpdm_string(fmt);
1504 mpdm_t r;
1505
1506 mpdm_ref(fmt);
1507 mpdm_ref(str);
1508
1509 i += offset;
1510 r = MPDM_A(0);
1511
1512 while (str && fmt && *f) {
1513 if (*f == L'%') {
1514 wchar_t *ptr = NULL;
1515 int size = 0;
1516 wchar_t cmd;
1517 int vsize = 0;
1518 int ignore = 0;
1519 int msize = 0;
1520
1521 /* empty all buffers */
1522 scanf_yset[0] = scanf_nset[0] = scanf_mark[0] = L'\0';
1523 scanf_cb = NULL;
1524
1525 f++;
1526
1527 /* an asterisk? don't return next value */
1528 if (*f == L'*') {
1529 ignore = 1;
1530 f++;
1531 }
1532
1533 /* does it have a size? */
1534 while (wcschr(L"0123456789", *f)) {
1535 vsize *= 10;
1536 vsize += *f - L'0';
1537 f++;
1538 }
1539
1540 /* if no size, set it to an arbitrary big limit */
1541 if (!vsize)
1542 vsize = 0xfffffff;
1543
1544 /* now *f should contain a command */
1545 cmd = *f;
1546 f++;
1547
1548 /* is it a verbatim percent sign? */
1549 if (cmd == L'%') {
1550 vsize = 1;
1551 ignore = 1;
1552 wcscpy(scanf_yset, L"%");
1553 }
1554 else
1555 /* a position? */
1556 if (cmd == L'n') {
1557 vsize = 0;
1558 ignore = 1;
1559 mpdm_push(r, MPDM_I(i - mpdm_string(str)));
1560 }
1561 else
1562 /* string upto a mark */
1563 if (cmd == L'S') {
1564 wchar_t *tmp = f;
1565
1566 /* fill the mark upto another command */
1567 while (*tmp) {
1568 if (*tmp == L'%') {
1569 tmp++;
1570
1571 /* is it an 'n'? ignore and go on */
1572 if (*tmp == L'n') {
1573 tmp++;
1574 continue;
1575 }
1576 else
1577 if (*tmp == L'%')
1578 scanf_mark[msize++] = *tmp;
1579 else
1580 break;
1581 }
1582 else
1583 scanf_mark[msize++] = *tmp;
1584
1585 tmp++;
1586 }
1587
1588 scanf_mark[msize] = L'\0';
1589 }
1590 else
1591 /* alphanumeric words */
1592 if (cmd == L'w') {
1593 scanf_cb = iswalpha;
1594 }
1595 else
1596 /* not alphanumeric words */
1597 if (cmd == L'W') {
1598 scanf_cb = niswalpha;
1599 }
1600 else
1601 /* rest of the string */
1602 if (cmd == L'r') {
1603 /* do nothing; there are no filters,
1604 therefore all is matched */
1605 }
1606 else
1607 /* raw set */
1608 if (cmd == L'[') {
1609 int n = 0;
1610 wchar_t *set = scanf_yset;
1611
1612 /* is it an inverse set? */
1613 if (*f == L'^') {
1614 set = scanf_nset;
1615 f++;
1616 }
1617
1618 /* first one is a ]? add it */
1619 if (*f == L']') {
1620 set[n++] = *f;
1621 f++;
1622 }
1623
1624 /* now build the set */
1625 for (; n < SCANF_BUF_SIZE - 1 && *f && *f != L']'; f++) {
1626 /* is it a range? */
1627 if (*f == L'-') {
1628 f++;
1629
1630 /* start or end? hyphen itself */
1631 if (n == 0 || *f == L']')
1632 set[n++] = L'-';
1633 else {
1634 /* pick previous char */
1635 wchar_t c = set[n - 1];
1636
1637 /* fill */
1638 while (n < SCANF_BUF_SIZE - 1 && c < *f)
1639 set[n++] = ++c;
1640 }
1641 }
1642 else
1643 /* is it another command? */
1644 if (*f == L'%') {
1645 int i;
1646
1647 f++;
1648 for (i = 0; scanf_sets[i].cmd; i++) {
1649 if (*f == scanf_sets[i].cmd) {
1650 set[n] = L'\0';
1651 wcscat(set, scanf_sets[i].yset);
1652 n += wcslen(scanf_sets[i].yset);
1653 break;
1654 }
1655 }
1656 }
1657 else
1658 set[n++] = *f;
1659 }
1660
1661 /* skip the ] */
1662 f++;
1663
1664 set[n] = L'\0';
1665 }
1666 else
1667 /* strptime() format */
1668 if (cmd == L't') {
1669 if (*f == L'{') {
1670 char tmp_f[2048];
1671 int n = 0;
1672 struct tm tm;
1673 char *cptr, *cptr2;
1674
1675 f++;
1676 while (*f != L'\0' && *f != L'}')
1677 wctomb(&tmp_f[n++], *f++);
1678 tmp_f[n] = '\0';
1679
1680 if (*f)
1681 f++;
1682
1683 cptr = mpdm_wcstombs(i, NULL);
1684 memset(&tm, '\0', sizeof(tm));
1685
1686 #ifdef CONFOPT_STRPTIME
1687 cptr2 = strptime(cptr, tmp_f, &tm);
1688 #else
1689 cptr2 = NULL;
1690 #endif
1691
1692 if (cptr2 != NULL) {
1693 time_t t = mktime(&tm);
1694
1695 i += (cptr2 - cptr);
1696 mpdm_push(r, MPDM_I(t));
1697 }
1698
1699 free(cptr);
1700 continue;
1701 }
1702 }
1703 else
1704 /* JSON parsing */
1705 if (cmd == L'j') {
1706 mpdm_push(r, json_parser(&i));
1707 }
1708 /* 'lax' JSON parsing */
1709 if (cmd == L'J') {
1710 mpdm_push(r, json_parser_lax(&i));
1711 }
1712 else
1713 /* a standard set? */
1714 {
1715 int n;
1716
1717 for (n = 0; scanf_sets[n].cmd != L'\0'; n++) {
1718 if (cmd == scanf_sets[n].cmd) {
1719 wcscpy(scanf_yset, scanf_sets[n].yset);
1720 wcscpy(scanf_nset, scanf_sets[n].nset);
1721 break;
1722 }
1723 }
1724 }
1725
1726 /* now fill the dynamic string */
1727 while (vsize &&
1728 !wcschr(scanf_nset, *i) &&
1729 (scanf_cb == NULL || scanf_cb(*i)) &&
1730 (scanf_yset[0] == L'\0' || wcschr(scanf_yset, *i)) &&
1731 (msize == 0 || wcsncmp(i, scanf_mark, msize) != 0)) {
1732
1733 /* only add if not being ignored */
1734 if (!ignore)
1735 ptr = mpdm_pokewsn(ptr, &size, i, 1);
1736
1737 i++;
1738 vsize--;
1739 }
1740
1741 if (!ignore && size) {
1742 mpdm_push(r, MPDM_ENS(ptr, size));
1743 }
1744 }
1745 else
1746 if (*f == L' ' || *f == L'\t') {
1747 /* if it's a blank, sync to next non-blank */
1748 f++;
1749
1750 while (*i == L' ' || *i == L'\t')
1751 i++;
1752 }
1753 else
1754 /* test for literals in the format string */
1755 if (*i == *f) {
1756 i++;
1757 f++;
1758 }
1759 else
1760 break;
1761 }
1762
1763 mpdm_unref(str);
1764 mpdm_unref(fmt);
1765
1766 return r;
1767 }
1768
1769
1770 /**
1771 * mpdm_tr - Transliterates a string.
1772 * @str: the strnig
1773 * @s1: characters to be changed
1774 * @s2: characters to replace those in s1
1775 *
1776 * Creates a copy of @str, which will have all characters in @s1
1777 * replaced by those in @s2 matching their position.
1778 */
mpdm_tr(mpdm_t str,mpdm_t s1,mpdm_t s2)1779 mpdm_t mpdm_tr(mpdm_t str, mpdm_t s1, mpdm_t s2)
1780 {
1781 mpdm_t r;
1782 wchar_t *ptr;
1783 wchar_t *cs1;
1784 wchar_t *cs2;
1785 wchar_t c;
1786
1787 mpdm_ref(str);
1788 mpdm_ref(s1);
1789 mpdm_ref(s2);
1790
1791 /* create a copy of the string */
1792 r = MPDM_S(mpdm_string(str));
1793
1794 ptr = mpdm_string(r);
1795 cs1 = mpdm_string(s1);
1796 cs2 = mpdm_string(s2);
1797
1798 while ((c = *ptr)) {
1799 int n;
1800
1801 for (n = 0; cs1[n] && cs2[n]; n++) {
1802 if (c == cs1[n]) {
1803 *ptr = cs2[n];
1804 break;
1805 }
1806 }
1807
1808 ptr++;
1809 }
1810
1811 mpdm_unref(s2);
1812 mpdm_unref(s1);
1813 mpdm_unref(str);
1814
1815 return r;
1816 }
1817
1818
1819 /**
1820 * mpdm_escape - Escapes sets of characters in a string.
1821 * @v: the string
1822 * @low: lower character limit
1823 * @high: higher character limit
1824 * @f: format to apply
1825 *
1826 * Escapes characters from the @v string that are lower than
1827 * @low or higher than @high, applying the @f format, that can
1828 * be a string for a fmt() / sprintf() format or an executable
1829 * value.
1830 */
mpdm_escape(mpdm_t v,wchar_t low,wchar_t high,mpdm_t f)1831 mpdm_t mpdm_escape(mpdm_t v, wchar_t low, wchar_t high, mpdm_t f)
1832 {
1833 wchar_t *iptr, *optr;
1834 int z = 0;
1835 int n = 0;
1836
1837 mpdm_ref(v);
1838 mpdm_ref(f);
1839
1840 iptr = mpdm_string(v);
1841 optr = NULL;
1842
1843 while (iptr[n]) {
1844 int m;
1845
1846 /* skip characters inside range */
1847 for (m = n; iptr[m] && iptr[m] >= low && iptr[m] <= high; m++);
1848
1849 /* copy them */
1850 optr = mpdm_pokewsn(optr, &z, &iptr[n], m - n);
1851
1852 /* now apply format to all characters outside the range */
1853 while (iptr[m] && (iptr[m] < low || iptr[m] > high)) {
1854 mpdm_t w;
1855 wchar_t wc = iptr[m];
1856
1857 switch (mpdm_type(f)) {
1858 case MPDM_TYPE_STRING:
1859 w = mpdm_fmt(f, MPDM_I((int) wc));
1860 optr = mpdm_pokev(optr, &z, w);
1861 break;
1862
1863 default:
1864 break;
1865 }
1866
1867 m++;
1868 }
1869
1870 n = m;
1871 }
1872
1873 mpdm_unref(f);
1874 mpdm_unref(v);
1875
1876 return optr ? MPDM_NS(optr, z) : MPDM_S(L"");
1877 }
1878
1879
/**
 * mpdm_utf8_to_wc - Converts from utf8 to wchar (streaming).
 * @w: converted wide char
 * @s: temporal state
 * @c: char read from stream
 *
 * Feeds one byte of an utf8 stream, @c, into the wide character
 * being composed in @w. The @s integer holds the number of
 * continuation bytes still expected; it must be set to 0 before the
 * first byte is processed. On an encoding error, the special
 * Unicode char U+FFFD is stored into @w and the state is reset.
 *
 * Returns the new value of @s: when it's 0, a wide char is ready
 * in @w. If the stream is interrupted in the middle of a multibyte
 * character, the @s state variable will not be 0.
 */
int mpdm_utf8_to_wc(wchar_t *w, int *s, char c)
{
    if (*s == 0) {
        /* not inside a sequence: classify the lead byte */
        if ((c & 0x80) == 0) {
            /* 1 byte char */
            *w = c;
        }
        else
        if ((c & 0xe0) == 0xc0) {
            /* 2 byte char */
            *w = (c & 0x1f) << 6;
            *s = 1;
        }
        else
        if ((c & 0xf0) == 0xe0) {
            /* 3 byte char */
            *w = (c & 0x0f) << 12;
            *s = 2;
        }
        else
        if ((c & 0xf8) == 0xf0) {
            /* 4 byte char */
            *w = (c & 0x07) << 18;
            *s = 3;
        }
        else {
            /* a continuation or invalid byte where a lead was expected */
            *w = L'\xfffd';
            *s = 0;
        }
    }
    else
    if ((c & 0xc0) == 0x80) {
        /* continuation byte: with k bytes still pending after this
           one, its 6 bits land 6 * k positions up */
        int pending = *s - 1;

        if (pending >= 0 && pending <= 2)
            *w |= (c & 0x3f) << (6 * pending);

        *s = pending;
    }
    else {
        /* sequence broken by a non-continuation byte */
        *w = L'\xfffd';
        *s = 0;
    }

    return *s;
}
1931
1932
1933 /**
1934 * mpdm_chomp - Deletes end of line characters at the end of a string.
1935 * @s: the value
1936 *
1937 * Returns a new string value like the original one but without any
1938 * possible \n or \r at the end.
1939 */
mpdm_chomp(mpdm_t s)1940 mpdm_t mpdm_chomp(mpdm_t s)
1941 {
1942 mpdm_t r = NULL;
1943 wchar_t *ptr;
1944 int n;
1945
1946 mpdm_ref(s);
1947 ptr = mpdm_string(s);
1948 n = mpdm_size(s);
1949
1950 /* discards end of lines at the end */
1951 for (; n && (ptr[n - 1] == L'\n' || ptr[n - 1] == L'\r'); n--);
1952
1953 r = n ? MPDM_NS(ptr, n) : MPDM_S(L"");
1954
1955 mpdm_unref(s);
1956
1957 return r;
1958 }
1959
1960
1961 /** type vc **/
1962
/* default 'string' handler: the printed form of the value's address,
   converted to a wide string and handed to string_persist() */
wchar_t *vc_default_string(mpdm_t v)
{
    char tmp[64];

    /* %p requires a pointer to void */
    sprintf(tmp, "%p", (void *) v);
    return string_persist(mpdm_mbstowcs(tmp, NULL, -1));
}
1970
/* a string is true unless it's "" or "0" */
static int vc_string_is_true(mpdm_t v)
{
    wchar_t *text = mpdm_string(v);

    return *text != L'\0' && wcscmp(text, L"0") != 0;
}
1978
/* the count of a string is its length in wide characters */
static int vc_string_count(mpdm_t v)
{
    wchar_t *text = (wchar_t *) v->data;

    return wcslen(text);
}
1983
/* returns a new one-character string with the character at offset i
   (negative offsets count from the end), or NULL if out of range */
static mpdm_t vc_string_get_i(mpdm_t v, int i)
{
    wchar_t *text = mpdm_string(v);
    int len = wcslen(text);
    int pos = i < 0 ? len + i : i;

    if (pos < 0 || pos >= len)
        return NULL;

    return MPDM_NS(&text[pos], 1);
}
2000
/* same as vc_string_get_i(), with the offset given as a value */
static mpdm_t vc_string_get(mpdm_t v, mpdm_t i)
{
    int offset = mpdm_ival(i);

    return vc_string_get_i(v, offset);
}
2005
2006
/* the string form of a string is its own data */
static wchar_t *vc_string_string(mpdm_t v)
{
    wchar_t *text = (wchar_t *) v->data;

    return text;
}
2011
2012
/* an integer is true when its value is not zero: the value itself is
   returned as the truth result */
static int vc_integer_is_true(mpdm_t v)
{
    int value = mpdm_ival(v);

    return value;
}
2017
/* string form of an integer: its decimal representation, converted
   to a wide string and handed to string_persist() */
static wchar_t *vc_integer_string(mpdm_t v)
{
    char digits[64];

    sprintf(digits, "%d", mpdm_ival(v));

    return string_persist(mpdm_mbstowcs(digits, NULL, -1));
}
2028
2029
/* a real number is true unless it compares equal to 0.0 */
static int vc_real_is_true(mpdm_t v)
{
    if (mpdm_rval(v) == 0.0)
        return 0;

    return 1;
}
2034
/* string form of a real number: printed with 15 decimals under the "C"
   numeric locale (so the decimal separator is always a dot), with
   trailing zeroes (and a then-dangling dot) stripped. The result is
   converted to a wide string and handed to string_persist() */
static wchar_t *vc_real_string(mpdm_t v)
{
    /* "%.15lf" never switches to exponent notation: assuming
       mpdm_rval() returns an IEEE double, a sign, up to 309 integer
       digits, the dot, 15 decimals and the terminator need 327 bytes */
    char tmp[512];
    wchar_t *str;
    char *cur_locale;
    char *prev_locale = NULL;

    /* setlocale() returns the locale in effect *after* the call, so
       the current one must be queried (and copied, as the returned
       buffer may be overwritten by later calls) before switching */
    cur_locale = setlocale(LC_NUMERIC, NULL);

    if (cur_locale != NULL) {
        prev_locale = malloc(strlen(cur_locale) + 1);

        if (prev_locale != NULL)
            strcpy(prev_locale, cur_locale);
    }

    setlocale(LC_NUMERIC, "C");

    sprintf(tmp, "%.15lf", mpdm_rval(v));

    /* back to the caller's numeric locale */
    if (prev_locale != NULL) {
        setlocale(LC_NUMERIC, prev_locale);
        free(prev_locale);
    }

    /* manually strip useless zeroes */
    if (strchr(tmp, '.') != NULL) {
        char *ptr;

        /* the dot is not a '0', so this always stops on it at most */
        for (ptr = tmp + strlen(tmp) - 1; *ptr == '0'; ptr--);

        /* if it's over the ., strip it also */
        if (*ptr != '.')
            ptr++;

        *ptr = '\0';
    }

    str = mpdm_mbstowcs(tmp, NULL, -1);

    return string_persist(str);
}
2062
2063
/* string iterator: *context is the offset of the next character.
   While it's inside the string, stores a one-character string into *v
   and the offset into *i (each only if not NULL) and moves on;
   otherwise resets the context. Returns the new context, 0 meaning
   that the iteration is over */
static int vc_string_iterator(mpdm_t set, int *context, mpdm_t *v, mpdm_t *i)
{
    wchar_t *text = (wchar_t *) set->data;
    int pos = *context;

    /* unsigned comparison: a negative context also ends the iteration */
    if ((size_t) pos >= wcslen(text)) {
        *context = 0;
        return 0;
    }

    if (v)
        *v = MPDM_NS(&text[pos], 1);

    if (i)
        *i = MPDM_I(pos);

    *context = pos + 1;

    return pos + 1;
}
2081
2082
/* number iterator: counts from the context up to (not including) the
   integer value of the set, storing the current number into both *v
   and *i (each only if not NULL). Returns the new context, 0 meaning
   that the iteration is over */
static int vc_number_iterator(mpdm_t set, int *context, mpdm_t *v, mpdm_t *i)
{
    int cur = *context;

    if (cur >= mpdm_ival(set)) {
        *context = 0;
        return 0;
    }

    if (v)
        *v = MPDM_I(cur);

    if (i)
        *i = MPDM_I(cur);

    *context = cur + 1;

    return cur + 1;
}
2098
2099
/* string map: if the filter is a regex, returns an array with all the
   consecutive matches of it in the string; otherwise, does the
   default mapping */
static mpdm_t vc_string_map(mpdm_t set, mpdm_t filter, mpdm_t ctxt)
{
    mpdm_t r;

    /* is the filter a regex? */
    if (mpdm_type(filter) == MPDM_TYPE_REGEX) {
        /* match the regex sequentially until the end of the string */
        mpdm_t v;
        int n = 0;
        int len = wcslen(mpdm_string(set));

        r = MPDM_A(0);

        while (n <= len && (v = mpdm_regex(set, filter, n))) {
            int next = mpdm_regex_offset + mpdm_regex_size;

            mpdm_push(r, v);

            /* an empty match doesn't move the offset: step over one
               character so the scan always makes progress (and the
               n <= len test above keeps it inside the string) */
            n = next > n ? next : n + 1;
        }
    }
    else
        /* do the default */
        r = vc_default_map(set, filter, ctxt);

    return r;
}
2123
2124
/* handler table for the "string" type: is_true, count, get_i, get,
   string, iterator and map are string-specific; the remaining
   slots use the vc_default_* handlers */
struct mpdm_type_vc mpdm_vc_string = { /* VC */
    L"string",              /* name */
    vc_default_destroy,     /* destroy */
    vc_string_is_true,      /* is_true */
    vc_string_count,        /* count */
    vc_string_get_i,        /* get_i */
    vc_string_get,          /* get */
    vc_string_string,       /* string */
    vc_default_del_i,       /* del_i */
    vc_default_del,         /* del */
    vc_default_set_i,       /* set_i */
    vc_default_set,         /* set */
    vc_default_exec,        /* exec */
    vc_string_iterator,     /* iterator */
    vc_string_map           /* map */
};
2141
/* handler table for the "mbs" type: every slot uses the
   vc_default_* handlers */
struct mpdm_type_vc mpdm_vc_mbs = { /* VC */
    L"mbs",                 /* name */
    vc_default_destroy,     /* destroy */
    vc_default_is_true,     /* is_true */
    vc_default_count,       /* count */
    vc_default_get_i,       /* get_i */
    vc_default_get,         /* get */
    vc_default_string,      /* string */
    vc_default_del_i,       /* del_i */
    vc_default_del,         /* del */
    vc_default_set_i,       /* set_i */
    vc_default_set,         /* set */
    vc_default_exec,        /* exec */
    vc_default_iterator,    /* iterator */
    vc_default_map          /* map */
};
2158
/* handler table for the "integer" type: is_true and string are
   integer-specific and the iterator is the numeric one (shared
   with the "real" type); the remaining slots use the
   vc_default_* handlers */
struct mpdm_type_vc mpdm_vc_integer = { /* VC */
    L"integer",             /* name */
    vc_default_destroy,     /* destroy */
    vc_integer_is_true,     /* is_true */
    vc_default_count,       /* count */
    vc_default_get_i,       /* get_i */
    vc_default_get,         /* get */
    vc_integer_string,      /* string */
    vc_default_del_i,       /* del_i */
    vc_default_del,         /* del */
    vc_default_set_i,       /* set_i */
    vc_default_set,         /* set */
    vc_default_exec,        /* exec */
    vc_number_iterator,     /* iterator */
    vc_default_map          /* map */
};
2175
/* handler table for the "real" type: is_true and string are
   real-specific and the iterator is the numeric one (shared
   with the "integer" type); the remaining slots use the
   vc_default_* handlers */
struct mpdm_type_vc mpdm_vc_real = { /* VC */
    L"real",                /* name */
    vc_default_destroy,     /* destroy */
    vc_real_is_true,        /* is_true */
    vc_default_count,       /* count */
    vc_default_get_i,       /* get_i */
    vc_default_get,         /* get */
    vc_real_string,         /* string */
    vc_default_del_i,       /* del_i */
    vc_default_del,         /* del */
    vc_default_set_i,       /* set_i */
    vc_default_set,         /* set */
    vc_default_exec,        /* exec */
    vc_number_iterator,     /* iterator */
    vc_default_map          /* map */
};
2192