1 /*
2  * fontconfig/src/fclang.c
3  *
4  * Copyright © 2002 Keith Packard
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of the author(s) not be used in
11  * advertising or publicity pertaining to distribution of the software without
12  * specific, written prior permission.  The authors make no
13  * representations about the suitability of this software for any purpose.  It
14  * is provided "as is" without express or implied warranty.
15  *
16  * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18  * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19  * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20  * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21  * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22  * PERFORMANCE OF THIS SOFTWARE.
23  */
24 
25 #include "fcint.h"
26 #include "fcftint.h"
27 
28 /* Objects MT-safe for readonly access. */
29 
30 typedef struct {
31     const FcChar8    	lang[16];
32     const FcCharSet	charset;
33 } FcLangCharSet;
34 
/*
 * Span of table indices.  FcLangSetIndex() treats begin/end as the
 * inclusive bounds of a binary search and reads begin > end as
 * "no entries".
 */
typedef struct {
    int begin;  /* first index of the span */
    int end;    /* last index of the span (inclusive) */
} FcLangCharSetRange;
39 
40 #include "../fc-lang/fclang.h"
41 
42 struct _FcLangSet {
43     FcStrSet	*extra;
44     FcChar32    map_size;
45     FcChar32	map[NUM_LANG_SET_MAP];
46 };
47 
48 static int FcLangSetIndex (const FcChar8 *lang);
49 
50 
51 static void
FcLangSetBitSet(FcLangSet * ls,unsigned int id)52 FcLangSetBitSet (FcLangSet    *ls,
53 		 unsigned int  id)
54 {
55   unsigned int bucket;
56 
57   id = fcLangCharSetIndices[id];
58   bucket = id >> 5;
59   if (bucket >= ls->map_size)
60     return; /* shouldn't happen really */
61 
62   ls->map[bucket] |= ((FcChar32) 1U << (id & 0x1f));
63 }
64 
65 static FcBool
FcLangSetBitGet(const FcLangSet * ls,unsigned int id)66 FcLangSetBitGet (const FcLangSet *ls,
67 		 unsigned int     id)
68 {
69   unsigned int bucket;
70 
71   id = fcLangCharSetIndices[id];
72   bucket = id >> 5;
73   if (bucket >= ls->map_size)
74     return FcFalse;
75 
76   return ((ls->map[bucket] >> (id & 0x1f)) & 1) ? FcTrue : FcFalse;
77 }
78 
79 static void
FcLangSetBitReset(FcLangSet * ls,unsigned int id)80 FcLangSetBitReset (FcLangSet    *ls,
81 		   unsigned int  id)
82 {
83   unsigned int bucket;
84 
85   id = fcLangCharSetIndices[id];
86   bucket = id >> 5;
87   if (bucket >= ls->map_size)
88     return; /* shouldn't happen really */
89 
90   ls->map[bucket] &= ~((FcChar32) 1U << (id & 0x1f));
91 }
92 
93 FcLangSet *
FcFreeTypeLangSet(const FcCharSet * charset,const FcChar8 * exclusiveLang)94 FcFreeTypeLangSet (const FcCharSet  *charset,
95 		   const FcChar8    *exclusiveLang)
96 {
97     int		    i, j;
98     FcChar32	    missing;
99     const FcCharSet *exclusiveCharset = 0;
100     FcLangSet	    *ls;
101 
102     if (exclusiveLang)
103 	exclusiveCharset = FcLangGetCharSet (exclusiveLang);
104     ls = FcLangSetCreate ();
105     if (!ls)
106 	return 0;
107     if (FcDebug() & FC_DBG_LANGSET)
108     {
109 	printf ("font charset");
110 	FcCharSetPrint (charset);
111 	printf ("\n");
112     }
113     for (i = 0; i < NUM_LANG_CHAR_SET; i++)
114     {
115 	if (FcDebug() & FC_DBG_LANGSET)
116 	{
117 	    printf ("%s charset", fcLangCharSets[i].lang);
118 	    FcCharSetPrint (&fcLangCharSets[i].charset);
119 	    printf ("\n");
120 	}
121 
122 	/*
123 	 * Check for Han charsets to make fonts
124 	 * which advertise support for a single language
125 	 * not support other Han languages
126 	 */
127 	if (exclusiveCharset &&
128 	    FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang))
129 	{
130 	    if (fcLangCharSets[i].charset.num != exclusiveCharset->num)
131 		continue;
132 
133 	    for (j = 0; j < fcLangCharSets[i].charset.num; j++)
134 		if (FcCharSetLeaf(&fcLangCharSets[i].charset, j) !=
135 		    FcCharSetLeaf(exclusiveCharset, j))
136 		    continue;
137 	}
138 	missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
139         if (FcDebug() & FC_DBG_SCANV)
140 	{
141 	    if (missing && missing < 10)
142 	    {
143 		FcCharSet   *missed = FcCharSetSubtract (&fcLangCharSets[i].charset,
144 							 charset);
145 		FcChar32    ucs4;
146 		FcChar32    map[FC_CHARSET_MAP_SIZE];
147 		FcChar32    next;
148 
149 		printf ("\n%s(%u) ", fcLangCharSets[i].lang, missing);
150 		printf ("{");
151 		for (ucs4 = FcCharSetFirstPage (missed, map, &next);
152 		     ucs4 != FC_CHARSET_DONE;
153 		     ucs4 = FcCharSetNextPage (missed, map, &next))
154 		{
155 		    int	    i, j;
156 		    for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
157 			if (map[i])
158 			{
159 			    for (j = 0; j < 32; j++)
160 				if (map[i] & (1U << j))
161 				    printf (" %04x", ucs4 + i * 32 + j);
162 			}
163 		}
164 		printf (" }\n\t");
165 		FcCharSetDestroy (missed);
166 	    }
167 	    else
168 		printf ("%s(%u) ", fcLangCharSets[i].lang, missing);
169 	}
170 	if (!missing)
171 	    FcLangSetBitSet (ls, i);
172     }
173 
174     if (FcDebug() & FC_DBG_SCANV)
175 	printf ("\n");
176 
177 
178     return ls;
179 }
180 
181 FcChar8 *
FcLangNormalize(const FcChar8 * lang)182 FcLangNormalize (const FcChar8 *lang)
183 {
184     FcChar8 *result = NULL, *s, *orig;
185     char *territory, *encoding, *modifier;
186     size_t llen, tlen = 0, mlen = 0;
187 
188     if (!lang || !*lang)
189 	return NULL;
190 
191     /* might be called without initialization */
192     FcInitDebug ();
193 
194     if (FcStrCmpIgnoreCase (lang, (const FcChar8 *)"C") == 0 ||
195 	FcStrCmpIgnoreCase (lang, (const FcChar8 *)"C.UTF-8") == 0 ||
196 	FcStrCmpIgnoreCase (lang, (const FcChar8 *)"C.utf8") == 0 ||
197 	FcStrCmpIgnoreCase (lang, (const FcChar8 *)"POSIX") == 0)
198     {
199 	result = FcStrCopy ((const FcChar8 *)"en");
200 	goto bail;
201     }
202 
203     s = FcStrCopy (lang);
204     if (!s)
205 	goto bail;
206 
207     /* from the comments in glibc:
208      *
209      * LOCALE can consist of up to four recognized parts for the XPG syntax:
210      *
211      *            language[_territory[.codeset]][@modifier]
212      *
213      * Beside the first all of them are allowed to be missing.  If the
214      * full specified locale is not found, the less specific one are
215      * looked for.  The various part will be stripped off according to
216      * the following order:
217      *            (1) codeset
218      *            (2) normalized codeset
219      *            (3) territory
220      *            (4) modifier
221      *
222      * So since we don't take care of the codeset part here, what patterns
223      * we need to deal with is:
224      *
225      *   1. language_territory@modifier
226      *   2. language@modifier
227      *   3. language
228      *
229      * then. and maybe no need to try language_territory here.
230      */
231     modifier = strchr ((const char *) s, '@');
232     if (modifier)
233     {
234 	*modifier = 0;
235 	modifier++;
236 	mlen = strlen (modifier);
237     }
238     encoding = strchr ((const char *) s, '.');
239     if (encoding)
240     {
241 	*encoding = 0;
242 	encoding++;
243 	if (modifier)
244 	{
245 	    memmove (encoding, modifier, mlen + 1);
246 	    modifier = encoding;
247 	}
248     }
249     territory = strchr ((const char *) s, '_');
250     if (!territory)
251 	territory = strchr ((const char *) s, '-');
252     if (territory)
253     {
254 	*territory = 0;
255 	territory++;
256 	tlen = strlen (territory);
257     }
258     llen = strlen ((const char *) s);
259     if (llen < 2 || llen > 3)
260     {
261 	fprintf (stderr, "Fontconfig warning: ignoring %s: not a valid language tag\n",
262 		 lang);
263 	goto bail0;
264     }
265     if (territory && (tlen < 2 || tlen > 3) &&
266 	!(territory[0] == 'z' && tlen < 5))
267     {
268 	fprintf (stderr, "Fontconfig warning: ignoring %s: not a valid region tag\n",
269 		 lang);
270 	goto bail0;
271     }
272     if (territory)
273 	territory[-1] = '-';
274     if (modifier)
275 	modifier[-1] = '@';
276     orig = FcStrDowncase (s);
277     if (!orig)
278 	goto bail0;
279     if (territory)
280     {
281 	if (FcDebug () & FC_DBG_LANGSET)
282 	    printf("Checking the existence of %s.orth\n", s);
283 	if (FcLangSetIndex (s) < 0)
284 	{
285 	    memmove (territory - 1, territory + tlen, (mlen > 0 ? mlen + 1 : 0) + 1);
286 	    if (modifier)
287 		modifier = territory;
288 	}
289 	else
290 	{
291 	    result = s;
292 	    /* we'll miss the opportunity to reduce the correct size
293 	     * of the allocated memory for the string after that.
294 	     */
295 	    s = NULL;
296 	    goto bail1;
297 	}
298     }
299     if (modifier)
300     {
301 	if (FcDebug () & FC_DBG_LANGSET)
302 	    printf("Checking the existence of %s.orth\n", s);
303 	if (FcLangSetIndex (s) < 0)
304 	    modifier[-1] = 0;
305 	else
306 	{
307 	    result = s;
308 	    /* we'll miss the opportunity to reduce the correct size
309 	     * of the allocated memory for the string after that.
310 	     */
311 	    s = NULL;
312 	    goto bail1;
313 	}
314     }
315     if (FcDebug () & FC_DBG_LANGSET)
316 	printf("Checking the existence of %s.orth\n", s);
317     if (FcLangSetIndex (s) < 0)
318     {
319 	/* there seems no languages matched in orth.
320 	 * add the language as is for fallback.
321 	 */
322 	result = orig;
323 	orig = NULL;
324     }
325     else
326     {
327 	result = s;
328 	/* we'll miss the opportunity to reduce the correct size
329 	 * of the allocated memory for the string after that.
330 	 */
331 	s = NULL;
332     }
333   bail1:
334     if (orig)
335 	FcStrFree (orig);
336   bail0:
337     if (s)
338 	free (s);
339   bail:
340     if (FcDebug () & FC_DBG_LANGSET)
341     {
342 	if (result)
343 	    printf ("normalized: %s -> %s\n", lang, result);
344 	else
345 	    printf ("Unable to normalize %s\n", lang);
346     }
347 
348     return result;
349 }
350 
351 #define FcLangEnd(c)	((c) == '-' || (c) == '\0')
352 
353 FcLangResult
FcLangCompare(const FcChar8 * s1,const FcChar8 * s2)354 FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
355 {
356     FcChar8	    c1, c2;
357     FcLangResult    result = FcLangDifferentLang;
358     const FcChar8  *s1_orig = s1;
359     FcBool	    is_und;
360 
361     is_und = FcToLower (s1[0]) == 'u' &&
362 	     FcToLower (s1[1]) == 'n' &&
363 	     FcToLower (s1[2]) == 'd' &&
364 	     FcLangEnd (s1[3]);
365 
366     for (;;)
367     {
368 	c1 = *s1++;
369 	c2 = *s2++;
370 
371 	c1 = FcToLower (c1);
372 	c2 = FcToLower (c2);
373 	if (c1 != c2)
374 	{
375 	    if (!is_und && FcLangEnd (c1) && FcLangEnd (c2))
376 		result = FcLangDifferentTerritory;
377 	    return result;
378 	}
379 	else if (!c1)
380 	{
381 	    return is_und ? result : FcLangEqual;
382 	}
383 	else if (c1 == '-')
384 	{
385 	    if (!is_und)
386 		result = FcLangDifferentTerritory;
387 	}
388 
389 	/* If we parsed past "und-", then do not consider it undefined anymore,
390 	 * as there's *something* specified. */
391 	if (is_und && s1 - s1_orig == 4)
392 	    is_und = FcFalse;
393     }
394 }
395 
396 /*
397  * Return FcTrue when super contains sub.
398  *
399  * super contains sub if super and sub have the same
400  * language and either the same country or one
401  * is missing the country
402  */
403 
404 static FcBool
FcLangContains(const FcChar8 * super,const FcChar8 * sub)405 FcLangContains (const FcChar8 *super, const FcChar8 *sub)
406 {
407     FcChar8	    c1, c2;
408 
409     for (;;)
410     {
411 	c1 = *super++;
412 	c2 = *sub++;
413 
414 	c1 = FcToLower (c1);
415 	c2 = FcToLower (c2);
416 	if (c1 != c2)
417 	{
418 	    /* see if super has a country while sub is missing one */
419 	    if (c1 == '-' && c2 == '\0')
420 		return FcTrue;
421 	    /* see if sub has a country while super is missing one */
422 	    if (c1 == '\0' && c2 == '-')
423 		return FcTrue;
424 	    return FcFalse;
425 	}
426 	else if (!c1)
427 	    return FcTrue;
428     }
429 }
430 
431 const FcCharSet *
FcLangGetCharSet(const FcChar8 * lang)432 FcLangGetCharSet (const FcChar8 *lang)
433 {
434     int		i;
435     int		country = -1;
436 
437     for (i = 0; i < NUM_LANG_CHAR_SET; i++)
438     {
439 	switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
440 	case FcLangEqual:
441 	    return &fcLangCharSets[i].charset;
442 	case FcLangDifferentTerritory:
443 	    if (country == -1)
444 		country = i;
445 	case FcLangDifferentLang:
446 	default:
447 	    break;
448 	}
449     }
450     if (country == -1)
451 	return 0;
452     return &fcLangCharSets[country].charset;
453 }
454 
455 FcStrSet *
FcGetLangs(void)456 FcGetLangs (void)
457 {
458     FcStrSet *langs;
459     int	i;
460 
461     langs = FcStrSetCreate();
462     if (!langs)
463 	return 0;
464 
465     for (i = 0; i < NUM_LANG_CHAR_SET; i++)
466 	FcStrSetAdd (langs, fcLangCharSets[i].lang);
467 
468     return langs;
469 }
470 
471 FcLangSet *
FcLangSetCreate(void)472 FcLangSetCreate (void)
473 {
474     FcLangSet	*ls;
475 
476     ls = malloc (sizeof (FcLangSet));
477     if (!ls)
478 	return 0;
479     memset (ls->map, '\0', sizeof (ls->map));
480     ls->map_size = NUM_LANG_SET_MAP;
481     ls->extra = 0;
482     return ls;
483 }
484 
485 void
FcLangSetDestroy(FcLangSet * ls)486 FcLangSetDestroy (FcLangSet *ls)
487 {
488     if (!ls)
489 	return;
490 
491     if (ls->extra)
492 	FcStrSetDestroy (ls->extra);
493     free (ls);
494 }
495 
496 FcLangSet *
FcLangSetCopy(const FcLangSet * ls)497 FcLangSetCopy (const FcLangSet *ls)
498 {
499     FcLangSet	*new;
500 
501     if (!ls)
502 	return NULL;
503 
504     new = FcLangSetCreate ();
505     if (!new)
506 	goto bail0;
507     memset (new->map, '\0', sizeof (new->map));
508     memcpy (new->map, ls->map, FC_MIN (sizeof (new->map), ls->map_size * sizeof (ls->map[0])));
509     if (ls->extra)
510     {
511 	FcStrList	*list;
512 	FcChar8		*extra;
513 
514 	new->extra = FcStrSetCreate ();
515 	if (!new->extra)
516 	    goto bail1;
517 
518 	list = FcStrListCreate (ls->extra);
519 	if (!list)
520 	    goto bail1;
521 
522 	while ((extra = FcStrListNext (list)))
523 	    if (!FcStrSetAdd (new->extra, extra))
524 	    {
525 		FcStrListDone (list);
526 		goto bail1;
527 	    }
528 	FcStrListDone (list);
529     }
530     return new;
531 bail1:
532     FcLangSetDestroy (new);
533 bail0:
534     return 0;
535 }
536 
537 /* When the language isn't found, the return value r is such that:
538  *  1) r < 0
539  *  2) -r -1 is the index of the first language in fcLangCharSets that comes
540  *     after the 'lang' argument in lexicographic order.
541  *
542  *  The -1 is necessary to avoid problems with language id 0 (otherwise, we
543  *  wouldn't be able to distinguish between “language found, id is 0” and
544  *  “language not found, sorts right before the language with id 0”).
545  */
546 static int
FcLangSetIndex(const FcChar8 * lang)547 FcLangSetIndex (const FcChar8 *lang)
548 {
549     int	    low, high, mid = 0;
550     int	    cmp = 0;
551     FcChar8 firstChar = FcToLower(lang[0]);
552     FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0';
553 
554     if (firstChar < 'a')
555     {
556 	low = 0;
557 	high = fcLangCharSetRanges[0].begin;
558     }
559     else if(firstChar > 'z')
560     {
561 	low = fcLangCharSetRanges[25].begin;
562 	high = NUM_LANG_CHAR_SET - 1;
563     }
564     else
565     {
566 	low = fcLangCharSetRanges[firstChar - 'a'].begin;
567 	high = fcLangCharSetRanges[firstChar - 'a'].end;
568 	/* no matches */
569 	if (low > high)
570 	    return -(low+1); /* one past next entry after where it would be */
571     }
572 
573     while (low <= high)
574     {
575 	mid = (high + low) >> 1;
576 	if(fcLangCharSets[mid].lang[0] != firstChar)
577 	    cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
578 	else
579 	{   /* fast path for resolving 2-letter languages (by far the most common) after
580 	     * finding the first char (probably already true because of the hash table) */
581 	    cmp = fcLangCharSets[mid].lang[1] - secondChar;
582 	    if (cmp == 0 &&
583 		(fcLangCharSets[mid].lang[2] != '\0' ||
584 		 lang[2] != '\0'))
585 	    {
586 		cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2,
587 					 lang+2);
588 	    }
589 	}
590 	if (cmp == 0)
591 	    return mid;
592 	if (cmp < 0)
593 	    low = mid + 1;
594 	else
595 	    high = mid - 1;
596     }
597     if (cmp < 0)
598 	mid++;
599     return -(mid + 1);
600 }
601 
602 FcBool
FcLangSetAdd(FcLangSet * ls,const FcChar8 * lang)603 FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
604 {
605     int	    id;
606 
607     id = FcLangSetIndex (lang);
608     if (id >= 0)
609     {
610 	FcLangSetBitSet (ls, id);
611 	return FcTrue;
612     }
613     if (!ls->extra)
614     {
615 	ls->extra = FcStrSetCreate ();
616 	if (!ls->extra)
617 	    return FcFalse;
618     }
619     return FcStrSetAdd (ls->extra, lang);
620 }
621 
622 FcBool
FcLangSetDel(FcLangSet * ls,const FcChar8 * lang)623 FcLangSetDel (FcLangSet *ls, const FcChar8 *lang)
624 {
625     int	id;
626 
627     id = FcLangSetIndex (lang);
628     if (id >= 0)
629     {
630 	FcLangSetBitReset (ls, id);
631     }
632     else if (ls->extra)
633     {
634 	FcStrSetDel (ls->extra, lang);
635     }
636     return FcTrue;
637 }
638 
639 FcLangResult
FcLangSetHasLang(const FcLangSet * ls,const FcChar8 * lang)640 FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
641 {
642     int		    id;
643     FcLangResult    best, r;
644     int		    i;
645 
646     id = FcLangSetIndex (lang);
647     if (id < 0)
648 	id = -id - 1;
649     else if (FcLangSetBitGet (ls, id))
650 	return FcLangEqual;
651     best = FcLangDifferentLang;
652     for (i = id - 1; i >= 0; i--)
653     {
654 	r = FcLangCompare (lang, fcLangCharSets[i].lang);
655 	if (r == FcLangDifferentLang)
656 	    break;
657 	if (FcLangSetBitGet (ls, i) && r < best)
658 	    best = r;
659     }
660     for (i = id; i < NUM_LANG_CHAR_SET; i++)
661     {
662 	r = FcLangCompare (lang, fcLangCharSets[i].lang);
663 	if (r == FcLangDifferentLang)
664 	    break;
665 	if (FcLangSetBitGet (ls, i) && r < best)
666 	    best = r;
667     }
668     if (ls->extra)
669     {
670 	FcStrList	*list = FcStrListCreate (ls->extra);
671 	FcChar8		*extra;
672 
673 	if (list)
674 	{
675 	    while (best > FcLangEqual && (extra = FcStrListNext (list)))
676 	    {
677 		r = FcLangCompare (lang, extra);
678 		if (r < best)
679 		    best = r;
680 	    }
681 	    FcStrListDone (list);
682 	}
683     }
684     return best;
685 }
686 
687 static FcLangResult
FcLangSetCompareStrSet(const FcLangSet * ls,FcStrSet * set)688 FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
689 {
690     FcStrList	    *list = FcStrListCreate (set);
691     FcLangResult    r, best = FcLangDifferentLang;
692     FcChar8	    *extra;
693 
694     if (list)
695     {
696 	while (best > FcLangEqual && (extra = FcStrListNext (list)))
697 	{
698 	    r = FcLangSetHasLang (ls, extra);
699 	    if (r < best)
700 		best = r;
701 	}
702 	FcStrListDone (list);
703     }
704     return best;
705 }
706 
707 FcLangResult
FcLangSetCompare(const FcLangSet * lsa,const FcLangSet * lsb)708 FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
709 {
710     int		    i, j, count;
711     FcLangResult    best, r;
712     FcChar32 aInCountrySet, bInCountrySet;
713 
714     count = FC_MIN (lsa->map_size, lsb->map_size);
715     count = FC_MIN (NUM_LANG_SET_MAP, count);
716     for (i = 0; i < count; i++)
717 	if (lsa->map[i] & lsb->map[i])
718 	    return FcLangEqual;
719     best = FcLangDifferentLang;
720     for (j = 0; j < NUM_COUNTRY_SET; j++)
721     {
722 	aInCountrySet = 0;
723 	bInCountrySet = 0;
724 
725 	for (i = 0; i < count; i++)
726 	{
727 	    aInCountrySet |= lsa->map[i] & fcLangCountrySets[j][i];
728 	    bInCountrySet |= lsb->map[i] & fcLangCountrySets[j][i];
729 
730 	    if (aInCountrySet && bInCountrySet)
731 	    {
732 		best = FcLangDifferentTerritory;
733 		break;
734 	    }
735 	}
736     }
737     if (lsa->extra)
738     {
739 	r = FcLangSetCompareStrSet (lsb, lsa->extra);
740 	if (r < best)
741 	    best = r;
742     }
743     if (best > FcLangEqual && lsb->extra)
744     {
745 	r = FcLangSetCompareStrSet (lsa, lsb->extra);
746 	if (r < best)
747 	    best = r;
748     }
749     return best;
750 }
751 
752 /*
753  * Used in computing values -- mustn't allocate any storage
754  */
755 FcLangSet *
FcLangSetPromote(const FcChar8 * lang,FcValuePromotionBuffer * vbuf)756 FcLangSetPromote (const FcChar8 *lang, FcValuePromotionBuffer *vbuf)
757 {
758     int		id;
759     typedef struct {
760 	FcLangSet  ls;
761 	FcStrSet   strs;
762 	FcChar8   *str;
763     } FcLangSetPromotionBuffer;
764     FcLangSetPromotionBuffer *buf = (FcLangSetPromotionBuffer *) vbuf;
765 
766     FC_ASSERT_STATIC (sizeof (FcLangSetPromotionBuffer) <= sizeof (FcValuePromotionBuffer));
767 
768     memset (buf->ls.map, '\0', sizeof (buf->ls.map));
769     buf->ls.map_size = NUM_LANG_SET_MAP;
770     buf->ls.extra = 0;
771     if (lang)
772     {
773 	id = FcLangSetIndex (lang);
774 	if (id >= 0)
775 	{
776 	    FcLangSetBitSet (&buf->ls, id);
777 	}
778 	else
779 	{
780 	    buf->ls.extra = &buf->strs;
781 	    buf->strs.num = 1;
782 	    buf->strs.size = 1;
783 	    buf->strs.strs = &buf->str;
784 	    FcRefInit (&buf->strs.ref, 1);
785 	    buf->str = (FcChar8 *) lang;
786 	}
787     }
788     return &buf->ls;
789 }
790 
791 FcChar32
FcLangSetHash(const FcLangSet * ls)792 FcLangSetHash (const FcLangSet *ls)
793 {
794     FcChar32	h = 0;
795     int		i, count;
796 
797     count = FC_MIN (ls->map_size, NUM_LANG_SET_MAP);
798     for (i = 0; i < count; i++)
799 	h ^= ls->map[i];
800     if (ls->extra)
801 	h ^= ls->extra->num;
802     return h;
803 }
804 
805 FcLangSet *
FcNameParseLangSet(const FcChar8 * string)806 FcNameParseLangSet (const FcChar8 *string)
807 {
808     FcChar8	    lang[32], c = 0;
809     int i;
810     FcLangSet	    *ls;
811 
812     ls = FcLangSetCreate ();
813     if (!ls)
814 	goto bail0;
815 
816     for(;;)
817     {
818 	for(i = 0; i < 31;i++)
819 	{
820 	    c = *string++;
821 	    if(c == '\0' || c == '|')
822 		break; /* end of this code */
823 	    lang[i] = c;
824 	}
825 	lang[i] = '\0';
826 	if (!FcLangSetAdd (ls, lang))
827 	    goto bail1;
828 	if(c == '\0')
829 	    break;
830     }
831     return ls;
832 bail1:
833     FcLangSetDestroy (ls);
834 bail0:
835     return 0;
836 }
837 
838 FcBool
FcNameUnparseLangSet(FcStrBuf * buf,const FcLangSet * ls)839 FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
840 {
841     int		i, bit, count;
842     FcChar32	bits;
843     FcBool	first = FcTrue;
844 
845     count = FC_MIN (ls->map_size, NUM_LANG_SET_MAP);
846     for (i = 0; i < count; i++)
847     {
848 	if ((bits = ls->map[i]))
849 	{
850 	    for (bit = 0; bit <= 31; bit++)
851 		if (bits & (1U << bit))
852 		{
853 		    int id = (i << 5) | bit;
854 		    if (!first)
855 			if (!FcStrBufChar (buf, '|'))
856 			    return FcFalse;
857 		    if (!FcStrBufString (buf, fcLangCharSets[fcLangCharSetIndicesInv[id]].lang))
858 			return FcFalse;
859 		    first = FcFalse;
860 		}
861 	}
862     }
863     if (ls->extra)
864     {
865 	FcStrList   *list = FcStrListCreate (ls->extra);
866 	FcChar8	    *extra;
867 
868 	if (!list)
869 	    return FcFalse;
870 	while ((extra = FcStrListNext (list)))
871 	{
872 	    if (!first)
873 		if (!FcStrBufChar (buf, '|'))
874                 {
875                     FcStrListDone (list);
876 		    return FcFalse;
877                 }
878 	    if (!FcStrBufString (buf, extra))
879                 {
880                     FcStrListDone (list);
881                     return FcFalse;
882                 }
883 	    first = FcFalse;
884 	}
885         FcStrListDone (list);
886     }
887     return FcTrue;
888 }
889 
890 FcBool
FcLangSetEqual(const FcLangSet * lsa,const FcLangSet * lsb)891 FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
892 {
893     int	    i, count;
894 
895     count = FC_MIN (lsa->map_size, lsb->map_size);
896     count = FC_MIN (NUM_LANG_SET_MAP, count);
897     for (i = 0; i < count; i++)
898     {
899 	if (lsa->map[i] != lsb->map[i])
900 	    return FcFalse;
901     }
902     if (!lsa->extra && !lsb->extra)
903 	return FcTrue;
904     if (lsa->extra && lsb->extra)
905 	return FcStrSetEqual (lsa->extra, lsb->extra);
906     return FcFalse;
907 }
908 
909 static FcBool
FcLangSetContainsLang(const FcLangSet * ls,const FcChar8 * lang)910 FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
911 {
912     int		    id;
913     int		    i;
914 
915     id = FcLangSetIndex (lang);
916     if (id < 0)
917 	id = -id - 1;
918     else if (FcLangSetBitGet (ls, id))
919 	return FcTrue;
920     /*
921      * search up and down among equal languages for a match
922      */
923     for (i = id - 1; i >= 0; i--)
924     {
925 	if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
926 	    break;
927 	if (FcLangSetBitGet (ls, i) &&
928 	    FcLangContains (fcLangCharSets[i].lang, lang))
929 	    return FcTrue;
930     }
931     for (i = id; i < NUM_LANG_CHAR_SET; i++)
932     {
933 	if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
934 	    break;
935 	if (FcLangSetBitGet (ls, i) &&
936 	    FcLangContains (fcLangCharSets[i].lang, lang))
937 	    return FcTrue;
938     }
939     if (ls->extra)
940     {
941 	FcStrList	*list = FcStrListCreate (ls->extra);
942 	FcChar8		*extra;
943 
944 	if (list)
945 	{
946 	    while ((extra = FcStrListNext (list)))
947 	    {
948 		if (FcLangContains (extra, lang))
949 		    break;
950 	    }
951 	    FcStrListDone (list);
952     	    if (extra)
953 		return FcTrue;
954 	}
955     }
956     return FcFalse;
957 }
958 
959 /*
960  * return FcTrue if lsa contains every language in lsb
961  */
962 FcBool
FcLangSetContains(const FcLangSet * lsa,const FcLangSet * lsb)963 FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
964 {
965     int		    i, j, count;
966     FcChar32	    missing;
967 
968     if (FcDebug() & FC_DBG_MATCHV)
969     {
970 	printf ("FcLangSet "); FcLangSetPrint (lsa);
971 	printf (" contains "); FcLangSetPrint (lsb);
972 	printf ("\n");
973     }
974     /*
975      * check bitmaps for missing language support
976      */
977     count = FC_MIN (lsa->map_size, lsb->map_size);
978     count = FC_MIN (NUM_LANG_SET_MAP, count);
979     for (i = 0; i < count; i++)
980     {
981 	missing = lsb->map[i] & ~lsa->map[i];
982 	if (missing)
983 	{
984 	    for (j = 0; j < 32; j++)
985 		if (missing & (1U << j))
986 		{
987 		    if (!FcLangSetContainsLang (lsa,
988 						fcLangCharSets[fcLangCharSetIndicesInv[i*32 + j]].lang))
989 		    {
990 			if (FcDebug() & FC_DBG_MATCHV)
991 			    printf ("\tMissing bitmap %s\n", fcLangCharSets[fcLangCharSetIndicesInv[i*32+j]].lang);
992 			return FcFalse;
993 		    }
994 		}
995 	}
996     }
997     if (lsb->extra)
998     {
999 	FcStrList   *list = FcStrListCreate (lsb->extra);
1000 	FcChar8	    *extra;
1001 
1002 	if (list)
1003 	{
1004 	    while ((extra = FcStrListNext (list)))
1005 	    {
1006 		if (!FcLangSetContainsLang (lsa, extra))
1007 		{
1008 		    if (FcDebug() & FC_DBG_MATCHV)
1009 			printf ("\tMissing string %s\n", extra);
1010 		    break;
1011 		}
1012 	    }
1013 	    FcStrListDone (list);
1014 	    if (extra)
1015 		return FcFalse;
1016 	}
1017     }
1018     return FcTrue;
1019 }
1020 
1021 FcBool
FcLangSetSerializeAlloc(FcSerialize * serialize,const FcLangSet * l)1022 FcLangSetSerializeAlloc (FcSerialize *serialize, const FcLangSet *l)
1023 {
1024     if (!FcSerializeAlloc (serialize, l, sizeof (FcLangSet)))
1025 	return FcFalse;
1026     return FcTrue;
1027 }
1028 
1029 FcLangSet *
FcLangSetSerialize(FcSerialize * serialize,const FcLangSet * l)1030 FcLangSetSerialize(FcSerialize *serialize, const FcLangSet *l)
1031 {
1032     FcLangSet	*l_serialize = FcSerializePtr (serialize, l);
1033 
1034     if (!l_serialize)
1035 	return NULL;
1036     memset (l_serialize->map, '\0', sizeof (l_serialize->map));
1037     memcpy (l_serialize->map, l->map, FC_MIN (sizeof (l_serialize->map), l->map_size * sizeof (l->map[0])));
1038     l_serialize->map_size = NUM_LANG_SET_MAP;
1039     l_serialize->extra = NULL; /* We don't serialize ls->extra */
1040     return l_serialize;
1041 }
1042 
1043 FcStrSet *
FcLangSetGetLangs(const FcLangSet * ls)1044 FcLangSetGetLangs (const FcLangSet *ls)
1045 {
1046     FcStrSet *langs;
1047     int	      i;
1048 
1049     langs = FcStrSetCreate();
1050     if (!langs)
1051 	return 0;
1052 
1053     for (i = 0; i < NUM_LANG_CHAR_SET; i++)
1054 	if (FcLangSetBitGet (ls, i))
1055 	    FcStrSetAdd (langs, fcLangCharSets[i].lang);
1056 
1057     if (ls->extra)
1058     {
1059 	FcStrList	*list = FcStrListCreate (ls->extra);
1060 	FcChar8		*extra;
1061 
1062 	if (list)
1063 	{
1064 	    while ((extra = FcStrListNext (list)))
1065 		FcStrSetAdd (langs, extra);
1066 
1067 	    FcStrListDone (list);
1068 	}
1069     }
1070 
1071     return langs;
1072 }
1073 
1074 static FcLangSet *
FcLangSetOperate(const FcLangSet * a,const FcLangSet * b,FcBool (* func)(FcLangSet * ls,const FcChar8 * s))1075 FcLangSetOperate(const FcLangSet	*a,
1076 		 const FcLangSet	*b,
1077 		 FcBool			(*func) (FcLangSet 	*ls,
1078 						 const FcChar8	*s))
1079 {
1080     FcLangSet	*langset = FcLangSetCopy (a);
1081     FcStrSet	*set = FcLangSetGetLangs (b);
1082     FcStrList	*sl = FcStrListCreate (set);
1083     FcChar8	*str;
1084 
1085     FcStrSetDestroy (set);
1086     while ((str = FcStrListNext (sl)))
1087     {
1088 	func (langset, str);
1089     }
1090     FcStrListDone (sl);
1091 
1092     return langset;
1093 }
1094 
1095 FcLangSet *
FcLangSetUnion(const FcLangSet * a,const FcLangSet * b)1096 FcLangSetUnion (const FcLangSet *a, const FcLangSet *b)
1097 {
1098     return FcLangSetOperate(a, b, FcLangSetAdd);
1099 }
1100 
1101 FcLangSet *
FcLangSetSubtract(const FcLangSet * a,const FcLangSet * b)1102 FcLangSetSubtract (const FcLangSet *a, const FcLangSet *b)
1103 {
1104     return FcLangSetOperate(a, b, FcLangSetDel);
1105 }
1106 
1107 #define __fclang__
1108 #include "fcaliastail.h"
1109 #include "fcftaliastail.h"
1110 #undef __fclang__
1111