xref: /freebsd/lib/libc/string/strcoll.c (revision bdd1243d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
5  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
6  *		at Electronni Visti IA, Kiev, Ukraine.
7  *			All rights reserved.
8  *
9  * Copyright (c) 2011 The FreeBSD Foundation
10  *
11  * Portions of this software were developed by David Chisnall
12  * under sponsorship from the FreeBSD Foundation.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 
39 #include <stdlib.h>
40 #include <string.h>
41 #include <errno.h>
42 #include <wchar.h>
43 #include "collate.h"
44 
45 
46 /*
47  * In order to properly handle multibyte locales, its easiest to just
48  * convert to wide characters and then use wcscoll.  However if an
49  * error occurs, we gracefully fall back to simple strcmp.  Caller
50  * should check errno.
51  */
52 int
53 strcoll_l(const char *s, const char *s2, locale_t locale)
54 {
55 	int ret;
56 	wchar_t *t1 = NULL, *t2 = NULL;
57 	wchar_t *w1 = NULL, *w2 = NULL;
58 	const char *cs1, *cs2;
59 	mbstate_t mbs1;
60 	mbstate_t mbs2;
61 	size_t sz1, sz2;
62 
63 	memset(&mbs1, 0, sizeof (mbstate_t));
64 	memset(&mbs2, 0, sizeof (mbstate_t));
65 
66 	/*
67 	 * The mbsrtowcs_l function can set the src pointer to null upon
68 	 * failure, so it should act on a copy to avoid:
69 	 *   - sending null pointer to strcmp
70 	 *   - having strcoll/strcoll_l change *s or *s2 to null
71 	 */
72 	cs1 = s;
73 	cs2 = s2;
74 
75 	FIX_LOCALE(locale);
76 	struct xlocale_collate *table =
77 		(struct xlocale_collate*)locale->components[XLC_COLLATE];
78 
79 	if (table->__collate_load_error)
80 		goto error;
81 
82 	sz1 = strlen(s) + 1;
83 	sz2 = strlen(s2) + 1;
84 
85 	/*
86 	 * Simple assumption: conversion to wide format is strictly
87 	 * reducing, i.e. a single byte (or multibyte character)
88 	 * cannot result in multiple wide characters.
89 	 */
90 	if ((t1 = malloc(sz1 * sizeof (wchar_t))) == NULL)
91 		goto error;
92 	w1 = t1;
93 	if ((t2 = malloc(sz2 * sizeof (wchar_t))) == NULL)
94 		goto error;
95 	w2 = t2;
96 
97 	if ((mbsrtowcs_l(w1, &cs1, sz1, &mbs1, locale)) == (size_t)-1)
98 		goto error;
99 
100 	if ((mbsrtowcs_l(w2, &cs2, sz2, &mbs2, locale)) == (size_t)-1)
101 		goto error;
102 
103 	ret = wcscoll_l(w1, w2, locale);
104 	free(t1);
105 	free(t2);
106 
107 	return (ret);
108 
109 error:
110 	free(t1);
111 	free(t2);
112 	return (strcmp(s, s2));
113 }
114 
/*
 * Compare two multibyte strings by delegating to strcoll_l() with the
 * locale returned by __get_locale().
 */
int
strcoll(const char *s, const char *s2)
{
	int order;

	order = strcoll_l(s, s2, __get_locale());
	return (order);
}
120 
121