1\function{SLutf8_skip_char}
2\synopsis{Skip past a UTF-8 encoded character}
3\usage{SLuchar_Type *SLutf8_skip_char (SLuchar_Type *u, SLuchar_Type *umax)}
4\description
5 The \cfun{SLutf8_skip_char} function returns a pointer to the
6 character immediately following the UTF-8 encoded character at
7 \exmp{u}.  It will make no attempt to examine the bytes at the
8 position \exmp{umax} and beyond.  If the bytes at \exmp{u} do not
9 represent a valid or legal UTF-8 encoded sequence, a pointer to the
10 byte following \exmp{u} will be returned.
11\notes
12 Unicode combining characters are treated as distinct characters by
13 this function.
14\seealso{SLutf8_skip_chars, SLutf8_bskip_char, SLutf8_strlen}
15\done
16
17\function{SLutf8_skip_chars}
18\synopsis{Skip past a specified number of characters in a UTF-8
19 encoded string}
20\usage{SLuchar_Type *SLutf8_skip_chars (u, umax, num, dnum, ignore_combining)}
21#v+
22    SLuchar_Type *u, *umax;
23    unsigned int num;
24    unsigned int *dnum;
25    int ignore_combining;
26#v-
27\description
28 This function attempts to skip forward past \exmp{num} UTF-8 encoded
29 characters at \exmp{u} returning the actual number skipped via the
30 parameter \exmp{dnum}.  It will make no attempt to examine bytes at
31 \exmp{umax} and beyond.  Unicode combining characters will not be
32 counted if \exmp{ignore_combining} is non-zero, otherwise they will
33 be treated as distinct characters.  If the input contains an
34 invalid or illegal UTF-8 sequence, then each byte in the sequence
35 will be treated as a single character.
36\seealso{SLutf8_skip_char, SLutf8_bskip_chars}
37\done
38
39\function{SLutf8_bskip_char}
40\synopsis{Skip backward past a UTF-8 encoded character}
41\usage{SLuchar_Type *SLutf8_bskip_char (SLuchar_Type *umin, SLuchar_Type *u)}
42\description
43  The \cfun{SLutf8_bskip_char} skips backward to the start of the
44  UTF-8 encoded character immediately before the position \exmp{u}.
45  The function will make no attempt to examine characters before the
46  position \exmp{umin}.  Unicode combining characters are treated as
47  distinct characters.
48\seealso{SLutf8_bskip_chars, SLutf8_skip_char}
49\done
50
51\function{SLutf8_bskip_chars}
52\synopsis{Skip backward past a specified number of UTF-8 encoded characters}
53\usage{SLuchar_Type *SLutf8_bskip_chars (umin, u, num, dnum, ignore_combining)}
54#v+
55   SLuchar_Type *umin, *u;
56   unsigned int num;
57   unsigned int *dnum;
58   int ignore_combining;
59#v-
60\description
61 This function attempts to skip backward past \exmp{num} UTF-8
62 encoded characters occurring immediately before \exmp{u}.  It returns
63 the actual number skipped via the parameter \exmp{dnum}.  No
64 attempt will be made to examine the bytes occurring before \exmp{umin}.
65 Unicode combining characters will not be counted if \exmp{ignore_combining}
66 is non-zero, otherwise they will be treated as distinct characters.
67 If the input contains an invalid or illegal UTF-8 sequence, then each
68 byte in the sequence will be treated as a single character.
69\seealso{SLutf8_skip_chars, SLutf8_bskip_char}
70\done
71
72\function{SLutf8_decode}
73\synopsis{Decode a UTF-8 encoded character sequence}
74\usage{SLuchar_Type *SLutf8_decode (u, umax, w, nconsumedp)}
75#v+
76   SLuchar_Type *u, *umax;
77   SLwchar_Type *w;
78   unsigned int *nconsumedp;
79#v-
80\description
81 The \cfun{SLutf8_decode} function decodes the UTF-8 encoded character
82 occurring at \exmp{u} and returns the decoded character via the
83 parameter \exmp{w}.  No attempt will be made to examine the bytes at
84 \exmp{umax} and beyond.  If the parameter \exmp{nconsumedp} is
85 non-NULL, then the number of bytes consumed by the function will
86 be returned to it.  If the sequence at \exmp{u} is invalid or
87 illegal, the function will return \NULL with the number of
88 bytes consumed by the function equal to the size of the invalid
89 sequence.  Otherwise the function will return a pointer to the byte
90 following the encoded sequence.
91\seealso{SLutf8_encode, SLutf8_strlen, SLutf8_skip_char}
92\done
93
94\function{SLutf8_encode}
95\synopsis{UTF-8 encode a character}
96\usage{SLuchar_Type *SLutf8_encode (w, u, ulen)}
97#v+
98   SLwchar_Type w;
99   SLuchar_Type *u;
100   unsigned int ulen;
101#v-
102\description
103 This function UTF-8 encodes the Unicode character represented by
104 \exmp{w} and stores the encoded representation in the buffer of size
105 \exmp{ulen} bytes at \exmp{u}.  The function will return \NULL if the
106 size of the buffer is too small to represent the UTF-8 encoded
107 character, otherwise it will return a pointer to the byte following
108 the encoded representation.
109\notes
110 This function does not null terminate the resulting byte sequence.
111 The function \cfun{SLutf8_encode_null_terminate} may be used for that
112 purpose.
113
114 To guarantee that the buffer is large enough to hold the encoded
115 bytes, its size should be at least \exmp{SLUTF8_MAX_BLEN} bytes.
116
117 The function will encode illegal Unicode characters, i.e., characters
118 in the range 0xD800-0xDFFF (the UTF-16 surrogates) and 0xFFFE-0xFFFF.
119\seealso{SLutf8_decode, SLutf8_encode_bytes, SLutf8_encode_null_terminate}
120\done
121
122\function{SLutf8_strlen}
123\synopsis{Determine the number of characters in a UTF-8 sequence}
124\usage{unsigned int SLutf8_strlen (SLuchar_Type *s, int ignore_combining)}
125\description
126 This function may be used to determine the number of characters
127 represented by the null-terminated UTF-8 byte sequence.  If the
128 \exmp{ignore_combining} parameter is non-zero, then Unicode combining
129 characters will not be counted.
130\seealso{SLutf8_skip_chars, SLutf8_decode}
131\done
132
133\function{SLutf8_extract_utf8_char}
134\synopsis{Extract a UTF-8 encoded character}
135\usage{SLuchar_Type *SLutf8_extract_utf8_char (u, umax, buf)}
136#v+
137   SLuchar_Type *u, *umax, *buf;
138#v-
139\description
140 This function extracts the bytes representing the UTF-8 encoded character
141 at \exmp{u} and places them in the buffer \exmp{buf}, and then null
142 terminates the result.  The buffer is assumed to consist of at least
143 \exmp{SLUTF8_MAX_BLEN+1} bytes, where the extra byte may be necessary
144 for null termination.  No attempt will be made to examine the
145 characters at \exmp{umax} and beyond.  If the byte-sequence at
146 \exmp{u} is an illegal or invalid UTF-8 sequence, then the byte at
147 \exmp{u} will be copied to the buffer.  The function returns a
148 pointer to the byte following the copied bytes.
149\notes
150 One may think of this function as the multibyte analogue of
151#v+
152     if (u < umax)
153       {
154          buf[0] = *u++;
155          buf[1] = 0;
156       }
157#v-
158\seealso{SLutf8_decode, SLutf8_skip_char}
159\done
160
161\function{SLutf8_encode_null_terminate}
162\synopsis{UTF-8 encode a character and null terminate the result}
163\usage{SLuchar_Type *SLutf8_encode_null_terminate (w, buf)}
164#v+
165   SLwchar_Type w;
166   SLuchar_Type *buf;
167#v-
168\description
169 This function has the same functionality as \cfun{SLutf8_encode},
170 except that it also null terminates the encoded sequence.  The
171 buffer \exmp{buf}, where the encoded sequence is placed, is assumed
172 to consist of at least \exmp{SLUTF8_MAX_BLEN+1} bytes.
173\seealso{SLutf8_encode}
174\done
175
176\function{SLutf8_strup}
177\synopsis{Uppercase a UTF-8 encoded string}
178\usage{SLuchar_Type *SLutf8_strup (SLuchar_Type *u, SLuchar_Type *umax)}
179\description
180 The \cfun{SLutf8_strup} function returns the uppercase equivalent of
181 the UTF-8 encoded sequence of \exmp{umax-u} bytes at \exmp{u}.  The
182 result will be returned as a null-terminated \exmp{SLstring} and
183 should be freed with \cfun{SLang_free_slstring} when it is no longer
184 needed.  If the function encounters an invalid or illegal byte
185 sequence, then the byte-sequence will be copied as-is.
186\seealso{SLutf8_strlo, SLwchar_toupper}
187\done
188
189\function{SLutf8_strlo}
190\synopsis{Lowercase a UTF-8 encoded string}
191\usage{SLuchar_Type *SLutf8_strlo (SLuchar_Type *u, SLuchar_Type *umax)}
192\description
193 The \cfun{SLutf8_strlo} function returns the lowercase equivalent of
194 the UTF-8 encoded sequence of \exmp{umax-u} bytes at \exmp{u}.  The
195 result will be returned as a null-terminated \exmp{SLstring} and
196 should be freed with \cfun{SLang_free_slstring} when it is no longer
197 needed.  If the function encounters an invalid or illegal byte
198 sequence, then the byte-sequence will be copied as-is.
199\seealso{SLutf8_strup, SLwchar_tolower}
200\done
201
202\function{SLutf8_subst_wchar}
203\synopsis{Replace a character in a UTF-8 encoded string}
204\usage{SLstr_Type *SLutf8_subst_wchar (u, umax, wch, nth, ignore_combining)}
205#v+
206   SLuchar_Type *u, *umax;
207   SLwchar_Type wch;
208   unsigned int nth;
209   int ignore_combining;
210#v-
211\description
212 The \cfun{SLutf8_subst_wchar} function replaces the UTF-8 sequence
213 representing the \exmp{nth} character of \exmp{u} by the UTF-8
214 representation of the character \exmp{wch}.  If the value of the
215 \exmp{ignore_combining} parameter is non-zero, then combining
216 characters will not be counted when computing the position of the
217 \exmp{nth} character.  In addition, if the \exmp{nth} character
218 contains any combining characters, then the byte-sequence associated
219 with those characters will also be replaced.
220
221 Since the byte sequence representing \exmp{wch} could be longer than
222 the sequence of the \exmp{nth} character, the function returns a new
223 copy of the resulting string as an \exmp{SLstring}.  Hence, the
224 calling function should call \cfun{SLang_free_slstring} when the
225 result is no longer needed.
226\seealso{SLutf8_strup, SLutf8_strlo, SLutf8_skip_chars, SLutf8_strlen}
227\done
228
229\function{SLutf8_compare}
230\synopsis{Compare two UTF-8 encoded sequences}
231\usage{int SLutf8_compare (a, amax, b, bmax, nchars, case_sensitive)}
232#v+
233   SLuchar_Type *a, *amax;
234   SLuchar_Type *b, *bmax;
235   unsigned int nchars;
236   int case_sensitive;
237#v-
238\description
239 This function compares \exmp{nchars} of one UTF-8 encoded character
240 sequence to another by performing a character by character comparison.
241 The function returns 0, +1, or -1 according to whether the string
242 \exmp{a} is equal to, greater than, or less than the string at
243 \exmp{b}.  At most \exmp{nchars} characters will be tested.  The
244 parameters \exmp{amax} and \exmp{bmax} serve as upper boundaries of
245 the strings \exmp{a} and \exmp{b}, resp.
246
247 If the value of the \exmp{case_sensitive} parameter is non-zero, then
248 a case-sensitive comparison will be performed, otherwise characters
249 will be compared in a case-insensitive manner.
250\notes
251 For case-sensitive comparisons, this function is analogous to the
252 standard C library's \cfun{strncmp} function.  However,
253 \cfun{SLutf8_compare} can also cope with invalid or illegal UTF-8
254 sequences.
255\seealso{SLutf8_strup, SLutf8_strlo, SLutf8_strlen}
256\done
257
258#% \function{SLutf8_decode_bytes}
259#% \synopsis{}
260#% \usage{int SLutf8_decode_bytes (u, umax, b, np)}
261#% #v+
262#%   SLuchar_Type *u, *umax;
263#%   unsigned char *b;
264#%   unsigned int *np;
265#% #v-
266#% \description
267#% \seealso{}
268#% \done
269
270#% \function{SLutf8_encode_bytes}
271#% \synopsis{UTF-8 encode a byte-sequence}
272#% \usage{SLuchar_Type *SLutf8_encode_bytes (b, bmax, u, ulen, np)}
273#% #v+
274#%    unsigned char *b, *bmax;
275#%    SLuchar_Type *u;
276#%    unsigned int ulen;
277#%    unsigned int *np;
278#% #v-
279#% \description
280#%   The \cfun{SLutf8_encode_bytes} function UTF-8 encodes each byte
281#%   between \exmp{b} and \exmp{bmax} and places the \exmp{bmax-b}
282#%   encoded characters into the buffer at \exmp{u}, whose length is
283#%   given by \exmp{ulen}.  Upon return, \exmp{*np} will be set to the
284#%   number of bytes successfully encoded.  The number will be less than
285#%   the number requested if the buffer at \exmp{u} is too small.
286#% \notes
287#%   This function interprets the value of each byte as a wide-character
288#%   to be encoded.  As such, the function can be used to UTF-8 encode
289#%   characters from an iso-latin-1 character set.
290#% \seealso{SLutf8_decode_bytes}
291#% \done
292
293