// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  idna.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010mar05
*   created by: Markus W. Scherer
*/
16 
17 #ifndef __IDNA_H__
18 #define __IDNA_H__
19 
/**
 * \file
 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
 */
24 
25 #include "unicode/utypes.h"
26 
27 #if U_SHOW_CPLUSPLUS_API
28 
29 #if !UCONFIG_NO_IDNA
30 
31 #include "unicode/bytestream.h"
32 #include "unicode/stringpiece.h"
33 #include "unicode/uidna.h"
34 #include "unicode/unistr.h"
35 
36 U_NAMESPACE_BEGIN
37 
38 class IDNAInfo;
39 
40 /**
41  * Abstract base class for IDNA processing.
42  * See http://www.unicode.org/reports/tr46/
43  * and http://www.ietf.org/rfc/rfc3490.txt
44  *
45  * The IDNA class is not intended for public subclassing.
46  *
47  * This C++ API currently only implements UTS #46.
48  * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
49  * and IDNA2003 (functions that do not use a service object).
50  * @stable ICU 4.6
51  */
52 class U_COMMON_API IDNA : public UObject {
53 public:
54     /**
55      * Destructor.
56      * @stable ICU 4.6
57      */
58     ~IDNA();
59 
60     /**
61      * Returns an IDNA instance which implements UTS #46.
62      * Returns an unmodifiable instance, owned by the caller.
63      * Cache it for multiple operations, and delete it when done.
64      * The instance is thread-safe, that is, it can be used concurrently.
65      *
66      * UTS #46 defines Unicode IDNA Compatibility Processing,
67      * updated to the latest version of Unicode and compatible with both
68      * IDNA2003 and IDNA2008.
69      *
70      * The worker functions use transitional processing, including deviation mappings,
71      * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
72      * is used in which case the deviation characters are passed through without change.
73      *
74      * Disallowed characters are mapped to U+FFFD.
75      *
76      * For available options see the uidna.h header.
77      * Operations with the UTS #46 instance do not support the
78      * UIDNA_ALLOW_UNASSIGNED option.
79      *
80      * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
81      * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
82      * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
83      *
84      * @param options Bit set to modify the processing and error checking.
85      *                See option bit set values in uidna.h.
86      * @param errorCode Standard ICU error code. Its input value must
87      *                  pass the U_SUCCESS() test, or else the function returns
88      *                  immediately. Check for U_FAILURE() on output or use with
89      *                  function chaining. (See User Guide for details.)
90      * @return the UTS #46 IDNA instance, if successful
91      * @stable ICU 4.6
92      */
93     static IDNA *
94     createUTS46Instance(uint32_t options, UErrorCode &errorCode);
95 
96     /**
97      * Converts a single domain name label into its ASCII form for DNS lookup.
98      * If any processing step fails, then info.hasErrors() will be TRUE and
99      * the result might not be an ASCII string.
100      * The label might be modified according to the types of errors.
101      * Labels with severe errors will be left in (or turned into) their Unicode form.
102      *
103      * The UErrorCode indicates an error only in exceptional cases,
104      * such as a U_MEMORY_ALLOCATION_ERROR.
105      *
106      * @param label Input domain name label
107      * @param dest Destination string object
108      * @param info Output container of IDNA processing details.
109      * @param errorCode Standard ICU error code. Its input value must
110      *                  pass the U_SUCCESS() test, or else the function returns
111      *                  immediately. Check for U_FAILURE() on output or use with
112      *                  function chaining. (See User Guide for details.)
113      * @return dest
114      * @stable ICU 4.6
115      */
116     virtual UnicodeString &
117     labelToASCII(const UnicodeString &label, UnicodeString &dest,
118                  IDNAInfo &info, UErrorCode &errorCode) const = 0;
119 
120     /**
121      * Converts a single domain name label into its Unicode form for human-readable display.
122      * If any processing step fails, then info.hasErrors() will be TRUE.
123      * The label might be modified according to the types of errors.
124      *
125      * The UErrorCode indicates an error only in exceptional cases,
126      * such as a U_MEMORY_ALLOCATION_ERROR.
127      *
128      * @param label Input domain name label
129      * @param dest Destination string object
130      * @param info Output container of IDNA processing details.
131      * @param errorCode Standard ICU error code. Its input value must
132      *                  pass the U_SUCCESS() test, or else the function returns
133      *                  immediately. Check for U_FAILURE() on output or use with
134      *                  function chaining. (See User Guide for details.)
135      * @return dest
136      * @stable ICU 4.6
137      */
138     virtual UnicodeString &
139     labelToUnicode(const UnicodeString &label, UnicodeString &dest,
140                    IDNAInfo &info, UErrorCode &errorCode) const = 0;
141 
142     /**
143      * Converts a whole domain name into its ASCII form for DNS lookup.
144      * If any processing step fails, then info.hasErrors() will be TRUE and
145      * the result might not be an ASCII string.
146      * The domain name might be modified according to the types of errors.
147      * Labels with severe errors will be left in (or turned into) their Unicode form.
148      *
149      * The UErrorCode indicates an error only in exceptional cases,
150      * such as a U_MEMORY_ALLOCATION_ERROR.
151      *
152      * @param name Input domain name
153      * @param dest Destination string object
154      * @param info Output container of IDNA processing details.
155      * @param errorCode Standard ICU error code. Its input value must
156      *                  pass the U_SUCCESS() test, or else the function returns
157      *                  immediately. Check for U_FAILURE() on output or use with
158      *                  function chaining. (See User Guide for details.)
159      * @return dest
160      * @stable ICU 4.6
161      */
162     virtual UnicodeString &
163     nameToASCII(const UnicodeString &name, UnicodeString &dest,
164                 IDNAInfo &info, UErrorCode &errorCode) const = 0;
165 
166     /**
167      * Converts a whole domain name into its Unicode form for human-readable display.
168      * If any processing step fails, then info.hasErrors() will be TRUE.
169      * The domain name might be modified according to the types of errors.
170      *
171      * The UErrorCode indicates an error only in exceptional cases,
172      * such as a U_MEMORY_ALLOCATION_ERROR.
173      *
174      * @param name Input domain name
175      * @param dest Destination string object
176      * @param info Output container of IDNA processing details.
177      * @param errorCode Standard ICU error code. Its input value must
178      *                  pass the U_SUCCESS() test, or else the function returns
179      *                  immediately. Check for U_FAILURE() on output or use with
180      *                  function chaining. (See User Guide for details.)
181      * @return dest
182      * @stable ICU 4.6
183      */
184     virtual UnicodeString &
185     nameToUnicode(const UnicodeString &name, UnicodeString &dest,
186                   IDNAInfo &info, UErrorCode &errorCode) const = 0;
187 
188     // UTF-8 versions of the processing methods ---------------------------- ***
189 
190     /**
191      * Converts a single domain name label into its ASCII form for DNS lookup.
192      * UTF-8 version of labelToASCII(), same behavior.
193      *
194      * @param label Input domain name label
195      * @param dest Destination byte sink; Flush()ed if successful
196      * @param info Output container of IDNA processing details.
197      * @param errorCode Standard ICU error code. Its input value must
198      *                  pass the U_SUCCESS() test, or else the function returns
199      *                  immediately. Check for U_FAILURE() on output or use with
200      *                  function chaining. (See User Guide for details.)
201      * @return dest
202      * @stable ICU 4.6
203      */
204     virtual void
205     labelToASCII_UTF8(StringPiece label, ByteSink &dest,
206                       IDNAInfo &info, UErrorCode &errorCode) const;
207 
208     /**
209      * Converts a single domain name label into its Unicode form for human-readable display.
210      * UTF-8 version of labelToUnicode(), same behavior.
211      *
212      * @param label Input domain name label
213      * @param dest Destination byte sink; Flush()ed if successful
214      * @param info Output container of IDNA processing details.
215      * @param errorCode Standard ICU error code. Its input value must
216      *                  pass the U_SUCCESS() test, or else the function returns
217      *                  immediately. Check for U_FAILURE() on output or use with
218      *                  function chaining. (See User Guide for details.)
219      * @return dest
220      * @stable ICU 4.6
221      */
222     virtual void
223     labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
224                        IDNAInfo &info, UErrorCode &errorCode) const;
225 
226     /**
227      * Converts a whole domain name into its ASCII form for DNS lookup.
228      * UTF-8 version of nameToASCII(), same behavior.
229      *
230      * @param name Input domain name
231      * @param dest Destination byte sink; Flush()ed if successful
232      * @param info Output container of IDNA processing details.
233      * @param errorCode Standard ICU error code. Its input value must
234      *                  pass the U_SUCCESS() test, or else the function returns
235      *                  immediately. Check for U_FAILURE() on output or use with
236      *                  function chaining. (See User Guide for details.)
237      * @return dest
238      * @stable ICU 4.6
239      */
240     virtual void
241     nameToASCII_UTF8(StringPiece name, ByteSink &dest,
242                      IDNAInfo &info, UErrorCode &errorCode) const;
243 
244     /**
245      * Converts a whole domain name into its Unicode form for human-readable display.
246      * UTF-8 version of nameToUnicode(), same behavior.
247      *
248      * @param name Input domain name
249      * @param dest Destination byte sink; Flush()ed if successful
250      * @param info Output container of IDNA processing details.
251      * @param errorCode Standard ICU error code. Its input value must
252      *                  pass the U_SUCCESS() test, or else the function returns
253      *                  immediately. Check for U_FAILURE() on output or use with
254      *                  function chaining. (See User Guide for details.)
255      * @return dest
256      * @stable ICU 4.6
257      */
258     virtual void
259     nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
260                       IDNAInfo &info, UErrorCode &errorCode) const;
261 };
262 
263 class UTS46;
264 
265 /**
266  * Output container for IDNA processing errors.
267  * The IDNAInfo class is not suitable for subclassing.
268  * @stable ICU 4.6
269  */
270 class U_COMMON_API IDNAInfo : public UMemory {
271 public:
272     /**
273      * Constructor for stack allocation.
274      * @stable ICU 4.6
275      */
IDNAInfo()276     IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
277     /**
278      * Were there IDNA processing errors?
279      * @return TRUE if there were processing errors
280      * @stable ICU 4.6
281      */
hasErrors()282     UBool hasErrors() const { return errors!=0; }
283     /**
284      * Returns a bit set indicating IDNA processing errors.
285      * See UIDNA_ERROR_... constants in uidna.h.
286      * @return bit set of processing errors
287      * @stable ICU 4.6
288      */
getErrors()289     uint32_t getErrors() const { return errors; }
290     /**
291      * Returns TRUE if transitional and nontransitional processing produce different results.
292      * This is the case when the input label or domain name contains
293      * one or more deviation characters outside a Punycode label (see UTS #46).
294      * <ul>
295      * <li>With nontransitional processing, such characters are
296      * copied to the destination string.
297      * <li>With transitional processing, such characters are
298      * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
299      * </ul>
300      * @return TRUE if transitional and nontransitional processing produce different results
301      * @stable ICU 4.6
302      */
isTransitionalDifferent()303     UBool isTransitionalDifferent() const { return isTransDiff; }
304 
305 private:
306     friend class UTS46;
307 
308     IDNAInfo(const IDNAInfo &other);  // no copying
309     IDNAInfo &operator=(const IDNAInfo &other);  // no copying
310 
reset()311     void reset() {
312         errors=labelErrors=0;
313         isTransDiff=FALSE;
314         isBiDi=FALSE;
315         isOkBiDi=TRUE;
316     }
317 
318     uint32_t errors, labelErrors;
319     UBool isTransDiff;
320     UBool isBiDi;
321     UBool isOkBiDi;
322 };
323 
324 U_NAMESPACE_END
325 
326 #endif  // UCONFIG_NO_IDNA
327 
328 #endif /* U_SHOW_CPLUSPLUS_API */
329 
330 #endif  // __IDNA_H__
331