1 /*
2 ******************************************************************************
3 *
4 * © 2016 and later: Unicode, Inc. and others.
5 * License & terms of use: http://www.unicode.org/copyright.html
6 *
7 ******************************************************************************
8 *   file name:  ubiditransform.h
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2016jul24
14 *   created by: Lina Kemmel
15 *
16 */
17 
18 #ifndef UBIDITRANSFORM_H
19 #define UBIDITRANSFORM_H
20 
21 #include "unicode/utypes.h"
22 #include "unicode/ubidi.h"
23 #include "unicode/uchar.h"
24 
25 #if U_SHOW_CPLUSPLUS_API
26 #include "unicode/localpointer.h"
27 #endif   // U_SHOW_CPLUSPLUS_API
28 
29 /**
30  * \file
31  * \brief Bidi Transformations
32  */
33 
34 /**
35  * `UBiDiOrder` indicates the order of text.
36  *
37  * This bidi transformation engine supports all possible combinations (4 in
38  * total) of input and output text order:
39  *
40  *   - <logical input, visual output>: unless the output direction is RTL, this
41  *     corresponds to a normal operation of the Bidi algorithm as described in the
42  *     Unicode Technical Report and implemented by `UBiDi` when the
43  *     reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL
44  *     mode is not supported by `UBiDi` and is accomplished through
45  *     reversing a visual LTR string,
46  *
47  *   - <visual input, logical output>: unless the input direction is RTL, this
48  *     corresponds to an "inverse bidi algorithm" in `UBiDi` with the
49  *     reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`.
50  *     Visual RTL mode is not supported by `UBiDi` and is
51  *     accomplished through reversing a visual LTR string,
52  *
53  *   - <logical input, logical output>: if the input and output base directions
54  *     mismatch, this corresponds to the `UBiDi` implementation with the
55  *     reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the
56  *     input and output base directions are identical, the transformation engine
57  *     will only handle character mirroring and Arabic shaping operations without
58  *     reordering,
59  *
60  *   - <visual input, visual output>: this reordering mode is not supported by
61  *     the `UBiDi` engine; it implies character mirroring, Arabic
62  *     shaping, and - if the input/output base directions mismatch - string
63  *     reverse operations.
64  * @see ubidi_setInverse
65  * @see ubidi_setReorderingMode
66  * @see UBIDI_REORDER_DEFAULT
67  * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
68  * @see UBIDI_REORDER_RUNS_ONLY
69  * @stable ICU 58
70  */
71 typedef enum {
72     /** 0: Constant indicating a logical order.
73       * This is the default for input text.
74       * @stable ICU 58
75       */
76     UBIDI_LOGICAL = 0,
77     /** 1: Constant indicating a visual order.
78       * This is the default for output text.
79       * @stable ICU 58
80       */
81     UBIDI_VISUAL
82 } UBiDiOrder;
83 
84 /**
85  * <code>UBiDiMirroring</code> indicates whether characters with the "mirrored"
86  * property in RTL runs should be replaced with their mirror-image counterparts
87  * (see the <code>doMirroring</code> parameter of ubiditransform_transform()).
88  * @see UBIDI_DO_MIRRORING
89  * @see ubidi_setReorderingOptions
90  * @see ubidi_writeReordered
91  * @see ubidi_writeReverse
92  * @stable ICU 58
93  */
94 typedef enum {
95     /** 0: Constant indicating that character mirroring should not be
96       * performed.
97       * This is the default.
98       * @stable ICU 58
99       */
100     UBIDI_MIRRORING_OFF = 0,
101     /** 1: Constant indicating that character mirroring should be performed.
102       * This corresponds to calling <code>ubidi_writeReordered</code> or
103       * <code>ubidi_writeReverse</code> with the
104       * <code>UBIDI_DO_MIRRORING</code> option bit set.
105       * @stable ICU 58
106       */
107     UBIDI_MIRRORING_ON
108 } UBiDiMirroring;
109 
110 /**
111  * Forward declaration of the opaque <code>UBiDiTransform</code> structure that
112  * stores information used by the layout transformation engine.
113  * @stable ICU 58
114  */
115 typedef struct UBiDiTransform UBiDiTransform;
116 
117 /**
118  * Performs transformation of text from the bidi layout defined by the input
119  * ordering scheme to the bidi layout defined by the output ordering scheme,
120  * and applies character mirroring and Arabic shaping operations.<p>
121  * In terms of <code>UBiDi</code>, such a transformation implies:
122  * <ul>
123  * <li>calling <code>ubidi_setReorderingMode</code> as needed (when the
124  * reordering mode is other than normal),</li>
125  * <li>calling <code>ubidi_setInverse</code> as needed (when text should be
126  * transformed from a visual to a logical form),</li>
127  * <li>resolving embedding levels of each character in the input text by
128  * calling <code>ubidi_setPara</code>,</li>
129  * <li>reordering the characters based on the computed embedding levels, also
130  * performing character mirroring as needed, and streaming the result to the
131  * output, by calling <code>ubidi_writeReordered</code>,</li>
132  * <li>performing Arabic digit and letter shaping on the output text by calling
133  * <code>u_shapeArabic</code>.</li>
134  * </ul>
135  * An "ordering scheme" encompasses the base direction and the order of text,
136  * and these characteristics must be defined by the caller for both input and
137  * output explicitly.<p>
138  * There are 36 possible combinations of <input, output> ordering schemes,
139  * which are partially supported by <code>UBiDi</code> already. Examples of the
140  * currently supported combinations:
141  * <ul>
142  * <li><Logical LTR, Visual LTR>: this is equivalent to calling
143  * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
144  * <li><Logical RTL, Visual LTR>: this is equivalent to calling
145  * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li>
146  * <li><Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to
147  * calling <code>ubidi_setPara</code> with
148  * <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li>
149  * <li><Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to
150  * calling <code>ubidi_setPara</code> with
151  * <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li>
152  * <li><Visual LTR, Logical LTR>: this is equivalent to
153  * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then
154  * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
155  * <li><Visual LTR, Logical RTL>: this is equivalent to
156  * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then
157  * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li>
158  * </ul>
159  * All combinations that involve the Visual RTL scheme are unsupported by
160  * <code>UBiDi</code>, for instance:
161  * <ul>
162  * <li><Logical LTR, Visual RTL>,</li>
163  * <li><Visual RTL, Logical RTL>.</li>
164  * </ul>
165  * <p>Example of usage of the transformation engine:<br>
166  * <pre>
167  * \code
168  * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0};
169  * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0};
170  * UErrorCode errorCode = U_ZERO_ERROR;
171  * // Run a transformation.
172  * ubiditransform_transform(pBiDiTransform,
173  *          text1, -1, text2, -1,
174  *          UBIDI_LTR, UBIDI_VISUAL,
175  *          UBIDI_RTL, UBIDI_LOGICAL,
176  *          UBIDI_MIRRORING_OFF,
177  *          U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
178  *          &errorCode);
179  * // Do something with text2.
180  *  text2[4] = '2';
181  * // Run a reverse transformation.
182  * ubiditransform_transform(pBiDiTransform,
183  *          text2, -1, text1, -1,
184  *          UBIDI_RTL, UBIDI_LOGICAL,
185  *          UBIDI_LTR, UBIDI_VISUAL,
186  *          UBIDI_MIRRORING_OFF,
187  *          U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
188  *          &errorCode);
189  *\endcode
190  * </pre>
191  * </p>
192  *
193  * @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object
194  *        allocated with <code>ubiditransform_open()</code> or
195  *        <code>NULL</code>.<p>
196  *        This object serves for one-time setup to amortize initialization
197  *        overheads. Use of this object is not thread-safe. All other threads
198  *        should allocate a new <code>UBiDiTransform</code> object by calling
199  *        <code>ubiditransform_open()</code> before using it. Alternatively,
200  *        a caller can set this parameter to <code>NULL</code>, in which case
201  *        the object will be allocated by the engine on the fly.</p>
202  * @param src A pointer to the text that the Bidi layout transformations will
203  *        be performed on.
204  *        <p><strong>Note:</strong> the text must be (at least)
205  *        <code>srcLength</code> long.</p>
206  * @param srcLength The length of the text, in number of UChars. If
207  *        <code>srcLength == -1</code> then the text must be zero-terminated.
208  * @param dest A pointer to where the processed text is to be copied.
209  * @param destSize The size of the <code>dest</code> buffer, in number of
210  *        UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set,
211  *        then the destination length could be as large as
212  *        <code>srcLength * 2</code>. Otherwise, the destination length will
213  *        not exceed <code>srcLength</code>. If the caller reserves the last
214  *        position for zero-termination, it should be excluded from
215  *        <code>destSize</code>.
216  *        <p><code>destSize == -1</code> is allowed and makes sense when
217  *        <code>dest</code> holds some meaningful value, e.g. that of
218  *        <code>src</code>. In this case <code>dest</code> must be
219  *        zero-terminated.</p>
220  * @param inParaLevel A base embedding level of the input as defined in
221  *        <code>ubidi_setPara</code> documentation for the
222  *        <code>paraLevel</code> parameter.
223  * @param inOrder An order of the input, which can be one of the
224  *        <code>UBiDiOrder</code> values.
225  * @param outParaLevel A base embedding level of the output as defined in
226  *        <code>ubidi_setPara</code> documentation for the
227  *        <code>paraLevel</code> parameter.
228  * @param outOrder An order of the output, which can be one of the
229  *        <code>UBiDiOrder</code> values.
230  * @param doMirroring Indicates whether or not to perform character mirroring,
231  *        and can accept one of the <code>UBiDiMirroring</code> values.
232  * @param shapingOptions Arabic digit and letter shaping options defined in the
233  *        ushape.h documentation.
234  *        <p><strong>Note:</strong> Direction indicator options are computed by
235  *        the transformation engine based on the effective ordering schemes, so
236  *        user-defined direction indicators will be ignored.</p>
237  * @param pErrorCode A pointer to an error code value.
238  *
239  * @return The destination length, i.e. the number of UChars written to
240  *         <code>dest</code>. If the transformation fails, the return value
241  *         will be 0 (and the error code will be written to
242  *         <code>pErrorCode</code>).
243  *
244  * @see UBiDiLevel
245  * @see UBiDiOrder
246  * @see UBiDiMirroring
247  * @see ubidi_setPara
248  * @see u_shapeArabic
249  * @stable ICU 58
250  */
251 U_CAPI uint32_t U_EXPORT2
252 ubiditransform_transform(UBiDiTransform *pBiDiTransform,
253             const UChar *src, int32_t srcLength,
254             UChar *dest, int32_t destSize,
255             UBiDiLevel inParaLevel, UBiDiOrder inOrder,
256             UBiDiLevel outParaLevel, UBiDiOrder outOrder,
257             UBiDiMirroring doMirroring, uint32_t shapingOptions,
258             UErrorCode *pErrorCode);
259 
260 /**
261  * Allocates a <code>UBiDiTransform</code> object. This object can be reused,
262  * e.g. with different ordering schemes, mirroring or shaping options.<p>
263  * <strong>Note:</strong> The object can only be reused in the same thread.
264  * All other threads should allocate a new <code>UBiDiTransform</code> object
265  * before using it.<p>
266  * Example of usage:<p>
267  * <pre>
268  * \code
269  * UErrorCode errorCode = U_ZERO_ERROR;
270  * // Open a new UBiDiTransform.
271  * UBiDiTransform* transform = ubiditransform_open(&errorCode);
272  * // Run a transformation.
273  * ubiditransform_transform(transform,
274  *          text1, -1, text2, -1,
275  *          UBIDI_RTL, UBIDI_LOGICAL,
276  *          UBIDI_LTR, UBIDI_VISUAL,
277  *          UBIDI_MIRRORING_ON,
278  *          U_SHAPE_DIGITS_EN2AN,
279  *          &errorCode);
280  * // Do something with the output text and invoke another transformation using
281  * //   that text as input.
282  * ubiditransform_transform(transform,
283  *          text2, -1, text3, -1,
284  *          UBIDI_LTR, UBIDI_VISUAL,
285  *          UBIDI_RTL, UBIDI_VISUAL,
286  *          UBIDI_MIRRORING_ON,
287  *          0, &errorCode);
288  *\endcode
289  * </pre>
290  * <p>
291  * The <code>UBiDiTransform</code> object must be deallocated by calling
292  * <code>ubiditransform_close()</code>.
293  * @param pErrorCode A pointer to an error code value.
294  * @return An empty <code>UBiDiTransform</code> object.
295  * @stable ICU 58
296  */
297 U_CAPI UBiDiTransform* U_EXPORT2
298 ubiditransform_open(UErrorCode *pErrorCode);
299 
300 /**
301  * Deallocates the given object allocated by <code>ubiditransform_open()</code>.
302  * @stable ICU 58
303  */
304 U_CAPI void U_EXPORT2
305 ubiditransform_close(UBiDiTransform *pBidiTransform);
306 
307 #if U_SHOW_CPLUSPLUS_API
308 
309 U_NAMESPACE_BEGIN
310 
311 /**
312  * \class LocalUBiDiTransformPointer
313  * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close().
314  * For most methods see the LocalPointerBase base class.
315  *
316  * @see LocalPointerBase
317  * @see LocalPointer
318  * @stable ICU 58
319  */
320 U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);
321 
322 U_NAMESPACE_END
323 
324 #endif   // U_SHOW_CPLUSPLUS_API
325 
326 #endif
327