1 /*
2 ******************************************************************************
3 *
4 * © 2016 and later: Unicode, Inc. and others.
5 * License & terms of use: http://www.unicode.org/copyright.html
6 *
7 ******************************************************************************
8 *   file name:  ubiditransform.h
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2016jul24
14 *   created by: Lina Kemmel
15 *
16 */
17 
18 #ifndef UBIDITRANSFORM_H
19 #define UBIDITRANSFORM_H
20 
21 #include "unicode/utypes.h"
22 #include "unicode/ubidi.h"
23 #include "unicode/uchar.h"
24 #include "unicode/localpointer.h"
25 
26 /**
27  * \file
28  * \brief Bidi Transformations
29  */
30 
31 /**
32  * `UBiDiOrder` indicates the order of text.
33  *
34  * This bidi transformation engine supports all possible combinations (4 in
35  * total) of input and output text order:
36  *
37  *   - <logical input, visual output>: unless the output direction is RTL, this
38  *     corresponds to a normal operation of the Bidi algorithm as described in the
39  *     Unicode Technical Report and implemented by `UBiDi` when the
40  *     reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL
41  *     mode is not supported by `UBiDi` and is accomplished through
42  *     reversing a visual LTR string,
43  *
44  *   - <visual input, logical output>: unless the input direction is RTL, this
45  *     corresponds to an "inverse bidi algorithm" in `UBiDi` with the
46  *     reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`.
47  *     Visual RTL mode is not supported by `UBiDi` and is
48  *     accomplished through reversing a visual LTR string,
49  *
50  *   - <logical input, logical output>: if the input and output base directions
51  *     mismatch, this corresponds to the `UBiDi` implementation with the
52  *     reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the
53  *     input and output base directions are identical, the transformation engine
54  *     will only handle character mirroring and Arabic shaping operations without
55  *     reordering,
56  *
57  *   - <visual input, visual output>: this reordering mode is not supported by
58  *     the `UBiDi` engine; it implies character mirroring, Arabic
59  *     shaping, and - if the input/output base directions mismatch - string
60  *     reverse operations.
61  * @see ubidi_setInverse
62  * @see ubidi_setReorderingMode
63  * @see UBIDI_REORDER_DEFAULT
64  * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
65  * @see UBIDI_REORDER_RUNS_ONLY
66  * @stable ICU 58
67  */
68 typedef enum {
69     /** 0: Constant indicating a logical order.
70       * This is the default for input text.
71       * @stable ICU 58
72       */
73     UBIDI_LOGICAL = 0,
74     /** 1: Constant indicating a visual order.
75       * This is the default for output text.
76       * @stable ICU 58
77       */
78     UBIDI_VISUAL
79 } UBiDiOrder;
80 
81 /**
82  * <code>UBiDiMirroring</code> indicates whether or not characters with the
83  * "mirrored" property in RTL runs should be replaced with their mirror-image
84  * counterparts.
85  * @see UBIDI_DO_MIRRORING
86  * @see ubidi_setReorderingOptions
87  * @see ubidi_writeReordered
88  * @see ubidi_writeReverse
89  * @stable ICU 58
90  */
91 typedef enum {
92     /** 0: Constant indicating that character mirroring should not be
93       * performed.
94       * This is the default (no mirroring).
95       * @stable ICU 58
96       */
97     UBIDI_MIRRORING_OFF = 0,
98     /** 1: Constant indicating that character mirroring should be performed
99       * (the value follows <code>UBIDI_MIRRORING_OFF</code>). This corresponds to calling <code>ubidi_writeReordered</code> or
100       * <code>ubidi_writeReverse</code> with the
101       * <code>UBIDI_DO_MIRRORING</code> option bit set.
102       * @stable ICU 58
103       */
104     UBIDI_MIRRORING_ON
105 } UBiDiMirroring;
106 
107 /**
108  * Forward declaration of the <code>UBiDiTransform</code> structure that stores
109  * information used by the layout transformation engine. Only the incomplete type is declared in this header; objects are allocated with <code>ubiditransform_open()</code> and released with <code>ubiditransform_close()</code>.
110  * @stable ICU 58
111  */
112 typedef struct UBiDiTransform UBiDiTransform;
113 
114 /**
115  * Performs transformation of text from the bidi layout defined by the input
116  * ordering scheme to the bidi layout defined by the output ordering scheme,
117  * and applies character mirroring and Arabic shaping operations.<p>
118  * In terms of <code>UBiDi</code>, such a transformation implies:
119  * <ul>
120  * <li>calling <code>ubidi_setReorderingMode</code> as needed (when the
121  * reordering mode is other than normal),</li>
122  * <li>calling <code>ubidi_setInverse</code> as needed (when text should be
123  * transformed from a visual to a logical form),</li>
124  * <li>resolving embedding levels of each character in the input text by
125  * calling <code>ubidi_setPara</code>,</li>
126  * <li>reordering the characters based on the computed embedding levels, also
127  * performing character mirroring as needed, and streaming the result to the
128  * output, by calling <code>ubidi_writeReordered</code>,</li>
129  * <li>performing Arabic digit and letter shaping on the output text by calling
130  * <code>u_shapeArabic</code>.</li>
131  * </ul>
132  * An "ordering scheme" encompasses the base direction and the order of text,
133  * and these characteristics must be defined by the caller for both input and
134  * output explicitly.<p>
135  * There are 36 possible combinations of <input, output> ordering schemes,
136  * which are partially supported by <code>UBiDi</code> already. Examples of the
137  * currently supported combinations:
138  * <ul>
139  * <li><Logical LTR, Visual LTR>: this is equivalent to calling
140  * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
141  * <li><Logical RTL, Visual LTR>: this is equivalent to calling
142  * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li>
143  * <li><Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to
144  * calling <code>ubidi_setPara</code> with
145  * <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li>
146  * <li><Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to
147  * calling <code>ubidi_setPara</code> with
148  * <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li>
149  * <li><Visual LTR, Logical LTR>: this is equivalent to
150  * calling <code>ubidi_setInverse(UBiDi*, TRUE)</code> and then
151  * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
152  * <li><Visual LTR, Logical RTL>: this is equivalent to
153  * calling <code>ubidi_setInverse(UBiDi*, TRUE)</code> and then
154  * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li>
155  * </ul>
156  * All combinations that involve the Visual RTL scheme are unsupported by
157  * <code>UBiDi</code>, for instance:
158  * <ul>
159  * <li><Logical LTR, Visual RTL>,</li>
160  * <li><Visual RTL, Logical RTL>.</li>
161  * </ul>
162  * <p>Example of usage of the transformation engine:<br>
163  * <pre>
164  * \code
165  * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0};
166  * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0};
167  * UErrorCode errorCode = U_ZERO_ERROR;
168  * // Run a transformation.
169  * ubiditransform_transform(pBidiTransform,
170  *          text1, -1, text2, -1,
171  *          UBIDI_LTR, UBIDI_VISUAL,
172  *          UBIDI_RTL, UBIDI_LOGICAL,
173  *          UBIDI_MIRRORING_OFF,
174  *          U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
175  *          &errorCode);
176  * // Do something with text2.
177  *  text2[4] = '2';
178  * // Run a reverse transformation.
179  * ubiditransform_transform(pBidiTransform,
180  *          text2, -1, text1, -1,
181  *          UBIDI_RTL, UBIDI_LOGICAL,
182  *          UBIDI_LTR, UBIDI_VISUAL,
183  *          UBIDI_MIRRORING_OFF,
184  *          U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
185  *          &errorCode);
186  *\endcode
187  * </pre>
188  * </p>
189  *
190  * @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object
191  *        allocated with <code>ubiditransform_open()</code> or
192  *        <code>NULL</code>.<p>
193  *        This object serves for one-time setup to amortize initialization
194  *        overheads. Use of this object is not thread-safe. All other threads
195  *        should allocate a new <code>UBiDiTransform</code> object by calling
196  *        <code>ubiditransform_open()</code> before using it. Alternatively,
197  *        a caller can set this parameter to <code>NULL</code>, in which case
198  *        the object will be allocated by the engine on the fly.</p>
199  * @param src A pointer to the text that the Bidi layout transformations will
200  *        be performed on.
201  *        <p><strong>Note:</strong> the text must be (at least)
202  *        <code>srcLength</code> long.</p>
203  * @param srcLength The length of the text, in number of UChars. If
204  *        <code>srcLength == -1</code> then the text must be zero-terminated.
205  * @param dest A pointer to where the processed text is to be copied.
206  * @param destSize The size of the <code>dest</code> buffer, in number of
207  *        UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set,
208  *        then the destination length could be as large as
209  *        <code>srcLength * 2</code>. Otherwise, the destination length will
210  *        not exceed <code>srcLength</code>. If the caller reserves the last
211  *        position for zero-termination, it should be excluded from
212  *        <code>destSize</code>.
213  *        <p><code>destSize == -1</code> is allowed and makes sense when
214  *        <code>dest</code> holds some meaningful value, e.g. that of
215  *        <code>src</code>. In this case <code>dest</code> must be
216  *        zero-terminated.</p>
217  * @param inParaLevel A base embedding level of the input as defined in
218  *        <code>ubidi_setPara</code> documentation for the
219  *        <code>paraLevel</code> parameter.
220  * @param inOrder An order of the input, which can be one of the
221  *        <code>UBiDiOrder</code> values.
222  * @param outParaLevel A base embedding level of the output as defined in
223  *        <code>ubidi_setPara</code> documentation for the
224  *        <code>paraLevel</code> parameter.
225  * @param outOrder An order of the output, which can be one of the
226  *        <code>UBiDiOrder</code> values.
227  * @param doMirroring Indicates whether or not to perform character mirroring,
228  *        and can accept one of the <code>UBiDiMirroring</code> values.
229  * @param shapingOptions Arabic digit and letter shaping options defined in the
230  *        ushape.h documentation.
231  *        <p><strong>Note:</strong> Direction indicator options are computed by
232  *        the transformation engine based on the effective ordering schemes, so
233  *        user-defined direction indicators will be ignored.</p>
234  * @param pErrorCode A pointer to an error code value.
235  *
236  * @return The destination length, i.e. the number of UChars written to
237  *         <code>dest</code>. If the transformation fails, the return value
238  *         will be 0 (and the error code will be written to
239  *         <code>pErrorCode</code>).
240  *
241  * @see UBiDiLevel
242  * @see UBiDiOrder
243  * @see UBiDiMirroring
244  * @see ubidi_setPara
245  * @see u_shapeArabic
246  * @stable ICU 58
247  */
248 U_STABLE uint32_t U_EXPORT2
249 ubiditransform_transform(UBiDiTransform *pBiDiTransform,
250             const UChar *src, int32_t srcLength,
251             UChar *dest, int32_t destSize,
252             UBiDiLevel inParaLevel, UBiDiOrder inOrder,
253             UBiDiLevel outParaLevel, UBiDiOrder outOrder,
254             UBiDiMirroring doMirroring, uint32_t shapingOptions,
255             UErrorCode *pErrorCode);
256 
257 /**
258  * Allocates a <code>UBiDiTransform</code> object. This object can be reused,
259  * e.g. with different ordering schemes, mirroring or shaping options.<p>
260  * <strong>Note:</strong> The object can only be reused in the same thread.
261  * All other threads should allocate a new <code>UBiDiTransform</code> object
262  * before using it.<p>
263  * Example of usage:<p>
264  * <pre>
265  * \code
266  * UErrorCode errorCode = U_ZERO_ERROR;
267  * // Open a new UBiDiTransform.
268  * UBiDiTransform* transform = ubiditransform_open(&errorCode);
269  * // Run a transformation.
270  * ubiditransform_transform(transform,
271  *          text1, -1, text2, -1,
272  *          UBIDI_RTL, UBIDI_LOGICAL,
273  *          UBIDI_LTR, UBIDI_VISUAL,
274  *          UBIDI_MIRRORING_ON,
275  *          U_SHAPE_DIGITS_EN2AN,
276  *          &errorCode);
277  * // Do something with the output text and invoke another transformation using
278  * //   that text as input.
279  * ubiditransform_transform(transform,
280  *          text2, -1, text3, -1,
281  *          UBIDI_LTR, UBIDI_VISUAL,
282  *          UBIDI_RTL, UBIDI_VISUAL,
283  *          UBIDI_MIRRORING_ON,
284  *          0, &errorCode);
285  *\endcode
286  * </pre>
287  * <p>
288  * The <code>UBiDiTransform</code> object must be deallocated by calling
289  * <code>ubiditransform_close()</code>.
290  * @param pErrorCode A pointer to an error code value.
291  * @return An empty <code>UBiDiTransform</code> object.
292  * @stable ICU 58
293  */
294 U_STABLE UBiDiTransform* U_EXPORT2
295 ubiditransform_open(UErrorCode *pErrorCode);
296 
297 /**
298  * Deallocates the given <code>UBiDiTransform</code> object.
 * @param pBidiTransform A pointer to the <code>UBiDiTransform</code> object
 *        to be deallocated.
299  * @stable ICU 58
300  */
301 U_STABLE void U_EXPORT2
302 ubiditransform_close(UBiDiTransform *pBidiTransform);
303 
304 #if U_SHOW_CPLUSPLUS_API
305 
306 U_NAMESPACE_BEGIN
307 
308 /**
309  * \class LocalUBiDiTransformPointer
310  * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close().
311  * For most methods see the LocalPointerBase base class.
312  *
313  * @see LocalPointerBase
314  * @see LocalPointer
315  * @stable ICU 58
316  */
317 U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);
318 
319 U_NAMESPACE_END
320 
321 #endif /* U_SHOW_CPLUSPLUS_API */
322 
323 #endif
324