1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
2  *
3  *  Copyright (C) 2010  Uri Sivan
4  *
5  *  This program is free software; you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation; either version 2, or (at your option)
8  *  any later version.
9  *
10  *  The Rhythmbox authors hereby grant permission for non-GPL compatible
11  *  GStreamer plugins to be used and distributed together with GStreamer
12  *  and Rhythmbox. This permission is above and beyond the permissions granted
13  *  by the GPL license by which Rhythmbox is covered. If you modify this code
14  *  you may extend this exception to your version of the code, but you are not
15  *  obligated to do so. If you do not wish to do so, delete this exception
16  *  statement from your version.
17  *
18  *  This program is distributed in the hope that it will be useful,
19  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  *  GNU General Public License for more details.
22  *
23  *  You should have received a copy of the GNU General Public License
24  *  along with this program; if not, write to the Free Software
25  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA.
26  *
27  */
28 
29 #include "config.h"
30 
31 #include <rb-text-helpers.h>
32 
33 /**
34  * SECTION:rb-text-helpers
35  * @short_description: text direction (LTR/RTL) functions
36  *
37  * Provides some helper functions for constructing strings that
38  * may include both left-to-right and right-to-left text.
39  */
40 
/* Unicode direction markup characters
 * see http://unicode.org/reports/tr9/, in particular sections 2.1-2.4
 *
 * LRM = Left-to-Right Mark = invisible character with LTR direction
 * RLM = Right-to-Left Mark = invisible character with RTL direction
 * LRE = Left-to-Right Embedding = start of LTR "island" in RTL text
 * RLE = Right-to-Left Embedding = start of RTL "island" in LTR text
 * PDF = Pop Directional Format = close last LRE or RLE section
 *
 * The following constants hold the UTF-8 encoding of each character;
 * the corresponding code point is noted next to each one.
 */
static const char *const UNICODE_LRM = "\xE2\x80\x8E";	/* U+200E */
static const char *const UNICODE_RLM = "\xE2\x80\x8F";	/* U+200F */
static const char *const UNICODE_LRE = "\xE2\x80\xAA";	/* U+202A */
static const char *const UNICODE_RLE = "\xE2\x80\xAB";	/* U+202B */
static const char *const UNICODE_PDF = "\xE2\x80\xAC";	/* U+202C */
57 
58 static void
append_and_free(GString * str,char * text)59 append_and_free (GString *str, char *text)
60 {
61 	g_string_append (str, text);
62 	g_free (text);
63 }
64 
65 /**
66  * rb_text_direction_conflict:
67  * @dir1: direction A
68  * @dir2: direction B
69  *
70  * Direction conflict here means the two directions are defined (non-neutral)
71  * and they are different.
72  *
73  * Return value: %TRUE if the two directions conflict.
74  */
75 gboolean
rb_text_direction_conflict(PangoDirection dir1,PangoDirection dir2)76 rb_text_direction_conflict (PangoDirection dir1, PangoDirection dir2)
77 {
78 	return (dir1 != dir2) &&
79 	       (dir1 != PANGO_DIRECTION_NEUTRAL) &&
80 	       (dir2 != PANGO_DIRECTION_NEUTRAL);
81 }
82 
83 /**
84  * rb_text_common_direction:
85  * @first: first string
86  * @...: rest of strings, terminated with %NULL
87  *
88  * This functions checks the direction of all given strings and:
89  *
90  * 1. If all strings are direction neutral, returns %PANGO_DIRECTION_NEUTRAL;
91  *
92  * 2. If all strings are either LTR or neutral, returns %PANGO_DIRECTION_LTR;
93  *
94  * 3. If all strings are either RTL or neutral, returns %PANGO_DIRECTION_RTL;
95  *
96  * 4. If at least one is RTL and one LTR, returns %PANGO_DIRECTION_NEUTRAL.
97  *
98  * Note: neutral (1) and mixed (4) are two very different situations,
99  * they share a return code here only because they're the same for our
100  * specific use.
101  *
102  * Return value: common direction of all strings, as defined above.
103  */
104 PangoDirection
rb_text_common_direction(const char * first,...)105 rb_text_common_direction (const char *first, ...)
106 {
107 	PangoDirection common_dir = PANGO_DIRECTION_NEUTRAL;
108 	PangoDirection text_dir;
109 	const char *text;
110 	va_list args;
111 
112 	va_start (args, first);
113 
114 	for (text = first; text; text = va_arg(args, const char *)) {
115 		if (!text[0])
116 			continue;
117 
118 		text_dir = pango_find_base_dir (text, -1);
119 
120 		if (rb_text_direction_conflict (text_dir, common_dir)) {
121 			/* mixed direction */
122 			common_dir = PANGO_DIRECTION_NEUTRAL;
123 			break;
124 		}
125 
126 		common_dir = text_dir;
127 	}
128 
129 	va_end (args);
130 
131 	return common_dir;
132 }
133 
134 /**
135  * rb_text_cat:
136  * @base_dir: direction of the result string.
137  * @...: pairs of strings (content, format) terminated with %NULL.
138  *
139  * This function concatenates strings to a single string, preserving
140  * each part's original direction (LTR or RTL) using unicode markup,
141  * as detailed here: http://unicode.org/reports/tr9/.
142  *
143  * It is called like this:
144  *
145  * s = rb_text_cat(base_dir, str1, format1, ..., strN, formatN, %NULL)
146  *
147  * Format is a printf format with exactly one \%s. "\%s" or "" will
148  * insert the string as is.
149  *
150  * Any string that is empty ("") will be skipped, its format must still be
151  * passed.
152  *
153  * A space is inserted between strings.
154  *
155  * The algorithm:
156  *
157  * 1. Caller supplies the base direction of the result in base_dir.
158  *
159  * 2. Insert either LRM or RLM at the beginning of the string to set
160  *    its base direction, according to base_dir.
161  *
162  * 3. Find the direction of each string using pango.
163  *
164  * 4. For strings that have the same direction as the base direction,
165  *    just insert them in.
166  *
167  * 5. For strings that have the opposite direction than the base one,
168  *    insert them surrounded with embedding codes RLE/LRE .. PDF.
169  *
170  * Return value: a new string containing the result.
171  */
172 char *
rb_text_cat(PangoDirection base_dir,...)173 rb_text_cat (PangoDirection base_dir, ...)
174 {
175 	PangoDirection text_dir;
176 	va_list args;
177 	const char *embed_start;
178 	const char *embed_stop = UNICODE_PDF;
179 	GString *result;
180 	int first_char;
181 
182 	va_start (args, base_dir);
183 
184 	result = g_string_sized_new (100);
185 
186 	if (base_dir == PANGO_DIRECTION_LTR) {
187 		/* base direction LTR, embedded parts are RTL */
188 		g_string_append (result, UNICODE_LRM);
189 		embed_start = UNICODE_RLE;
190 	} else {
191 		/* base direction RTL, embedded parts are LTR */
192 		g_string_append (result, UNICODE_RLM);
193 		embed_start = UNICODE_LRE;
194 	}
195 	first_char = result->len;
196 
197 	while (1) {
198 		const char *text = va_arg (args, const char *);
199 		const char *format;
200 
201 		if (!text)
202 			break;
203 
204 		format = va_arg (args, const char *);
205 		if (!text[0])
206 			continue;
207 		if (!format[0])
208 			format = "%s";
209 
210 		if (result->len > first_char) {
211 			g_string_append (result, " ");
212 		}
213 
214 		text_dir = pango_find_base_dir (text, -1);
215 
216 		if (rb_text_direction_conflict (text_dir, base_dir)) {
217 			/* surround text with embed codes */
218 			g_string_append (result, embed_start);
219 			append_and_free (result, g_markup_printf_escaped (format, text));
220 			g_string_append (result, embed_stop);
221 		} else {
222 			append_and_free (result, g_markup_printf_escaped (format, text));
223 		}
224 	}
225 
226 	va_end (args);
227 
228 	return g_string_free (result, FALSE);
229 }
230