1 /* EasyTAG - tag editor for audio files
2 * Copyright (C) 2014 David King <amigadave@amigadave.com>
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19 #include "scan.h"
20
21 #include <string.h>
22
23 /*
24 * Function to replace underscore '_' by a space. No need to reallocate.
25 */
26 void
Scan_Convert_Underscore_Into_Space(gchar * string)27 Scan_Convert_Underscore_Into_Space (gchar *string)
28 {
29 gchar *tmp = string;
30
31 while ((tmp = strchr (tmp, '_')) != NULL)
32 {
33 *tmp = ' ';
34 }
35 }
36
37 /*
38 * Function to replace %20 by a space. No need to reallocate.
39 */
40 void
Scan_Convert_P20_Into_Space(gchar * string)41 Scan_Convert_P20_Into_Space (gchar *string)
42 {
43 gchar *tmp, *tmp1;
44
45 while ((tmp = strstr (string, "%20")) != NULL)
46 {
47 tmp1 = tmp + 3;
48 *(tmp++) = ' ';
49 while (*tmp1)
50 *(tmp++) = *(tmp1++);
51 *tmp = '\0';
52 }
53 }
54
55 /*
56 * Function to replace space by '_'. No need to reallocate.
57 */
58 void
Scan_Convert_Space_Into_Underscore(gchar * string)59 Scan_Convert_Space_Into_Underscore (gchar *string)
60 {
61 gchar *tmp = string;
62
63 while ((tmp = strchr (tmp, ' ')) != NULL)
64 {
65 *tmp = '_';
66 }
67 }
68
69 void
Scan_Process_Fields_Remove_Space(gchar * string)70 Scan_Process_Fields_Remove_Space (gchar *string)
71 {
72 gchar *tmp, *tmp1;
73
74 tmp = tmp1 = string;
75
76 while (*tmp)
77 {
78 while (*tmp == ' ')
79 tmp++;
80 if (*tmp)
81 *(tmp1++) = *(tmp++);
82 }
83 *tmp1 = '\0';
84 }
85
86 /*
87 * Scan_Process_Fields_Insert_Space:
88 * @string: Input string
89 *
90 * This function will insert space before every uppercase character.
91 *
92 * Returns: A newly allocated string.
93 */
94 gchar *
Scan_Process_Fields_Insert_Space(const gchar * string)95 Scan_Process_Fields_Insert_Space (const gchar *string)
96 {
97 gchar *iter;
98 gunichar c;
99 GString *string1;
100
101 string1 = g_string_new ("");
102 g_string_append_c (string1, *string);
103
104 for (iter = g_utf8_next_char (string); *iter; iter = g_utf8_next_char (iter))
105 {
106 c = g_utf8_get_char (iter);
107
108 if (g_unichar_isupper (c))
109 {
110 g_string_append_c (string1, ' ');
111 }
112
113 g_string_append_unichar (string1, c);
114 }
115
116 return g_string_free (string1, FALSE);
117 }
118
119 /*
120 * The function removes the duplicated spaces. No need to reallocate.
121 */
122 void
Scan_Process_Fields_Keep_One_Space(gchar * string)123 Scan_Process_Fields_Keep_One_Space (gchar *string)
124 {
125 gchar *tmp, *tmp1;
126
127 tmp = tmp1 = string;
128
129 // Remove multiple consecutive underscores and spaces.
130 while (*tmp1)
131 {
132 while (*tmp1 && *tmp1 != ' ' && *tmp1 != '_')
133 *(tmp++) = *(tmp1++);
134 if (!*tmp1)
135 break;
136 *(tmp++) = *(tmp1++);
137 while (*tmp1 == ' ' || *tmp1 == '_')
138 tmp1++;
139 }
140 *tmp = '\0';
141 }
142
143 /*
144 * Function to remove spaces
145 * No need to reallocate
146 */
147 void
Scan_Remove_Spaces(gchar * string)148 Scan_Remove_Spaces (gchar *string)
149 {
150 int nextnotspace = 0, pos = 0;
151
152 while(string[pos] != '\0')
153 {
154 if(string[pos] == ' ')
155 {
156 nextnotspace = pos;
157 while(string[++nextnotspace] == ' ');
158 string[pos] = string[nextnotspace];
159 string[nextnotspace] = ' ';
160 continue;
161 }
162 pos++;
163 }
164 }
165
166 /* Returns a newly-allocated string. */
167 gchar *
Scan_Process_Fields_All_Uppercase(const gchar * string)168 Scan_Process_Fields_All_Uppercase (const gchar *string)
169 {
170 return g_utf8_strup (string, -1);
171 }
172
173 /* Returns a newly-allocated string. */
174 gchar *
Scan_Process_Fields_All_Downcase(const gchar * string)175 Scan_Process_Fields_All_Downcase (const gchar *string)
176 {
177 return g_utf8_strdown (string, -1);
178 }
179
180 /* Returns a newly-allocated string. */
181 gchar *
Scan_Process_Fields_Letter_Uppercase(const gchar * string)182 Scan_Process_Fields_Letter_Uppercase (const gchar *string)
183 {
184 const gchar *temp;
185 gchar temp2[6];
186 gboolean set_to_upper_case = TRUE;
187 gunichar c;
188 GString *string1;
189
190 string1 = g_string_new ("");
191
192 for (temp = string; *temp; temp = g_utf8_next_char (temp))
193 {
194 gchar *temp3;
195 int l;
196
197 c = g_utf8_get_char (temp);
198 l = g_unichar_to_utf8 (c, temp2);
199
200 if (set_to_upper_case && g_unichar_islower(c))
201 {
202 temp3 = g_utf8_strup (temp2, l);
203 g_string_append (string1, temp3);
204 g_free (temp3);
205 }
206 else if (!set_to_upper_case && g_unichar_isupper(c))
207 {
208 temp3 = g_utf8_strdown (temp2, l);
209 g_string_append (string1, temp3);
210 g_free (temp3);
211 }
212 else
213 {
214 g_string_append_len (string1, temp2, l);
215 }
216
217 /* Uppercase the word 'I' in english */
218 if (!set_to_upper_case &&
219 (*(temp - 1) == ' ' || *(temp - 1) == '_') &&
220 (*temp == 'i' || *temp == 'I') &&
221 (*(temp + 1) == ' ' || *(temp + 1) == '_'))
222 {
223 string1->str [string1->len - 1] = 'I';
224 }
225
226 /* After the first time, all will be lower case. */
227 set_to_upper_case = FALSE;
228 }
229
230 return g_string_free (string1, FALSE);
231 }
232
233 static gint
Scan_Word_Is_Roman_Numeral(const gchar * text)234 Scan_Word_Is_Roman_Numeral (const gchar *text)
235 {
236 /* No need for caseless strchr. */
237 static const gchar romans[] = "MmDdCcLlXxVvIi";
238
239 gsize next_allowed = 0;
240 gsize prev = 0;
241 gsize count = 0;
242 const gchar *i;
243
244 for (i = text; *i; i++)
245 {
246 const char *s = strchr (romans, *i);
247
248 if (s)
249 {
250 gsize c = (s - romans) / 2;
251
252 if (c < next_allowed)
253 {
254 return 0;
255 }
256
257 if (c < prev)
258 {
259 /* After subtraction, no more subtracted chars allowed. */
260 next_allowed = prev + 1;
261 }
262 else if (c == prev)
263 {
264 /* Allow indefinite repetition for m; three for c, x and i; and
265 * none for d, l and v. */
266 if ((c && ++count > 3) || (c & 1))
267 {
268 return 0;
269 }
270
271 /* No more subtraction. */
272 next_allowed = c;
273 }
274 else if (c && !(c & 1))
275 {
276 /* For first occurrence of c, x and i, allow "subtraction" from
277 * 10 and 5 times self, reset counting. */
278 next_allowed = c - 2;
279 count = 1;
280 }
281
282 prev = c;
283 }
284 else
285 {
286 if (g_unichar_isalnum (g_utf8_get_char (i)))
287 {
288 return 0;
289 }
290
291 break;
292 }
293 }
294
295 /* Return length of found Roman numeral. */
296 return i - text;
297 }
298
299 /*
300 * Function to set the first letter of each word to uppercase, according the "Chicago Manual of Style" (http://www.docstyles.com/cmscrib.htm#Note2)
301 * No needed to reallocate
302 */
303 void
Scan_Process_Fields_First_Letters_Uppercase(gchar ** str,gboolean uppercase_preps,gboolean handle_roman)304 Scan_Process_Fields_First_Letters_Uppercase (gchar **str,
305 gboolean uppercase_preps,
306 gboolean handle_roman)
307 {
308 /**** DANIEL TEST *****
309 gchar *iter;
310 gchar utf8_character[6];
311 gboolean set_to_upper_case = TRUE;
312 gunichar c;
313
314 for (iter = text; *iter; iter = g_utf8_next_char(iter))
315 {
316 c = g_utf8_get_char(iter);
317 if (set_to_upper_case && g_unichar_islower(c))
318 strncpy(iter, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
319 else if (!set_to_upper_case && g_unichar_isupper(c))
320 strncpy(iter, utf8_character, g_unichar_to_utf8(g_unichar_tolower(c), utf8_character));
321
322 set_to_upper_case = (g_unichar_isalpha(c)
323 || c == (gunichar)'.'
324 || c == (gunichar)'\''
325 || c == (gunichar)'`') ? FALSE : TRUE;
326 }
327 ****/
328 /**** Barış Çiçek version ****/
329 gchar *string = *str;
330 gchar *word, *word1, *word2, *temp;
331 gint i, len;
332 gchar utf8_character[6];
333 gunichar c;
334 gboolean set_to_upper_case, set_to_upper_case_tmp;
335 // There have to be space at the end of words to seperate them from prefix
336 // Chicago Manual of Style "Heading caps" Capitalization Rules (CMS 1993, 282) (http://www.docstyles.com/cmscrib.htm#Note2)
337 const gchar * exempt[] =
338 {
339 "a ", "a_",
340 "against ", "against_",
341 "an ", "an_",
342 "and ", "and_",
343 "at ", "at_",
344 "between ", "between_",
345 "but ", "but_",
346 "feat. ", "feat._",
347 "for ", "for_",
348 "in ", "in_",
349 "nor ", "nor_",
350 "of ", "of_",
351 //"off ", "off_", // Removed by Slash Bunny
352 "on ", "on_",
353 "or ", "or_",
354 //"over ", "over_", // Removed by Slash Bunny
355 "so ", "so_",
356 "the ", "the_",
357 "to ", "to_",
358 "with ", "with_",
359 "yet ", "yet_",
360 NULL
361 };
362
363 temp = Scan_Process_Fields_All_Downcase (string);
364 g_free (*str);
365 *str = string = temp;
366
367 if (!g_utf8_validate(string,-1,NULL))
368 {
369 /* FIXME: Translatable string. */
370 g_warning ("%s",
371 "Scan_Process_Fields_First_Letters_Uppercase: Not valid UTF-8!");
372 return;
373 }
374 /* Removes trailing whitespace. */
375 string = g_strchomp(string);
376
377 temp = string;
378
379 /* If the word is a roman numeral, capitalize all of it. */
380 if (handle_roman && (len = Scan_Word_Is_Roman_Numeral (temp)))
381 {
382 gchar *tmp = g_utf8_strup (temp, len);
383 strncpy (string, tmp, len);
384 g_free (tmp);
385 }
386 else
387 {
388 // Set first character to uppercase
389 c = g_utf8_get_char(temp);
390 strncpy(string, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
391 }
392
393 // Uppercase first character of each word, except for 'exempt[]' words lists
394 while ( temp )
395 {
396 word = temp; // Needed if there is only one word
397 word1 = strchr (temp, ' ');
398 word2 = strchr (temp, '_');
399
400 // Take the first string found (near beginning of string)
401 if (word1 && word2)
402 word = MIN(word1,word2);
403 else if (word1)
404 word = word1;
405 else if (word2)
406 word = word2;
407 else
408 {
409 // Last word of the string: the first letter is always uppercase,
410 // even if it's in the exempt list. This is a Chicago Manual of Style rule.
411 // Last Word In String - Should Capitalize Regardless of Word (Chicago Manual of Style)
412 c = g_utf8_get_char(word);
413 strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
414 break;
415 }
416
417 // Go to first character of the word (char. after ' ' or '_')
418 word = word+1;
419
420 // If the word is a roman numeral, capitalize all of it
421 if (handle_roman && (len = Scan_Word_Is_Roman_Numeral (word)))
422 {
423 gchar *tmp = g_utf8_strup (word, len);
424 strncpy (word, tmp, len);
425 g_free (tmp);
426 }
427 else
428 {
429 // Set uppercase the first character of this word
430 c = g_utf8_get_char(word);
431 strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
432
433 if (uppercase_preps)
434 {
435 goto increment;
436 }
437
438 /* Lowercase the first character of this word if found in the
439 * exempt words list. */
440 for (i=0; exempt[i]!=NULL; i++)
441 {
442 if (g_ascii_strncasecmp(exempt[i], word, strlen(exempt[i])) == 0)
443 {
444 c = g_utf8_get_char(word);
445 strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_tolower(c), utf8_character));
446 break;
447 }
448 }
449 }
450
451 increment:
452 temp = word;
453 }
454
455 // Uppercase letter placed after some characters like '(', '[', '{'
456 set_to_upper_case = FALSE;
457 for (temp = string; *temp; temp = g_utf8_next_char(temp))
458 {
459 c = g_utf8_get_char(temp);
460 set_to_upper_case_tmp = ( c == (gunichar)'('
461 || c == (gunichar)'['
462 || c == (gunichar)'{'
463 || c == (gunichar)'"'
464 || c == (gunichar)':'
465 || c == (gunichar)'.'
466 || c == (gunichar)'`'
467 || c == (gunichar)'-'
468 ) ? TRUE : FALSE;
469
470 if (set_to_upper_case && g_unichar_islower(c))
471 strncpy(temp, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
472
473 set_to_upper_case = set_to_upper_case_tmp;
474 }
475 }
476