1 #include <stdio.h>
2 #include <stdlib.h>
3 
4 /* ICU4C */
5 #include <unicode/utypes.h>
6 #include <unicode/ustring.h>
7 #include <unicode/ucnv.h>
8 #include <unicode/unorm2.h>
9 
10 #include "util.h"
11 
main(int argc,char ** argv)12 int main(int argc, char **argv)
13 {
14 	 int i;
15 
16 	 UErrorCode err;
17 	 UConverter *uc = ucnv_open("UTF8", &err);
18 	 if (U_FAILURE(err)) return EXIT_FAILURE;
19 
20 	 const UNormalizer2 *NFKC = unorm2_getNFKCInstance(&err);
21 	 if (U_FAILURE(err)) return EXIT_FAILURE;
22 
23 	 for (i = 1; i < argc; ++i) {
24 		  if (argv[i][0] == '-') {
25 			   fprintf(stderr, "unrecognized option: %s\n", argv[i]);
26 			   return EXIT_FAILURE;
27 		  }
28 
29 		  size_t len;
30 		  uint8_t *src = readfile(argv[i], &len);
31 		  if (!src) {
32 			   fprintf(stderr, "error reading %s\n", argv[i]);
33 			   return EXIT_FAILURE;
34 		  }
35 
36 		  /* convert UTF8 data to ICU's UTF16 */
37 		  UChar *usrc = (UChar*) malloc(2*len * sizeof(UChar));
38 		  ucnv_toUChars(uc, usrc, 2*len, (char*) src, len, &err);
39 		  if (U_FAILURE(err)) return EXIT_FAILURE;
40 		  size_t ulen = u_strlen(usrc);
41 
42 		  /* ICU's insane normalization API requires you to
43 			 know the size of the destination buffer in advance,
44 			 or alternatively to repeatly try normalizing and
45 			 double the buffer size until it succeeds.  Here, I just
46 			 allocate a huge destination buffer to avoid the issue. */
47 		  UChar *udest = (UChar*) malloc(10*ulen * sizeof(UChar));
48 
49 		  mytime start = gettime();
50 		  for (int i = 0; i < 100; ++i) {
51 			   unorm2_normalize(NFKC, usrc, ulen, udest, 10*ulen, &err);
52 			   if (U_FAILURE(err)) return EXIT_FAILURE;
53 		  }
54 		  printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100);
55 		  free(udest);
56 		  free(usrc);
57 		  free(src);
58 	 }
59 
60 	 return EXIT_SUCCESS;
61 }
62