1 /* -*- c-basic-offset: 2; coding: utf-8 -*- */
2 /*
3   Copyright (C) 2015-2016  Kouhei Sutou <kou@clear-code.com>
4 
5   This library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License version 2.1 as published by the Free Software Foundation.
8 
9   This library is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   Lesser General Public License for more details.
13 
14   You should have received a copy of the GNU Lesser General Public
15   License along with this library; if not, write to the Free Software
16   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
17 */
18 
19 /*
20   Groonga: ed300a833d44eaefa978b5ecf46a96ef91ae0891
21 
22   CFLAGS: -O2 -g
23   % make --quiet -C benchmark run-bench-nfkc
24   run-bench-nfkc:
25                                (total)    (average)  (median)
26     map1 - switch            : (0.0060ms) (0.00060000ms) (0.00000000ms)
27     map1 -  table            : (0.00000000ms) (0.00000000ms) (0.00000000ms)
28     map2 - switch - no change: (0.0010ms) (0.00010000ms) (0.00000000ms)
29     map2 -  table - no change: (0.00000000ms) (0.00000000ms) (0.00000000ms)
30     map2 - switch -    change: (0.0010ms) (0.00010000ms) (0.00000000ms)
31     map2 -  table -    change: (0.0010ms) (0.00010000ms) (0.00000000ms)
32 */
33 
34 #include <stdlib.h>
35 #include <stdio.h>
36 
37 #include <glib.h>
38 
39 #include <groonga.h>
40 
41 #include "lib/benchmark.h"
42 
43 #include "../lib/nfkc50.c"
44 
45 #define MAX_UNICODE 0x110000
46 #define BUFFER_SIZE 0x100
47 
/*
 * Encodes the code point `i` into `buf` as a '\0'-terminated UTF-8
 * sequence.
 *
 * The original (pre-RFC 3629) UTF-8 scheme is used, so values up to
 * 0x7fffffff are accepted and may need up to 6 bytes. No check is done
 * for surrogates or for values above U+10FFFF.
 *
 * buf: destination. It must have room for at least 7 bytes (6 encoded
 *      bytes and the terminator).
 *
 * Returns the number of encoded bytes, not counting the terminator.
 * Values of 0x80000000 and above can't be represented: `buf` is set to
 * the empty string and 0 is returned for them.
 */
static inline int
ucs2utf8(unsigned int i, unsigned char *buf)
{
  unsigned char *p = buf;
  unsigned char lead_mark;
  int n_continuations;

  if (i < 0x80) {
    /* ASCII is stored as is. This includes 0, which is counted as 1 byte. */
    buf[0] = (unsigned char)i;
    buf[1] = '\0';
    return 1;
  }

  if (i < 0x800) {
    n_continuations = 1;
    lead_mark = 0xc0;
  } else if (i < 0x00010000) {
    n_continuations = 2;
    lead_mark = 0xe0;
  } else if (i < 0x00200000) {
    n_continuations = 3;
    lead_mark = 0xf0;
  } else if (i < 0x04000000) {
    n_continuations = 4;
    lead_mark = 0xf8;
  } else if (i < 0x80000000) {
    n_continuations = 5;
    lead_mark = 0xfc;
  } else {
    /* Not encodable: don't emit continuation bytes without a lead byte. */
    buf[0] = '\0';
    return 0;
  }

  /* The lead byte carries the bits above all of the 6 bit groups. */
  *p++ = (unsigned char)((i >> (6 * n_continuations)) | lead_mark);
  /* Each continuation byte carries the next 6 bits, highest group first. */
  while (n_continuations > 0) {
    n_continuations--;
    *p++ = (unsigned char)(((i >> (6 * n_continuations)) & 0x3f) | 0x80);
  }
  *p = '\0';
  return (int)(p - buf);
}
81 
82 static void
bench_char_type(gpointer user_data)83 bench_char_type(gpointer user_data)
84 {
85   uint64_t code_point;
86   char utf8[7];
87 
88   for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
89     ucs2utf8(code_point, (unsigned char *)utf8);
90     grn_nfkc50_char_type(utf8);
91   }
92 }
93 
94 static void
bench_decompose(gpointer user_data)95 bench_decompose(gpointer user_data)
96 {
97   uint64_t code_point;
98   char utf8[7];
99 
100   for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
101     ucs2utf8(code_point, (unsigned char *)utf8);
102     grn_nfkc50_decompose(utf8);
103   }
104 }
105 
106 static void
bench_compose_no_change(gpointer user_data)107 bench_compose_no_change(gpointer user_data)
108 {
109   uint64_t prefix_code_point;
110   uint64_t suffix_code_point = 0x61; /* a */
111   char prefix_utf8[7];
112   char suffix_utf8[7];
113 
114   ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8);
115   for (prefix_code_point = 1;
116        prefix_code_point < MAX_UNICODE;
117        prefix_code_point++) {
118     ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8);
119     grn_nfkc50_compose(prefix_utf8, suffix_utf8);
120   }
121 }
122 
123 static void
bench_compose_change(gpointer user_data)124 bench_compose_change(gpointer user_data)
125 {
126   uint64_t prefix_code_point;
127   uint64_t suffix_code_point = 0x11ba;
128   char prefix_utf8[7];
129   char suffix_utf8[7];
130 
131   ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8);
132   for (prefix_code_point = 1;
133        prefix_code_point < MAX_UNICODE;
134        prefix_code_point++) {
135     ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8);
136     grn_nfkc50_compose(prefix_utf8, suffix_utf8);
137   }
138 }
139 
140 /*
141 static void
142 check_char_type(gpointer user_data)
143 {
144   uint64_t code_point;
145   char utf8[7];
146 
147   for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
148     grn_char_type a;
149     grn_char_type b;
150 
151     ucs2utf8(code_point, (unsigned char *)utf8);
152     a = grn_nfkc_char_type(utf8);
153     b = grn_nfkc50_char_type(utf8);
154     if (a == b) {
155       continue;
156     }
157     printf("%lx: %s: %d != %d\n", code_point, utf8, a, b);
158   }
159 }
160 
161 static void
162 check_decompose(gpointer user_data)
163 {
164   uint64_t code_point;
165   char utf8[7];
166 
167   for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
168     const char *a;
169     const char *b;
170 
171     ucs2utf8(code_point, (unsigned char *)utf8);
172     a = grn_nfkc_decompose(utf8);
173     b = grn_nfkc50_decompose(utf8);
174     if (a == b) {
175       continue;
176     }
177     if (!a || !b) {
178       printf("%lx: %s: %s != %s\n", code_point, utf8, a, b);
179       continue;
180     }
181     if (strcmp(a, b) != 0) {
182       printf("%lx: %s: %s != %s\n", code_point, utf8, a, b);
183     }
184   }
185 }
186 
187 static void
188 check_compose(gpointer user_data)
189 {
190   uint64_t prefix_code_point;
191   uint64_t suffix_code_point;
192   char prefix_utf8[7];
193   char suffix_utf8[7];
194 
195   for (prefix_code_point = 1;
196        prefix_code_point < MAX_UNICODE;
197        prefix_code_point++) {
198     ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8);
199     for (suffix_code_point = 1;
200          suffix_code_point < MAX_UNICODE;
201          suffix_code_point++) {
202       const char *a;
203       const char *b;
204 
205       ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8);
206       a = grn_nfkc_compose(prefix_utf8, suffix_utf8);
207       b = grn_nfkc50_compose(prefix_utf8, suffix_utf8);
208       if (a == b) {
209         continue;
210       }
211       if (!a || !b) {
212         printf("%lx-%lx: %s-%s: %s != %s\n",
213                prefix_code_point, suffix_code_point,
214                prefix_utf8, suffix_utf8,
215                a, b);
216         continue;
217       }
218       if (strcmp(a, b) != 0) {
219         printf("%lx-%lx: %s-%s: %s != %s\n",
220                prefix_code_point, suffix_code_point,
221                prefix_utf8, suffix_utf8,
222                a, b);
223       }
224     }
225     if ((prefix_code_point % 10000) == 0) {
226       printf("%" G_GUINT64_FORMAT "\n", prefix_code_point);
227     }
228   }
229 }
230 */
231 
232 int
main(int argc,gchar ** argv)233 main(int argc, gchar **argv)
234 {
235   grn_rc rc;
236   BenchReporter *reporter;
237   gint n = 10;
238 
239   rc = grn_init();
240   if (rc != GRN_SUCCESS) {
241     g_print("failed to initialize Groonga: <%d>: %s\n",
242             rc, grn_get_global_error_message());
243     return EXIT_FAILURE;
244   }
245   bench_init(&argc, &argv);
246 
247   reporter = bench_reporter_new();
248 
249   if (g_getenv("N")) {
250     n = atoi(g_getenv("N"));
251   }
252 
253 #define REGISTER(label, bench_function)                 \
254   bench_reporter_register(reporter, label, n,           \
255                           NULL,                         \
256                           bench_function,               \
257                           NULL,                         \
258                           NULL)
259   REGISTER("char_type            ", bench_char_type);
260   REGISTER("decompose            ", bench_decompose);
261   REGISTER("compose   - no change", bench_compose_no_change);
262   REGISTER("compose   -    change", bench_compose_change);
263 
264   /*
265     REGISTER("check - char_type", check_char_type);
266     REGISTER("check - decompose", check_decompose);
267     REGISTER("check - compose  ", check_compose);
268   */
269 #undef REGISTER
270 
271   bench_reporter_run(reporter);
272   g_object_unref(reporter);
273 
274   return EXIT_SUCCESS;
275 }
276