1 /* -*- c-basic-offset: 2; coding: utf-8 -*- */
2 /*
3 Copyright (C) 2015-2016 Kouhei Sutou <kou@clear-code.com>
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License version 2.1 as published by the Free Software Foundation.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 /*
20 Groonga: ed300a833d44eaefa978b5ecf46a96ef91ae0891
21
22 CFLAGS: -O2 -g
23 % make --quiet -C benchmark run-bench-nfkc
24 run-bench-nfkc:
25 (total) (average) (median)
26 map1 - switch : (0.0060ms) (0.00060000ms) (0.00000000ms)
27 map1 - table : (0.00000000ms) (0.00000000ms) (0.00000000ms)
28 map2 - switch - no change: (0.0010ms) (0.00010000ms) (0.00000000ms)
29 map2 - table - no change: (0.00000000ms) (0.00000000ms) (0.00000000ms)
30 map2 - switch - change: (0.0010ms) (0.00010000ms) (0.00000000ms)
31 map2 - table - change: (0.0010ms) (0.00010000ms) (0.00000000ms)
32 */
33
34 #include <stdlib.h>
35 #include <stdio.h>
36
37 #include <glib.h>
38
39 #include <groonga.h>
40
41 #include "lib/benchmark.h"
42
43 #include "../lib/nfkc50.c"
44
/* Exclusive upper bound of the code point loops below: U+10FFFF + 1. */
#define MAX_UNICODE (0x10ffff + 1)
/* Size in bytes (0x100). Not referenced by the code visible in this file. */
#define BUFFER_SIZE 256
47
/*
 * Encodes the code point `i` into `buf` with the original 1 to 6 byte
 * UTF-8 scheme (RFC 2279 style) and appends a terminating '\0'.
 *
 * `buf` must have room for at least 7 bytes (6 encoded bytes + '\0').
 *
 * Returns the number of encoded bytes, not counting the terminator.
 * Values of 0x80000000 and above don't fit in 6 bytes: `buf` is set to
 * the empty string and 0 is returned for them.
 *
 * No validity check is done on `i`: surrogates and values above
 * U+10FFFF are encoded as is.
 */
static inline int
ucs2utf8(unsigned int i, unsigned char *buf)
{
  unsigned char *p = buf;
  unsigned char lead;
  int n_trailing;

  /* Choose the lead byte marker and the number of continuation bytes. */
  if (i < 0x80) {
    lead = 0x00;
    n_trailing = 0;
  } else if (i < 0x800) {
    lead = 0xc0;
    n_trailing = 1;
  } else if (i < 0x00010000) {
    lead = 0xe0;
    n_trailing = 2;
  } else if (i < 0x00200000) {
    lead = 0xf0;
    n_trailing = 3;
  } else if (i < 0x04000000) {
    lead = 0xf8;
    n_trailing = 4;
  } else if (i < 0x80000000) {
    lead = 0xfc;
    n_trailing = 5;
  } else {
    /* There is no lead byte for this range. Don't emit a headless
       sequence of continuation bytes. */
    *buf = '\0';
    return 0;
  }

  /* The lead byte carries the most significant bits. */
  *p++ = (unsigned char)((i >> (6 * n_trailing)) | lead);
  /* Each continuation byte carries the next 6 bits, high to low. */
  while (n_trailing > 0) {
    n_trailing--;
    *p++ = (unsigned char)(((i >> (6 * n_trailing)) & 0x3f) | 0x80);
  }
  *p = '\0';
  return (int)(p - buf);
}
81
82 static void
bench_char_type(gpointer user_data)83 bench_char_type(gpointer user_data)
84 {
85 uint64_t code_point;
86 char utf8[7];
87
88 for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
89 ucs2utf8(code_point, (unsigned char *)utf8);
90 grn_nfkc50_char_type(utf8);
91 }
92 }
93
94 static void
bench_decompose(gpointer user_data)95 bench_decompose(gpointer user_data)
96 {
97 uint64_t code_point;
98 char utf8[7];
99
100 for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
101 ucs2utf8(code_point, (unsigned char *)utf8);
102 grn_nfkc50_decompose(utf8);
103 }
104 }
105
106 static void
bench_compose_no_change(gpointer user_data)107 bench_compose_no_change(gpointer user_data)
108 {
109 uint64_t prefix_code_point;
110 uint64_t suffix_code_point = 0x61; /* a */
111 char prefix_utf8[7];
112 char suffix_utf8[7];
113
114 ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8);
115 for (prefix_code_point = 1;
116 prefix_code_point < MAX_UNICODE;
117 prefix_code_point++) {
118 ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8);
119 grn_nfkc50_compose(prefix_utf8, suffix_utf8);
120 }
121 }
122
123 static void
bench_compose_change(gpointer user_data)124 bench_compose_change(gpointer user_data)
125 {
126 uint64_t prefix_code_point;
127 uint64_t suffix_code_point = 0x11ba;
128 char prefix_utf8[7];
129 char suffix_utf8[7];
130
131 ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8);
132 for (prefix_code_point = 1;
133 prefix_code_point < MAX_UNICODE;
134 prefix_code_point++) {
135 ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8);
136 grn_nfkc50_compose(prefix_utf8, suffix_utf8);
137 }
138 }
139
140 /*
141 static void
142 check_char_type(gpointer user_data)
143 {
144 uint64_t code_point;
145 char utf8[7];
146
147 for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
148 grn_char_type a;
149 grn_char_type b;
150
151 ucs2utf8(code_point, (unsigned char *)utf8);
152 a = grn_nfkc_char_type(utf8);
153 b = grn_nfkc50_char_type(utf8);
154 if (a == b) {
155 continue;
156 }
157 printf("%lx: %s: %d != %d\n", code_point, utf8, a, b);
158 }
159 }
160
161 static void
162 check_decompose(gpointer user_data)
163 {
164 uint64_t code_point;
165 char utf8[7];
166
167 for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
168 const char *a;
169 const char *b;
170
171 ucs2utf8(code_point, (unsigned char *)utf8);
172 a = grn_nfkc_decompose(utf8);
173 b = grn_nfkc50_decompose(utf8);
174 if (a == b) {
175 continue;
176 }
177 if (!a || !b) {
178 printf("%lx: %s: %s != %s\n", code_point, utf8, a, b);
179 continue;
180 }
181 if (strcmp(a, b) != 0) {
182 printf("%lx: %s: %s != %s\n", code_point, utf8, a, b);
183 }
184 }
185 }
186
187 static void
188 check_compose(gpointer user_data)
189 {
190 uint64_t prefix_code_point;
191 uint64_t suffix_code_point;
192 char prefix_utf8[7];
193 char suffix_utf8[7];
194
195 for (prefix_code_point = 1;
196 prefix_code_point < MAX_UNICODE;
197 prefix_code_point++) {
198 ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8);
199 for (suffix_code_point = 1;
200 suffix_code_point < MAX_UNICODE;
201 suffix_code_point++) {
202 const char *a;
203 const char *b;
204
205 ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8);
206 a = grn_nfkc_compose(prefix_utf8, suffix_utf8);
207 b = grn_nfkc50_compose(prefix_utf8, suffix_utf8);
208 if (a == b) {
209 continue;
210 }
211 if (!a || !b) {
212 printf("%lx-%lx: %s-%s: %s != %s\n",
213 prefix_code_point, suffix_code_point,
214 prefix_utf8, suffix_utf8,
215 a, b);
216 continue;
217 }
218 if (strcmp(a, b) != 0) {
219 printf("%lx-%lx: %s-%s: %s != %s\n",
220 prefix_code_point, suffix_code_point,
221 prefix_utf8, suffix_utf8,
222 a, b);
223 }
224 }
225 if ((prefix_code_point % 10000) == 0) {
226 printf("%" G_GUINT64_FORMAT "\n", prefix_code_point);
227 }
228 }
229 }
230 */
231
232 int
main(int argc,gchar ** argv)233 main(int argc, gchar **argv)
234 {
235 grn_rc rc;
236 BenchReporter *reporter;
237 gint n = 10;
238
239 rc = grn_init();
240 if (rc != GRN_SUCCESS) {
241 g_print("failed to initialize Groonga: <%d>: %s\n",
242 rc, grn_get_global_error_message());
243 return EXIT_FAILURE;
244 }
245 bench_init(&argc, &argv);
246
247 reporter = bench_reporter_new();
248
249 if (g_getenv("N")) {
250 n = atoi(g_getenv("N"));
251 }
252
253 #define REGISTER(label, bench_function) \
254 bench_reporter_register(reporter, label, n, \
255 NULL, \
256 bench_function, \
257 NULL, \
258 NULL)
259 REGISTER("char_type ", bench_char_type);
260 REGISTER("decompose ", bench_decompose);
261 REGISTER("compose - no change", bench_compose_no_change);
262 REGISTER("compose - change", bench_compose_change);
263
264 /*
265 REGISTER("check - char_type", check_char_type);
266 REGISTER("check - decompose", check_decompose);
267 REGISTER("check - compose ", check_compose);
268 */
269 #undef REGISTER
270
271 bench_reporter_run(reporter);
272 g_object_unref(reporter);
273
274 return EXIT_SUCCESS;
275 }
276