1 /*
2  * Copyright (C) 2002 Laird Breyer
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
17  *
18  * Author:   Laird Breyer <laird@lbreyer.com>
19  */
20 
21 #ifndef DBACL_H
22 #define DBACL_H
23 
24 #ifdef HAVE_CONFIG_H
25 #undef HAVE_CONFIG_H
26 #include "config.h"
27 #endif
28 
29 #ifndef VERSION
30 #ifdef PACKAGE_VERSION
31 #define VERSION PACKAGE_VERSION
32 #endif
33 #endif
34 
35 #define COPYBLURB "Copyright (c) 2002-2013 L.A. Breyer. All rights reserved.\n" \
36                   "%s comes with ABSOLUTELY NO WARRANTY, and is licensed\n" \
37 	          "to you under the terms of the GNU General Public License 3 or later.\n\n"
38 
39 #define DEFAULT_CATPATH "DBACL_PATH"
40 /* define this to save category files with a temporary name, then atomically
41  * rename them. This makes corrupt category files nearly impossible, and
42  * obviates the need for file locking in case another instance of dbacl is
43  * trying to read the category while it is being written.
44  */
45 #define ATOMIC_CATSAVE
46 /* we give our files the 640 permissions - I've added "write"
47    permission because sometimes we want to mmap/readwrite such files
48    so we need those permissions. Also, O_BINARY is not portable, but
49    a good idea for some platforms
50  */
51 #ifndef O_BINARY
52 #define O_BINARY 0
53 #endif
54 #define ATOMIC_CREATE(x) open(x, O_CREAT|O_EXCL|O_RDWR|O_BINARY, 0640)
55 
56 /* we define several memory models, which differ basically
57    in the number of bytes used for the hash tables. Adjust to taste */
58 
59 /* use this for 64-bit hashes */
60 #undef HUGE_MEMORY_MODEL
61 /* use this for 32-bit hashes */
62 #define NORMAL_MEMORY_MODEL
63 /* use this for 16-bit hashes */
64 #undef SMALL_MEMORY_MODEL
65 /* use this for 8-bit hashes */
66 #undef TINY_MEMORY_MODEL
67 
68 /* the following defines set up a tradeoff between
69    modelling accuracy and memory requirements - season to taste
70    (if you often get digitization errors, undef the appropriate macro) */
71 
72 /* digram digitization: avg loss of precision = 0.01 * token size */
73 #define DIGITIZE_DIGRAMS
74 /* lambda digitization: avg loss of precision = 0.01 */
75 #define DIGITIZE_LAMBDA
76 /* learner.hash digitization: avg loss of precision = 0.01 */
77 #define DIGITIZE_LWEIGHTS
78 #if defined HAVE_MBRTOWC
79 
80 #include <wctype.h>
81 #include <wchar.h>
82 
83 #endif
84 
85 #include <limits.h>
86 #include <stdio.h>
87 
88 
89 #if !defined LOADED_REGEX
90 
91 #include <sys/types.h>
92 #include <regex.h>
93 
94 #endif
95 
/* Byte-order conversion helpers (htonl, ntohl, htons, ntohs).
 *
 * When the system header is available we always use its implementation.
 * Testing "#ifndef htonl" alone is not enough to detect a missing
 * implementation: the header may provide these as plain functions rather
 * than macros, and overriding them with identity macros would silently
 * turn off byte swapping for portable category files.
 *
 * Only when the header is not available do we fall back to identity
 * macros, each one guarded separately in case something else already
 * supplied it.
 */
#if defined HAVE_NETINET_IN_H
#include <netinet/in.h>
#else

#ifndef htonl
#define htonl(x) (x)
#endif
#ifndef ntohl
#define ntohl(x) (x)
#endif
#ifndef htons
#define htons(x) (x)
#endif
#ifndef ntohs
#define ntohs(x) (x)
#endif

#endif
106 
107 #if defined OS_SUN
108 #include <ieeefp.h>
109 #endif
110 
111 /* some systems seem to have broken sys/types */
112 #if defined OS_SUN || defined OS_HPUX
113 #include <inttypes.h>
114 
115 typedef uint8_t u_int8_t;
116 typedef uint16_t u_int16_t;
117 typedef uint32_t u_int32_t;
118 typedef uint64_t u_int64_t;
119 
120 #endif
121 
122 #ifdef HAVE_MMAP
123 #ifdef HAVE_MADVISE
124 #ifdef HAVE_SYS_MMAN_H
125 
126 #include <sys/types.h>
127 #include <sys/mman.h>
128 
129 #ifdef OS_SUN
130 #define MADVISE(x,y,z) madvise((caddr_t)(x),y,z)
131 #define MLOCK(x,y) mlock((caddr_t)(x),y)
132 #define MUNLOCK(x,y) munlock((caddr_t)(x),y)
133 #define MUNMAP(x,y) munmap((void *)(x),y)
134 #define MMAP(x,y,z,t,u,v) mmap((void *)(x),y,z,t,u,v)
135 #else
136 #define MADVISE(x,y,z) madvise(x,y,z)
137 #define MLOCK(x,y) mlock(x,y)
138 #define MUNLOCK(x,y) munlock(x,y)
139 #define MUNMAP(x,y) munmap((void *)(x), y)
140 #define MMAP(x,y,z,t,u,v) mmap((void *)(x),y,z,t,u,v)
141 #endif
142 
143 #endif
144 #endif
145 #endif
146 
/* Fallback stubs, used when the real wrappers above were not set up
 * (i.e. MADVISE is still undefined at this point). The advisory and
 * locking calls expand to nothing and MMAP() always yields NULL.
 *
 * MAP_FAILED is only defined here if nobody else did so already, so that
 * a <sys/mman.h> pulled in by another path does not cause a macro
 * redefinition.
 *
 * NOTE(review): the stub MMAP() evaluates to NULL, not MAP_FAILED, so
 * callers have to treat both values as "no mapping" - confirm at the
 * call sites.
 */
#ifndef MADVISE
#ifndef MAP_FAILED
#define MAP_FAILED ((void *)-1)
#endif
#define MADVISE(x,y,z)
#define MLOCK(x,y)
#define MUNLOCK(x,y)
#define MUNMAP(x,y)
#define MMAP(x,y,z,t,u,v) NULL
#endif
155 
156 /* constants used by mmap */
157 #ifndef PROT_READ
158 #define PROT_READ  0
159 #define PROT_WRITE 0
160 #define PROT_EXEC  0
161 #define PROT_NONE  0
162 #endif
163 
/* Round x down to a multiple of system_pagesize (a variable that must be
 * visible wherever this macro is used). The whole expansion is
 * parenthesized so that the macro can safely appear as an operand of
 * '/', '%' and similar operators.
 */
#define PAGEALIGN(x) (((x) / system_pagesize) * system_pagesize)
165 
166 /* below, FMT_* macros are used in printf/scanf format strings */
167 #if defined HUGE_MEMORY_MODEL
168 
169 typedef u_int64_t token_count_t;
170 typedef unsigned int token_order_t; /* used in bit-field, therefore uint */
171 typedef unsigned int token_class_t; /* used in bit-field, therefore uint */
172 typedef u_int8_t hash_bit_count_t;
173 typedef u_int64_t hash_count_t;
174 typedef unsigned int hash_percentage_t;
175 typedef u_int16_t category_count_t;
176 typedef u_int16_t regex_count_t;
177 typedef u_int64_t document_count_t;
178 typedef u_int16_t confidence_t;
179 
180 typedef float weight_t;
181 typedef double score_t;
182 #define FMT_printf_score_t "f"
183 #define FMT_scanf_score_t "lf"
184 #define FMT_printf_integer_t "ld"
185 
186 typedef u_int16_t token_stack_t;
187 typedef int charbuf_len_t;
188 typedef u_int16_t alphabet_size_t;
189 typedef u_int16_t smbitmap_t;
190 typedef u_int8_t regex_flags_t;
191 
192 typedef int error_code_t;
193 typedef int bool_t;
194 typedef u_int8_t byte_t;
195 
196 #if defined DIGITIZE_DIGRAMS && defined DIGITIZE_LAMBDA
197 /* cats not portable because hash value is too big */
198 #undef PORTABLE_CATS
199 #endif
/* keep typedefs and macros together */
201 typedef u_int64_t hash_value_t;
202 #define hton_hash_value_t(x) (x)
203 #define ntoh_hash_value_t(x) (x)
204 typedef u_int16_t digitized_weight_t;
205 #define hton_digitized_weight_t(x) (x)
206 #define ntoh_digitized_weight_t(x) (x)
207 
208 /* where token counts wrap around */
209 #define K_TOKEN_COUNT_MAX ((token_count_t)18446744073709551615U)
210 /* where digrams wrap around */
211 #define K_DIGRAM_COUNT_MAX ((weight_t)1.0e+9)
212 /* size of hash in bits */
213 #define MAX_HASH_BITS ((hash_bit_count_t)64)
214 /* for line filtering: maximum number of tokens allowed on a single line */
215 #define MAX_TOKEN_LINE_STACK ((token_stack_t)16384)
216 /* number of pages we want to use for I/O buffering */
217 #define BUFFER_MAG 64
218 /* we need 8 byte hash values */
219 #define JENKINS8
220 #undef  JENKINS4
221 
222 #elif defined NORMAL_MEMORY_MODEL
223 
224 typedef u_int32_t token_count_t;
225 typedef unsigned int token_order_t;/* used in bit-field, therefore uint */
226 typedef unsigned int token_class_t;/* used in bit-field, therefore uint */
227 typedef u_int8_t hash_bit_count_t;
228 typedef u_int32_t hash_count_t;
229 typedef unsigned int hash_percentage_t;
230 typedef u_int8_t category_count_t;
231 typedef u_int8_t regex_count_t;
232 typedef u_int32_t document_count_t;
233 typedef u_int16_t confidence_t;
234 
235 typedef float weight_t;
236 typedef double score_t;
237 #define FMT_printf_score_t "f"
238 #define FMT_scanf_score_t "lf"
239 #define FMT_printf_integer_t "d"
240 
241 typedef u_int8_t token_stack_t;
242 typedef int charbuf_len_t;
243 typedef u_int16_t alphabet_size_t;
244 typedef u_int16_t smbitmap_t;
245 typedef u_int8_t regex_flags_t;
246 
247 typedef int error_code_t;
248 typedef int bool_t;
249 typedef u_int8_t byte_t;
250 
251 #if defined DIGITIZE_DIGRAMS && defined DIGITIZE_LAMBDA && defined HAVE_NETINET_IN_H
252 #define PORTABLE_CATS
253 #endif
/* keep typedefs and macros together */
255 typedef u_int32_t hash_value_t;
256 #define hton_hash_value_t(x) htonl(x)
257 #define ntoh_hash_value_t(x) ntohl(x)
258 typedef u_int16_t digitized_weight_t;
259 #define hton_digitized_weight_t(x) htons(x)
260 #define ntoh_digitized_weight_t(x) ntohs(x)
261 
262 /* where token counts wrap around */
263 #define K_TOKEN_COUNT_MAX ((token_count_t)4294967295U)
264 /* where digrams wrap around */
265 #define K_DIGRAM_COUNT_MAX ((weight_t)1.0e+9)
266 /* size of hash in bits */
267 #define MAX_HASH_BITS ((hash_bit_count_t)30)
268 /* for line filtering: maximum number of tokens allowed on a single line */
269 #define MAX_TOKEN_LINE_STACK ((token_stack_t)255)
270 /* number of pages we want to use for I/O buffering */
271 #define BUFFER_MAG 32
/* we need 4 byte hash values: hash_value_t is 32 bits wide in this
   memory model, so select the 4-byte variant exactly as the SMALL and
   TINY models do (only the HUGE model, with 64-bit hashes, uses JENKINS8) */
#undef  JENKINS8
#define JENKINS4
275 
276 #elif defined SMALL_MEMORY_MODEL
277 
278 typedef u_int32_t token_count_t;
279 typedef unsigned int token_order_t;/* used in bit-field, therefore uint */
280 typedef unsigned int token_class_t;/* used in bit-field, therefore uint */
281 typedef u_int8_t hash_bit_count_t;
282 typedef u_int16_t hash_count_t;
283 typedef unsigned int hash_percentage_t;
284 typedef u_int8_t category_count_t;
285 typedef u_int8_t regex_count_t;
286 typedef u_int16_t document_count_t;
287 typedef u_int16_t confidence_t;
288 
289 typedef float weight_t;
290 typedef double score_t;
291 #define FMT_printf_score_t "f"
292 #define FMT_scanf_score_t "lf"
293 #define FMT_printf_integer_t "d"
294 
295 typedef u_int8_t token_stack_t;
296 typedef int charbuf_len_t;
297 typedef u_int16_t alphabet_size_t;
298 typedef u_int16_t smbitmap_t;
299 typedef u_int8_t regex_flags_t;
300 
301 typedef int error_code_t;
302 typedef int bool_t;
303 typedef u_int8_t byte_t;
304 
305 #if defined DIGITIZE_DIGRAMS && defined DIGITIZE_LAMBDA && defined HAVE_NETINET_IN_H
306 #define PORTABLE_CATS
307 #endif
/* keep typedefs and macros together */
309 typedef u_int16_t hash_value_t;
310 #define hton_hash_value_t(x) htons(x)
311 #define ntoh_hash_value_t(x) ntohs(x)
312 typedef u_int16_t digitized_weight_t;
313 #define hton_digitized_weight_t(x) htons(x)
314 #define ntoh_digitized_weight_t(x) ntohs(x)
315 
316 /* where token counts wrap around */
317 #define K_TOKEN_COUNT_MAX ((token_count_t)4294967295U)
318 /* where digrams wrap around */
319 #define K_DIGRAM_COUNT_MAX ((weight_t)1.0e+9)
320 /* size of hash in bits */
321 #define MAX_HASH_BITS ((hash_bit_count_t)15)
322 /* for line filtering: maximum number of tokens allowed on a single line */
323 #define MAX_TOKEN_LINE_STACK ((token_stack_t)128)
324 /* number of pages we want to use for I/O buffering */
325 #define BUFFER_MAG 16
326 /* we need 4 byte hash values */
327 #undef  JENKINS8
328 #define JENKINS4
329 
330 #elif defined TINY_MEMORY_MODEL
331 /* not tested, this model probably doesn't work ;-) */
332 #undef DIGITIZE_DIGRAMS
333 
334 typedef u_int32_t token_count_t;
335 typedef unsigned int token_order_t;/* used in bit-field, therefore uint */
336 typedef unsigned int token_class_t;/* used in bit-field, therefore uint */
337 typedef u_int8_t hash_bit_count_t;
338 typedef u_int8_t hash_count_t;
339 typedef unsigned int hash_percentage_t;
340 typedef u_int8_t category_count_t;
341 typedef u_int8_t regex_count_t;
342 typedef u_int8_t document_count_t;
343 typedef u_int16_t confidence_t;
344 
345 typedef float weight_t;
346 typedef double score_t;
347 #define FMT_printf_score_t "f"
348 #define FMT_scanf_score_t "lf"
349 #define FMT_printf_integer_t "d"
350 
351 typedef u_int8_t token_stack_t;
352 typedef int charbuf_len_t;
353 typedef u_int16_t alphabet_size_t;
354 typedef u_int16_t smbitmap_t;
355 typedef u_int8_t regex_flags_t;
356 
357 typedef int error_code_t;
358 typedef int bool_t;
359 typedef u_int8_t byte_t;
360 
361 #if defined DIGITIZE_DIGRAMS && defined DIGITIZE_LAMBDA
362 #undef PORTABLE_CATS
363 #endif
/* keep typedefs and macros together */
365 typedef u_int8_t hash_value_t;
366 #define hton_hash_value_t(x) (x)
367 #define ntoh_hash_value_t(x) (x)
368 typedef u_int16_t digitized_weight_t;
369 #define hton_digitized_weight_t(x) htons(x)
370 #define ntoh_digitized_weight_t(x) ntohs(x)
371 
372 #define K_TOKEN_COUNT_MAX ((token_count_t)4294967295U)
373 /* where digrams wrap around */
374 #define K_DIGRAM_COUNT_MAX ((weight_t)1.0e+9)
375 /* size of hash in bits */
376 #define MAX_HASH_BITS ((hash_bit_count_t)8)
377 /* for line filtering: maximum number of tokens allowed on a single line */
378 #define MAX_TOKEN_LINE_STACK ((token_stack_t)128)
379 /* number of pages we want to use for I/O buffering */
380 #define BUFFER_MAG 2
381 /* we need 4 byte hash values */
382 #undef  JENKINS8
383 #define JENKINS4
384 
385 #endif
386 
387 /* this is common to all memory models */
388 
389 #if defined OS_DARWIN
390 /* the system I tested this on didn't seem to like packed structures */
391 #define PACK_STRUCTS
392 
393 #else
394 
395 /* disable this if speed is paramount  */
396 #if defined __GNUC__
397 #define PACK_STRUCTS __attribute__ ((packed))
398 #else
399 #define PACK_STRUCTS
400 #endif
401 
402 #endif
403 
/* when digitizing transitions, this stands for -infinity */
#define DIGITIZED_WEIGHT_MIN ((digitized_weight_t)0)
/* largest digitized value; digitized_weight_t is 16 bits wide in every
   memory model, which is why USHRT_MAX is used here */
#define DIGITIZED_WEIGHT_MAX ((digitized_weight_t)USHRT_MAX)
/* number of binary digits of resolution: CLIP_LAMBDA_TOL uses
   1.0/(1<<DIG_FACTOR) as the smallest step */
#define DIG_FACTOR           5
/* maximum number of categories we can handle simultaneously.
   NOTE(review): the cast silently truncates the constant when
   category_count_t is 8 bits wide (NORMAL, SMALL and TINY models),
   giving 255 instead of 16383; only the HUGE model (16 bits) keeps the
   literal value. Confirm which limit is intended. */
#define MAX_CAT ((category_count_t)16383)
/* percentage of hash we use */
#define HASH_FULL ((hash_percentage_t)95)
/* alphabet size */
#define ASIZE ((alphabet_size_t)256)
/* we need three special markers, which cannot be part
 * of the alphabet. Fortunately, we can use ASCII control
 * characters. Hopefully, these won't be used for anything important.
 * Make sure AMIN equals the last reserved char (currently DIAMOND).
 */
419 #define TOKENSEP '\001'
420 #define CLASSEP '\002'
421 #define DIAMOND '\003'
422 #define AMIN DIAMOND
423 #define EOTOKEN CLASSEP
424 /* enough room to pad token with NULL, DIAMOND, CLASSEP and class */
425 #define EXTRA_CLASS_LEN 2
426 #define EXTRA_TOKEN_LEN (EXTRA_CLASS_LEN + 2)
427 #define MULTIBYTE_EPSILON 10 /* enough for a multibyte char and a null char */
428 
/* make sure a character is in the alphabet range: if x, viewed as an
 * unsigned char, lies below AMIN it is overwritten with AMIN, otherwise
 * it is left unchanged. x must be a modifiable lvalue and is evaluated
 * more than once. The argument and the whole expansion are parenthesized
 * so that the macro behaves as a single expression.
 */
#define CLIP_ALPHABET(x) ((x) = (((unsigned char)(x)) < AMIN) ? AMIN : (x))
431 /* the space outside of AMIN-ASIZE is used for auxiliary RESERVED_* data */
432 #define RESERVED_UNUSED0  0 /* dig[0][0-255] */
433 #define RESERVED_MARGINAL 2 /* dig[2][0-255] counts single char marginal freqs */
434 #define RESERVED_TOKLEN   1 /* dig[1][0-MAX_TOKEN_LEN] counts token lengths */
435 
436 /* decides how we compute the shannon entropy */
437 #undef SHANNON_STIRLING
438 
439 /* maximum size of a token, beyond that rest is ignored (ie put into
440  * another token) The value should not be too big, because it protects
441  * against extreme probabilities failing to digitize properly.
442  *
443  * Here's the back-of-the-envelope calculation: In the model, for each
444  * token, we save the reference weight, and the lambda weight. It
445  * seems that the lambdas are of the same order as the corresponding
446  * n-gram's reference weight,
447  *
448  * The reference weights are most extreme for the uniform, equal to
449  * about -5 per token character. Thus, for an n-gram lambda weight,
450  * the most extreme values are about (-5) * total number of
451  * characters. Our calculations blow up for n >= 6 anyway, so the most
452  * extreme value in the worst case (=7) should be about (-5) *
453  * MAX_TOKEN_LEN * 7.
454  *
455  * The other constraint is that we must be able to digitize the
456  * weights, and with DIG_FACTOR = 5, the extreme weight values can be
457  * up to 2048. Giving us a margin of error, we assume that 35 *
458  * MAX_TOKEN_LEN < 1024, which gives MAX_TOKEN_LEN = 30.
459  */
460 #define MAX_TOKEN_LEN ((charbuf_len_t)30)
461 #define TOKEN_LIST_GROW 1048576L
462 
463 /* user options */
464 #define U_OPTION_CLASSIFY               1
465 #define U_OPTION_LEARN                  2
466 #define U_OPTION_FASTEMP                3
467 #define U_OPTION_CUTOFF                 4
468 #define U_OPTION_VERBOSE                5
469 #define U_OPTION_STDIN                  6
470 #define U_OPTION_SCORES                 7
471 #define U_OPTION_POSTERIOR              8
472 #define U_OPTION_FILTER                 9
473 #define U_OPTION_DEBUG                  10
474 #define U_OPTION_DUMP                   12
475 #define U_OPTION_APPEND                 13
476 #define U_OPTION_DECIMATE               14
477 #define U_OPTION_GROWHASH               15
478 #define U_OPTION_INDENTED               16
479 #define U_OPTION_NOZEROLEARN            17
480 #define U_OPTION_MMAP                   21
481 #define U_OPTION_CONFIDENCE             22
482 #define U_OPTION_VAR                    23
483 #define U_OPTION_HM_ADDRESSES           24
484 #define U_OPTION_CLASSIFY_MULTIFILE     25
485 #define U_OPTION_PRIOR_CORRECTION       26
486 #define U_OPTION_MEDIACOUNTS            27
487 
488 /* model options */
489 #define M_OPTION_REFMODEL               1
490 #define M_OPTION_TEXT_FORMAT            2
491 #define M_OPTION_MBOX_FORMAT            3
492 #define M_OPTION_XML                    4
493 #define M_OPTION_I18N                   5
494 #define M_OPTION_CASEN                  6
495 #define M_OPTION_CALCENTROPY            7
496 #define M_OPTION_MULTINOMIAL            8
497 #define M_OPTION_HEADERS                13
498 #define M_OPTION_PLAIN                  14
499 #define M_OPTION_NOPLAIN                15
500 #define M_OPTION_SHOW_LINKS             16
501 #define M_OPTION_SHOW_ALT               17
502 #define M_OPTION_HTML                   18
503 #define M_OPTION_XHEADERS               19
504 #define M_OPTION_SHOW_SCRIPT            21
505 #define M_OPTION_SHOW_HTML_COMMENTS     22
506 #define M_OPTION_USE_STDTOK             23
507 #define M_OPTION_ATTACHMENTS            24
508 #define M_OPTION_WARNING_BAD            25
509 #define M_OPTION_SHOW_STYLE             26
510 #define M_OPTION_SHOW_FORMS             28
511 #define M_OPTION_NOHEADERS              29
512 #define M_OPTION_NGRAM_STRADDLE_NL      30
513 #define M_OPTION_THEADERS               31
514 
515 /* category options */
516 #define C_OPTION_MMAPPED_HASH            1
517 
518 
519 typedef u_int32_t options_t; /* make sure big enough for all options */
520 typedef enum {
521   DT_DEFAULT=0,
522   DT_UNIFORM, DT_DIRICHLET, DT_MAXENT, DT_MLE, DT_IID
523 } digtype_t;
524 typedef enum {
525   CP_DEFAULT=0,
526   CP_CHAR, CP_ALPHA, CP_ALNUM, CP_GRAPH,
527   CP_CEF, CP_ADP, CP_CEF2
528 } charparser_t;
/* printf/scanf conversion specifiers for options_t values; they are
   pasted into the MAGIC4_o / MAGIC4_i format strings.
   NOTE(review): options_t is u_int32_t, but the scanf specifier "ld"
   expects a pointer to long. Verify that callers scan into a long
   temporary rather than directly into an options_t. */
#define FMT_printf_options_t "d"
#define FMT_scanf_options_t "ld"
531 
532 typedef long int re_bitfield;
533 #define MAX_RE ((regex_count_t)(8 * sizeof(re_bitfield)))
534 #define INVALID_RE 0
535 /* maximum number of tagged subexpressions we can handle for each regex */
536 #define MAX_SUBMATCH ((token_order_t)9)
537 
538 typedef enum {gcUNDEF = 0, gcDISCARD, gcTOKEN, gcTOKEN_END, gcIGNORE} good_char_t;
539 
540 
541 /* macros */
542 
543 /* used for digitizing */
544 #if defined DIGITIZE_LWEIGHTS
545 
546 /* use this when digitizing positive weights */
547 #define PACK_LWEIGHTS(a) ((digitized_weight_t)digitize_a_weight(a,1))
548 #define UNPACK_LWEIGHTS(a) ((weight_t)undigitize_a_weight(a,1))
549 
550 /* use this when digitizing negative weights */
551 #define PACK_RWEIGHTS(a) ((digitized_weight_t)digitize_a_weight(-(a),1))
552 #define UNPACK_RWEIGHTS(a) (-(weight_t)undigitize_a_weight(a,1))
553 
554 #define DW "w"
555 
556 #else
557 
558 #define PACK_LWEIGHTS(a) ((weight_t)(a))
559 #define UNPACK_LWEIGHTS(a) ((weight_t)(a))
560 
561 #define PACK_RWEIGHTS(a) ((weight_t)(a))
562 #define UNPACK_RWEIGHTS(a) ((weight_t)(a))
563 
564 #define DW ":"
565 
566 #endif
567 
568 #if defined DIGITIZE_LAMBDA
569 
570 #define PACK_LAMBDA(a) ((digitized_weight_t)digitize_a_weight(a,1))
571 #define UNPACK_LAMBDA(a) ((weight_t)undigitize_a_weight(a,1))
572 #define DL "l"
573 
574 #else
575 
576 #define PACK_LAMBDA(a) ((weight_t)(a))
577 #define UNPACK_LAMBDA(a) ((weight_t)(a))
578 #define DL ":"
579 
580 #endif
581 
582 #if defined DIGITIZE_DIGRAMS
583 
584 #define PACK_DIGRAMS(a) ((digitized_weight_t)digitize_a_weight(-(a),1))
585 #define UNPACK_DIGRAMS(a) (-(weight_t)undigitize_a_weight(a,1))
586 #define SIZEOF_DIGRAMS (sizeof(digitized_weight_t))
587 #define DD "d"
588 
589 #else
590 
591 #define PACK_DIGRAMS(a) ((weight_t)(a))
592 #define UNPACK_DIGRAMS(a) ((weight_t)(a))
593 #define SIZEOF_DIGRAMS (sizeof(weight_t))
594 #define DD ":"
595 
596 #endif
597 
/* Clamp x from below at 1.0/(1<<DIG_FACTOR): values smaller than that
 * threshold are replaced by the threshold, everything else is passed
 * through. The argument is parenthesized so that expressions with
 * low-precedence operators (e.g. a conditional) are clamped as a whole;
 * note that x may be evaluated twice.
 */
#define CLIP_LAMBDA_TOL(x) ((x) < 1.0/(1<<DIG_FACTOR) ? 1.0/(1<<DIG_FACTOR) : (x))
599 
/* used in hash code */

/* evaluates to the id field of the item a points to, so it is "true"
   exactly when that id is nonzero */
#define FILLEDP(a) ((a)->id)
/* plain == comparison of two values */
#define EQUALP(a,b) ((a)==(b))
/* assignment of b to a; a is not parenthesized, so pass a simple lvalue */
#define SET(a,b) (a = (b))

/* set, clear and test the typ.mark flag of the item a points to */
#define SETMARK(a) ((a)->typ.mark = (unsigned int)1)
#define UNSETMARK(a) ((a)->typ.mark = (unsigned int)0)
#define MARKEDP(a) ((a)->typ.mark == (unsigned int)1)

/* true when x is strictly greater than zero */
#define NOTNULL(x) ((x) > 0)

/* the larger of x and y; the selected argument is evaluated twice */
#define MAXIMUM(x,y) (((x)<(y))?(y):(x))
/* INCREMENT: add one to x as long as x < y, otherwise set the flag z to 1.
   INCREASE:  add d to x as long as x < y - d, otherwise set the flag z to 1.
   NOTE(review): both expand to a bare if/else statement (no do-while
   wrapper), so using them unbraced inside another if/else is hazardous;
   z is not parenthesized and must be a simple lvalue. */
#define INCREMENT(x,y,z) if( (x) < (y) ) { (x)++; } else { z = 1; }
#define INCREASE(x,d,y,z) if( (x) < ((y)-(d)) ) { (x) += (d); } else { z = 1; }
615 
616 #if defined PORTABLE_CATS
617 #define SIGNATURE VERSION " " DD DL DW " " "portable"
618 
619 #define NTOH_ID(x)      ntoh_hash_value_t(x)
620 #define HTON_ID(x)      hton_hash_value_t(x)
621 
622 #define NTOH_DIGRAM(x)  ntoh_digitized_weight_t(x)
623 #define HTON_DIGRAM(x)  hton_digitized_weight_t(x)
624 
625 #define NTOH_LAMBDA(x)  ntoh_digitized_weight_t(x)
626 #define HTON_LAMBDA(x)  hton_digitized_weight_t(x)
627 
628 #else
629 #define SIGNATURE VERSION " " DD DL DW " " TARGETCPU
630 
631 #define NTOH_ID(x)      (x)
632 #define HTON_ID(x)      (x)
633 
634 #define NTOH_DIGRAM(x)  (x)
635 #define HTON_DIGRAM(x)  (x)
636 
637 #define NTOH_LAMBDA(x)  (x)
638 #define HTON_LAMBDA(x)  (x)
639 
640 #endif
641 
642 /* used by both category load and learner save functions */
643 #define MAGIC_BUFSIZE 512
644 #define MAGIC1    "# dbacl " SIGNATURE " category %s %s\n"
645 #define MAGIC1_LEN (17 + strlen(SIGNATURE))
646 #define MAGIC2_i  "# entropy %" FMT_scanf_score_t \
647                   " logZ %" FMT_scanf_score_t " max_order %hd" \
648                   " type %s\n"
649 #define MAGIC2_o  "# entropy %" FMT_printf_score_t \
650                   " logZ %" FMT_printf_score_t " max_order %hd" \
651                   " type %s\n"
652 #define MAGIC3    "# hash_size %hd" \
653                   " features %ld unique_features %ld" \
654                   " documents %ld\n"
655 #define MAGIC4_i  "# options %" FMT_scanf_options_t " %hd %hd (%s)\n"
656 #define MAGIC4_o  "# options %" FMT_printf_options_t " %hd %hd (%s)\n"
657 #define MAGIC5_i  "# regex %s\n"
658 #define MAGIC5_o  "# regex %s||%s\n"
659 #define MAGIC5_wo "# regex %ls||%s\n"
660 #define MAGIC7_i  "# antiregex %s\n"
661 #define MAGIC7_o  "# antiregex %s||%s\n"
662 #define MAGIC7_wo "# antiregex %ls||%s\n"
663 #define MAGIC9    "# min_feature_count %ld max_feature_count %ld\n"
664 #define RESTARTPOS 8
665 #define MAGIC6    "#\n"
666 #define MAGIC8_i  "# shannon %" FMT_scanf_score_t \
667                   " shannon_s2 %" FMT_scanf_score_t "\n"
668 #define MAGIC8_o  "# shannon %" FMT_printf_score_t \
669                   " shannon_s2 %" FMT_printf_score_t "\n"
670 #define MAGIC10_i "# alpha %" FMT_scanf_score_t \
671                   " beta %" FMT_scanf_score_t \
672                   " mu %" FMT_scanf_score_t \
673                   " s2 %" FMT_scanf_score_t "\n"
674 #define MAGIC10_o "# alpha %" FMT_printf_score_t \
675                   " beta %" FMT_printf_score_t \
676                   " mu %" FMT_printf_score_t \
677                   " s2 %" FMT_printf_score_t "\n"
678 #define MAGIC11   "# medialp "
679 
680 #define MAGIC_ONLINE "# dbacl " SIGNATURE " online memory dump\n"
681 
682 #define MAGIC_DUMP "# lambda | dig_ref | count | id     | token\n"
683 #define MAGIC_DUMPTBL_o "%9.3f %9.3f %7" FMT_printf_integer_t " %8lx "
684 #define MAGIC_DUMPTBL_i "%f %f %d %lx "
685 
/* data structures */

/* number of distinct values that fit into the cls (4 bits -> 16) and
   order (3 bits -> 8) bit-fields of token_type_t below */
#define TOKEN_CLASS_MAX 16
#define TOKEN_ORDER_MAX 8
/* type information attached to a token, squeezed into 8 bits of
   bit-fields (packed where PACK_STRUCTS is non-empty) */
typedef struct {
  token_class_t cls: 4;   /* token class, 0 .. TOKEN_CLASS_MAX-1 */
  token_order_t order: 3; /* token order, 0 .. TOKEN_ORDER_MAX-1 (presumably the n-gram order - confirm) */
  unsigned int mark: 1;   /* one-bit flag driven by SETMARK/UNSETMARK/MARKEDP */
} PACK_STRUCTS token_type_t;
694 
695 
/* a single (id, count) entry; empirical_t keeps an array of these in its
   hash member */
typedef struct {
  hash_value_t id;     /* hash id of the entry (presumably 0 marks an unused slot, cf. FILLEDP - confirm) */
  token_count_t count; /* count associated with this id */
} h_item_t;
700 
/* table of h_item_t entries together with its bookkeeping counters and an
   optional stack of pointers to entries. The descriptions below are read
   off the field names and types only - confirm against the code that
   fills this structure. */
typedef struct {
  hash_count_t max_tokens;           /* presumably the capacity of hash, in entries */
  hash_bit_count_t max_hash_bits;    /* presumably log2 of that capacity */
  token_count_t full_token_count;    /* presumably the total number of tokens seen */
  token_count_t unique_token_count;  /* presumably the number of distinct tokens seen */
  h_item_t *hash;                    /* the table of entries */
  bool_t track_features;             /* presumably enables use of feature_stack */
  h_item_t *feature_stack[MAX_TOKEN_LINE_STACK]; /* pointers to entries, at most MAX_TOKEN_LINE_STACK of them */
  token_stack_t feature_stack_top;   /* presumably the number of used feature_stack slots */
  int hashfull_warning;              /* presumably set once a "hash full" warning has been issued */
} empirical_t;
712 
/* a single (id, lambda weight) entry; category_t keeps an array of these
   in its hash member. The structure is packed where PACK_STRUCTS is
   non-empty. */
typedef struct {
  hash_value_t id; /* hash id of the entry */
  /* lambda weight: stored as a 16-bit digitized value when
     DIGITIZE_LAMBDA is defined (see PACK_LAMBDA/UNPACK_LAMBDA),
     otherwise as a plain weight_t */
#if defined DIGITIZE_LAMBDA
  digitized_weight_t lam;
#else
  weight_t lam;
#endif
} PACK_STRUCTS c_item_t;
721 
722 typedef enum {simple, sequential} mtype;
723 
724 typedef struct {
725   char *filename;
726   char *fullfilename;
727   token_order_t max_order;
728   token_count_t fcomplexity;
729   token_count_t model_unique_token_count;
730   token_count_t model_full_token_count;
731   document_count_t model_num_docs;
732   hash_count_t max_tokens;
733   hash_bit_count_t max_hash_bits;
734   re_bitfield retype;
735   score_t logZ;
736   score_t divergence;
737   score_t renorm;
738   score_t delta;
739   score_t complexity;
740   score_t score;
741   score_t score_div;
742   score_t score_s2;
743   score_t score_shannon;
744   score_t shannon;
745   score_t shannon_s2;
746   score_t alpha;
747   score_t beta;
748   score_t mu;
749   score_t s2;
750   score_t prior;
751   token_count_t fmiss;
752   token_count_t mediacounts[TOKEN_CLASS_MAX];
753   struct {
754     mtype type;
755     options_t options;
756     charparser_t cp;
757     digtype_t dt;
758   } model;
759   options_t c_options;
760   c_item_t *hash;
761   byte_t *mmap_start;
762   long mmap_offset;
763 #if defined DIGITIZE_DIGRAMS
764   digitized_weight_t dig[ASIZE][ASIZE];
765 #else
766   weight_t dig[ASIZE][ASIZE];
767 #endif
768 } category_t;
769 
/* a single entry of the learner's table (learner_t keeps an array of
   these in its hash member). The structure is packed where PACK_STRUCTS
   is non-empty; the representation of several members depends on the
   DIGITIZE_* configuration macros. */
typedef struct {
  token_count_t count; /* count associated with this entry */
  weight_t B; /* mustn't digitize this :-( */
  /* lambda weight, digitized when DIGITIZE_LAMBDA is defined */
#if defined DIGITIZE_LAMBDA
  digitized_weight_t lam;
#else
  weight_t lam;
#endif
  /* scratch storage: the two members of this union share the same
     memory, so only one of min / read is meaningful at any time */
  union {
    struct {
      /* both values are digitized when DIGITIZE_LWEIGHTS is defined */
#if defined DIGITIZE_LWEIGHTS
      digitized_weight_t ltrms;
      digitized_weight_t dref;
#else
      weight_t ltrms;
      weight_t dref;
#endif
    } min;
    struct {
      token_count_t eff;
    } read;
  } tmp;
  hash_value_t id;  /* hash id of the entry */
  token_type_t typ; /* class / order / mark bit-fields, see token_type_t */
} PACK_STRUCTS l_item_t;
795 
/* a growable list of hash ids plus an associated score. Field roles are
   inferred from names and types - confirm against the code using it. */
typedef struct {
  hash_value_t *stack; /* array of hash ids */
  hash_count_t top;    /* presumably the number of ids currently stored */
  hash_count_t max;    /* presumably the allocated capacity of stack */
  score_t shannon;     /* presumably a Shannon entropy value computed for this list */
} emplist_t;
802 
803 typedef struct {
804   char *filename;
805   struct {
806     FILE *file;
807     char *filename;
808     void *iobuf;
809     long offset;
810     long used;
811     off_t avail;
812     byte_t *mmap_start;
813     long mmap_offset;
814     size_t mmap_length;
815     long mmap_cursor;
816   } tmp;
817   re_bitfield retype;
818   token_order_t max_order;
819   token_count_t fixed_order_token_count[MAX_SUBMATCH];
820   token_count_t fixed_order_unique_token_count[MAX_SUBMATCH];
821   hash_bit_count_t max_hash_bits;
822   hash_count_t max_tokens;
823   token_count_t full_token_count;
824   token_count_t unique_token_count;
825   token_count_t tmax;
826   score_t logZ;
827   score_t divergence;
828   score_t shannon;
829   score_t shannon2;
830   score_t alpha;
831   score_t beta;
832   score_t mu;
833   score_t s2;
834   score_t mediaprobs[TOKEN_CLASS_MAX];
835   struct {
836     options_t options;
837     charparser_t cp;
838     digtype_t dt;
839     int tmin;
840   } model;
841   options_t u_options;
842   byte_t *mmap_start;
843   long mmap_learner_offset;
844   long mmap_hash_offset;
845   l_item_t *hash;
846   weight_t dig[ASIZE][ASIZE];
847   long int regex_token_count[MAX_RE + 1];
848   struct {
849     score_t A;
850     score_t S;
851     document_count_t count;
852     document_count_t nullcount;
853     bool_t skip;
854 #define RESERVOIR_SIZE 25
855 /*       #define RESERVOIR_SIZE 12 */
    /* the reservoir size constrains the accuracy of the variance
     * estimate. Since this is a heavy computation, we want
     * to choose the lowest value we can get away with. Here 12
     * gives an estimate for the error term to within sigma/3, which
     * hopefully is good enough for most cases.
     */
862     emplist_t emp;
863     emplist_t reservoir[RESERVOIR_SIZE];
864   } doc;
865 } learner_t;
866 /* this is used when minimizing learner divergence */
867 #define MAX_LAMBDA_JUMP 100
868 
/* a scalar alpha plus one double per alphabet symbol (ASIZE entries);
   judging by the name these are the parameters of a Dirichlet
   distribution - confirm against the code using it */
typedef struct {
  double alpha;
  double u[ASIZE];
} dirichlet_t;
873 
/* a compiled regular expression bundled with some metadata. Field roles
   other than the types are inferred from the names - confirm against the
   code using it. */
typedef struct {
  regex_t regex;         /* compiled pattern (regex_t from <regex.h>) */
  char *string;          /* presumably the source text of the pattern */
  smbitmap_t submatches; /* 16-bit bitmap, presumably selecting tagged subexpressions */
  regex_flags_t flags;   /* 8-bit flag set */
} myregex_t;
880 
881 #define MAX_BOUNDARIES 8
882 
883 #define MAX_BOUNDARY_BUFSIZE 70
884 
885 typedef enum { ceUNDEF, ceID, ceB64, ceQP, ceBIN, ceSEVEN} MIME_Content_Encoding;
886 typedef enum {
887   ctUNDEF,
888   ctTEXT_PLAIN, ctTEXT_RICH, ctTEXT_HTML, ctTEXT_XML, ctTEXT_SGML, ctTEXT_UNKNOWN,
889   ctIMAGE,
890   ctAUDIO,
891   ctVIDEO,
892   ctMODEL,
893   ctMESSAGE_RFC822,
894   ctOTHER,
895   ctOCTET_STREAM,
896   ctAPPLICATION_MSWORD
897 } MIME_Content_Type;
898 
899 typedef struct {
900   MIME_Content_Type type;
901   MIME_Content_Encoding encoding;
902 } MIME_Struct;
903 
904 typedef enum { htSTANDARD, htEXTENDED, htTRACE, htMIME, htCONT, htUNDEF } HEADER_Type;
905 
906 typedef enum { msUNDEF=1, msHEADER, msBODY, msATTACH} Mstate;
907 typedef enum { msuUNDEF=1, msuTRACK, msuMIME, msuARMOR, msuOTHER } Msubstate;
908 typedef enum { mhsUNDEF=1, mhsSUBJECT, mhsFROM, mhsTO, mhsMIME, mhsXHEADER, mhsTRACE} Mhstate;
909 typedef enum { maUNDEF=1, maENABLED} Marmor;
910 typedef enum { psPLAIN, psUUENCODE } Mplainstate;
911 typedef enum { hidUNDEF=1, hidCONTINUATION,
912 	       hidRECEIVED, hidRETURN_PATH, hidRETURN_RECEIPT_TO, hidREPLY_TO,
913 	       hidMESSAGE_ID, hidREFERENCES, hidIN_REPLY_TO,
914 	       hidRESENT_, hidORIGINAL_,
915 	       hidFROM, hidCC, hidBCC, hidSENT, hidSENDER,
916 	       hidTO,
917 	       hidSUBJECT,
918 	       hidCONTENT_, hidMIME_VERSION,
919 	       hidLIST_,
920 	       hidX_,
921 	       hidUSER_AGENT,
922 	       hidX_MS, hidCATEGORY, hidPRIORITY, hidIMPORTANCE, hidTHREAD_,
923 	       hidCOMMENTS, hidKEYWORDS, hidNOTE
924 } Mheaderid;
925 
926 typedef struct {
927   char *cache;
928   char *data_ptr;
929   size_t cache_len;
930   size_t max_line_len;
931 } decoding_cache;
932 
933 #if defined HAVE_MBRTOWC
934 
935 typedef struct {
936   wchar_t *cache;
937   wchar_t *data_ptr;
938   size_t cache_len;
939   size_t max_line_len;
940 } w_decoding_cache;
941 
942 #endif
943 
/*
 * State record handed by pointer to the mbox/MIME functions declared later
 * in this header (init_mbox_line_filter(), mbox_line_filter(),
 * extract_mime_boundary(), check_mime_boundary(), ...).
 *
 * Members whose names start with w_ exist only when HAVE_MBRTOWC is
 * defined, so the size and layout of this struct depend on that macro.
 *
 * NOTE(review): the per-member remarks marked "presumably" are read off
 * names and types only; confirm them against the filter implementation.
 */
typedef struct {
  Mstate state;        /* one of the ms* values */
  Msubstate substate;  /* one of the msu* values */
  Mhstate hstate;      /* one of the mhs* values */
  Mheaderid hid;       /* one of the hid* values */
  Marmor armor;        /* maUNDEF or maENABLED */
  MIME_Struct header, body; /* two MIME_Struct records (type declared elsewhere) */
  bool_t prev_line_empty;      /* presumably: the previous input line was blank */
  bool_t skip_until_boundary;  /* presumably: drop input until a MIME boundary */
  bool_t corruption_check;
  bool_t skip_header;
  char strip_header_char;
#if defined HAVE_MBRTOWC
  wchar_t w_strip_header_char; /* wide counterpart of strip_header_char */
#endif
  Mplainstate plainstate;      /* psPLAIN or psUUENCODE */
  /*
   * Table of MIME boundaries.  size[], identifier[] and w_identifier[] all
   * have MAX_BOUNDARIES entries; each identifier entry is an array of
   * MAX_BOUNDARY_BUFSIZE characters.
   */
  struct {
    int size[MAX_BOUNDARIES];  /* presumably the length of the matching identifier */
    char identifier[MAX_BOUNDARIES][MAX_BOUNDARY_BUFSIZE];
#if defined HAVE_MBRTOWC
    wchar_t w_identifier[MAX_BOUNDARIES][MAX_BOUNDARY_BUFSIZE];
#endif
    int index;                 /* presumably selects the current table entry */
    bool_t was_end;
  } boundary;
  decoding_cache b64_dc;       /* cache of the type taken by b64_line_filter() */
  decoding_cache qp_dc;        /* cache of the type taken by qp_line_filter() */
#if defined HAVE_MBRTOWC
  w_decoding_cache w_b64_dc;   /* wide counterpart of b64_dc */
  w_decoding_cache w_qp_dc;    /* wide counterpart of qp_dc */
#endif
} MBOX_State;
976 
/*
 * Xstate: type of XML_State.state.
 * Numbering starts at 1, so a zero-filled variable equals none of these.
 * The enumerators are unprefixed (TEXT, TAG, DISABLED, ...) and therefore
 * occupy those names in every file that includes this header.
 * NOTE(review): presumably the lexical position of the markup scanner
 * (plain text, inside a tag, inside a quoted attribute, comment, ...).
 */
typedef enum {
  TEXT       = 1,
  XTAG       = 2,
  XTAGQUOTE  = 3,
  XTAGDQUOTE = 4,
  XTAGPREQ   = 5,
  TAG        = 6,
  TAGQUOTE   = 7,
  TAGDQUOTE  = 8,
  TAGPREQ    = 9,
  CMNT       = 10,
  DISABLED   = 11
} Xstate;
/*
 * Xattribute: type of XML_State.attribute.
 * Numbering starts at 1.  Note that UNDEF is not the first entry here: it
 * sits in the middle of the list with the value 7.
 * NOTE(review): presumably identifies which tag attribute (or which part of
 * a URL-valued attribute) is being scanned -- inferred from the names.
 */
typedef enum {
  ALT               = 1,
  SRC               = 2,
  SRC_NETLOC        = 3,
  SRC_NETLOC_PREFIX = 4,
  SRC_NETLOC_PATH   = 5,
  SRC_NETLOC_SUFFIX = 6,
  UNDEF             = 7,
  JSCRIPT           = 8,
  ASTYLE            = 9
} Xattribute;
/*
 * Xparser: type of XML_State.parser.
 * Three values, numbered from 1.
 */
typedef enum {
  xpDUMB  = 1,
  xpHTML  = 2,
  xpSMART = 3
} Xparser;
/*
 * Xhide: type of XML_State.hide.
 * Numbering starts at 1; VISIBLE is the last entry (9).
 * NOTE(review): presumably names the kind of element whose content is
 * currently being suppressed, with VISIBLE meaning "nothing suppressed" --
 * inferred from the names only.
 */
typedef enum {
  SCRIPT   = 1,
  STYLE    = 2,
  COMMENT  = 3,
  NOFRAMES = 4,
  NOEMBED  = 5,
  NOSCRIPT = 6,
  NOLAYER  = 7,
  TITLE    = 8,
  VISIBLE  = 9
} Xhide;
981 
/*
 * State record handed by pointer to xml_character_filter(),
 * reset_xml_character_filter() and mbox_line_filter().
 * It consists of four enum-typed members, one per X* enum declared just
 * above this struct.
 */
typedef struct {
  Xstate state;          /* TEXT ... DISABLED */
  Xattribute attribute;  /* ALT ... ASTYLE */
  Xparser parser;        /* xpDUMB, xpHTML or xpSMART */
  Xhide hide;            /* SCRIPT ... VISIBLE */
} XML_State;
988 
/*
 * XML_Reset: second argument of reset_xml_character_filter() and return
 * type of select_xml_defaults().
 * Numbering starts at 0 (xmlRESET); xmlUNDEF is the last entry (5).
 */
typedef enum {
  xmlRESET   = 0,
  xmlDISABLE = 1,
  xmlSMART   = 2,
  xmlHTML    = 3,
  xmlDUMB    = 4,
  xmlUNDEF   = 5
} XML_Reset;
990 
991 #ifdef __cplusplus
992 extern "C"
993 {
994 #endif
995 
996   /* these are defined in dbacl.c */
997   void sanitize_options();
998   int set_option(int op, char *optarg);
999 
  /*
   * Every function in this group takes the learner_t to work on as its
   * first argument.
   * NOTE(review): the remarks marked "presumably" are read off the names;
   * the definitions live in dbacl.c and are not visible here.
   */

  /* Presumably set up / release a learner; opath is a path string and
   * readonly a flag, both also taken by read_online_learner_struct(). */
  void init_learner(learner_t *learner, char *opath, bool_t readonly);
  void free_learner(learner_t *learner);

  /* Message counting and the "shannon" statistics. */
  void reset_mbox_messages(learner_t *learner, MBOX_State *mbox);
  void count_mbox_messages(learner_t *learner, Mstate mbox_state, char *buf);
  void calc_shannon(learner_t *learner);
  void update_shannon_partials(learner_t *learner, bool_t fulldoc);
  void optimize_and_save(learner_t *learner);

  /* Hash table access.  The last three parameters of hash_word_and_learn()
   * have the same types, in the same order, as the word_fun callbacks taken
   * by the tokenizers declared in this header. */
  l_item_t *find_in_learner(learner_t *learner, hash_value_t id);
  bool_t grow_learner_hash(learner_t *learner);
  void hash_word_and_learn(learner_t *learner,
			   char *tok, token_type_t tt, regex_count_t re);

  /* Digram table helpers. */
  void make_dirichlet_digrams(learner_t *learner);
  void make_uniform_digrams(learner_t *learner);
  void transpose_digrams(learner_t *learner);

  /* Reading and writing a learner under the path opath.  Only save_learner()
   * reports a result as an error_code_t; read_online_learner_struct()
   * returns a bool_t and write_online_learner_struct() returns nothing. */
  bool_t read_online_learner_struct(learner_t *learner, char *opath, bool_t readonly);
  void write_online_learner_struct(learner_t *learner, char *opath);
  error_code_t save_learner(learner_t *learner, char *opath);
1021 
1022 
  /* these are defined in catfun.c */

  /*
   * Path and option helpers.
   * print_model_options() receives its options_t and charparser_t by value,
   * whereas sanitize_model_options() receives pointers to them.
   * The slash-star-at comments on print_model_options() look like annotations
   * for a static checker (Splint uses this syntax); they are comments as
   * far as the compiler is concerned.
   * NOTE(review): who owns the char * results (caller-supplied buf, static
   * storage or heap) is not visible here -- confirm in catfun.c.
   */
  char *sanitize_path(char *in, char *extension);
  error_code_t sanitize_model_options(options_t *to, charparser_t *mcp, category_t *cat);
  /*@shared@*/ char *print_model_options(options_t opt, charparser_t mcp, /*@out@*/ char *buf);
  char *print_user_options(options_t opt, char *buf);

  /*
   * empirical_t helpers; each takes the empirical_t to work on as its first
   * argument.
   * NOTE(review): dmt and dmhb presumably size the hash table (their types
   * are hash_count_t and hash_bit_count_t) -- confirm in catfun.c.
   */
  void init_empirical(empirical_t *emp, hash_count_t dmt, hash_bit_count_t dmhb);
  void free_empirical(empirical_t *emp);
  void clear_empirical(empirical_t *emp);
  h_item_t *find_in_empirical(empirical_t *emp, hash_value_t id);
  score_t empirical_entropy(empirical_t *emp);
1034 
1035 
1036   void init_category(category_t *cat);
1037   void free_category(category_t *cat);
1038   c_item_t *find_in_category(category_t *cat, hash_value_t id);
1039   void init_purely_random_text_category(category_t *cat);
1040   error_code_t load_category(category_t *cat);
1041   error_code_t load_category_header(FILE *input, category_t *cat);
1042   error_code_t open_category(category_t *cat);
1043   void reload_all_categories();
1044 
  /*
   * score_word() has exactly the signature of the word_fun callbacks taken
   * by the tokenizers and process_* functions declared in this header:
   * (char *, token_type_t, regex_count_t) returning void.
   */
  void score_word(char *tok, token_type_t tt, regex_count_t re);
  /* NOTE(review): presumably a p-value for the observation obs under
   * category cat, based on a gamma distribution -- inferred from the name. */
  confidence_t gamma_pvalue(category_t *cat, double obs);
1047 
1048   /* file format handling in fh.c */
1049   void init_file_handling();
1050   void cleanup_file_handling();
1051 
1052   token_class_t get_token_class();
1053   regex_count_t load_regex(char *buf);
1054   void free_all_regexes();
1055 
  /* common multibyte and wide char functions in mbw.c */
  /*
   * Narrow (char) versions.  When HAVE_MBRTOWC is defined, this header also
   * declares a w_-prefixed wchar_t counterpart for each function below.
   * NOTE(review): the remarks marked "presumably" are read off the names;
   * confirm them in mbw.c.
   */
  good_char_t good_char(char *c);
  /*
   * Tokenizers.  Both take two callbacks:
   *   word_fun - receives (char *, token_type_t, regex_count_t); score_word()
   *              in this header has this signature.
   *   get_tt   - maps a token_order_t to a token_type_t.
   */
  void std_tokenizer(char *p, char **pq, char *hbuf,
		     token_order_t *hbuf_order, token_order_t max_order,
		     void (*word_fun)(char *, token_type_t, regex_count_t),
		     token_type_t (*get_tt)(token_order_t));
  void regex_tokenizer(char *p, int i,
		       void (*word_fun)(char *, token_type_t, regex_count_t),
		       token_type_t (*get_tt)(token_order_t));
  /* Presumably set up / release the decoding caches held in *mbox. */
  void init_decoding_caches(MBOX_State *mbox);
  void free_decoding_caches(MBOX_State *mbox);
  /*
   * b64_* and qp_* families (presumably base64 and quoted-printable).
   * Each family has the same three shapes: a filter taking a decoding_cache
   * and a line, a "filter2" taking (line, q) and returning a char *, and a
   * flush taking (line, all).
   */
  bool_t b64_line_filter(decoding_cache *b64cache, char *line);
  char *b64_line_filter2(char *line, char *q);
  bool_t b64_line_flush(char *line, bool_t all);
  bool_t qp_line_filter(decoding_cache *qpcache, char *line);
  char *qp_line_filter2(char *line, char *q);
  bool_t qp_line_flush(char *line, bool_t all);
  bool_t mhe_line_filter(char *line);
  /* Header and MIME boundary helpers.  check_mime_boundary() is the only
   * function in this group that takes its line as const. */
  int extract_header_label(MBOX_State *mbox, char *line);
  bool_t extract_mime_boundary(MBOX_State *mbox, char *line);
  bool_t check_mime_boundary(MBOX_State *mbox, const char *line);
  /* Line filters.  mbox_line_filter() takes an XML_State in addition to the
   * (MBOX_State *, char *) pair taken by plain_text_filter(). */
  bool_t mbox_line_filter(MBOX_State *mbox, char *line, XML_State *xml);
  bool_t plain_text_filter(MBOX_State *mbox, char *line);
  bool_t strings1_filter(char *line);
1080 
  /* Has the signature of the character_filter callback taken by
   * process_file() and process_directory(). */
  void xml_character_filter(XML_State *xml, char *line);
  /*
   * process_file() reads from an already opened FILE and is driven by five
   * callbacks:
   *   line_filter      - int (MBOX_State *, char *)
   *   character_filter - void (XML_State *, char *)
   *   word_fun         - void (char *, token_type_t, regex_count_t)
   *   pre_line_fun     - char * (char *)
   *   post_line_fun    - void (char *)
   * NOTE(review): line_filter is typed as returning int, while the line
   * filters declared in this header return bool_t, and mbox_line_filter()
   * takes a third XML_State * argument -- confirm how callers pass them.
   * NOTE(review): when each callback fires, and whether any may be NULL, is
   * not visible here.
   */
  void process_file(FILE *input,
		    int (*line_filter)(MBOX_State *, char *),
		    void (*character_filter)(XML_State *, char *),
		    void (*word_fun)(char *, token_type_t, regex_count_t),
		    char *(*pre_line_fun)(char *),
		    void (*post_line_fun)(char *));
  /*
   * Same callbacks as process_file(), but takes a name instead of a FILE
   * and one extra callback, post_file_fun, of type void (char *).
   */
  void process_directory(char *name,
			 int (*line_filter)(MBOX_State *, char *),
			 void (*character_filter)(XML_State *, char *),
			 void (*word_fun)(char *, token_type_t, regex_count_t),
			 char *(*pre_line_fun)(char *),
			 void (*post_line_fun)(char *),
			 void (*post_file_fun)(char *));
1095 
  /*
   * Setup, teardown and reset of the filter state records.
   * NOTE(review): what separates reset_mbox_line_filter() from
   * init_mbox_line_filter() is not visible here -- confirm where they are
   * defined.
   */
  void init_mbox_line_filter(MBOX_State *mbox);
  void free_mbox_line_filter(MBOX_State *mbox);
  void reset_mbox_line_filter(MBOX_State *mbox);
  /* reset takes one of the xml* values of XML_Reset; select_xml_defaults()
   * returns such a value for a given MIME_Struct. */
  void reset_xml_character_filter(XML_State *xml, XML_Reset reset);
  XML_Reset select_xml_defaults(MIME_Struct *mime);
1101 
1102   /* probabilities in probs.c */
1103   double log_poisson(int k, double lambda);
1104   double sample_mean(double x, double n);
1105   double sample_variance(double ss, double x, double n);
1106   double min_prob(int k, int n, double mu[], double sigma[]);
1107 
1108 #if defined HAVE_MBRTOWC
1109 /*   int w_b64_code(wchar_t c); */
1110 /*   int w_qp_code(wchar_t c); */
  /*
   * Wide-character counterparts of the narrow functions declared earlier in
   * this header; declared only when HAVE_MBRTOWC is defined.  Each mirrors
   * its narrow namesake with wchar_t in place of char for the text
   * arguments.
   * Note that the tokenizers still take char **pq / char *hbuf and a
   * word_fun callback receiving char *, exactly as the narrow versions do;
   * only the input text p is wide.
   */
  good_char_t w_good_char(wchar_t *c);
  void w_std_tokenizer(wchar_t *p, char **pq, char *hbuf,
		       token_order_t *hbuf_order, token_order_t max_order,
		       void (*word_fun)(char *, token_type_t, regex_count_t),
		       token_type_t (*get_tt)(token_order_t));
  void w_regex_tokenizer(wchar_t *p, int i,
			 void (*word_fun)(char *, token_type_t, regex_count_t),
			 token_type_t (*get_tt)(token_order_t));
  void w_init_decoding_caches(MBOX_State *mbox);
  void w_free_decoding_caches(MBOX_State *mbox);
  /* The filters here take a w_decoding_cache where the narrow versions take
   * a decoding_cache. */
  bool_t w_b64_line_filter(w_decoding_cache *w_b64cache, wchar_t *line);
  wchar_t *w_b64_line_filter2(wchar_t *line, wchar_t *q);
  bool_t w_b64_line_flush(wchar_t *line, bool_t all);
  bool_t w_qp_line_filter(w_decoding_cache *w_qpcache, wchar_t *line);
  wchar_t *w_qp_line_filter2(wchar_t *line, wchar_t *q);
  bool_t w_qp_line_flush(wchar_t *line, bool_t all);
  bool_t w_mhe_line_filter(wchar_t *line);
  int w_extract_header_label(MBOX_State *mbox, wchar_t *line);
  bool_t w_extract_mime_boundary(MBOX_State *mbox, wchar_t *line);
  bool_t w_check_mime_boundary(MBOX_State *mbox, const wchar_t *line);
  bool_t w_mbox_line_filter(MBOX_State *mbox, wchar_t *line, XML_State *xml);
  bool_t w_plain_text_filter(MBOX_State *mbox, wchar_t *line);
  bool_t w_strings1_filter(wchar_t *line);
1134 
  /*
   * Explicit prototype for wcsncasecmp(), given here whenever HAVE_MBRTOWC
   * is defined.
   * NOTE(review): presumably present because <wchar.h> does not declare the
   * function under every feature-test setting; whether the project also
   * supplies its own definition is not visible from this header.
   */
  int wcsncasecmp(const wchar_t *s1, const wchar_t *s2, size_t n);
1136 
  /* Has the signature of the character_filter callback taken by
   * w_process_file() and w_process_directory(). */
  void w_xml_character_filter(XML_State *xml, wchar_t *line);
  /*
   * Wide-character variants of process_file() / process_directory().
   * Only line_filter and character_filter receive wchar_t text; word_fun,
   * pre_line_fun, post_line_fun and post_file_fun take char *, as in the
   * narrow versions.
   * NOTE(review): when each callback fires, and whether any may be NULL, is
   * not visible here.
   */
  void w_process_file(FILE *input,
		      int (*line_filter)(MBOX_State *, wchar_t *),
		      void (*character_filter)(XML_State *, wchar_t *),
		      void (*word_fun)(char *, token_type_t, regex_count_t),
		      char *(*pre_line_fun)(char *),
		      void (*post_line_fun)(char *));
  void w_process_directory(char *name,
			   int (*line_filter)(MBOX_State *, wchar_t *),
			   void (*character_filter)(XML_State *, wchar_t *),
			   void (*word_fun)(char *, token_type_t, regex_count_t),
			   char *(*pre_line_fun)(char *),
			   void (*post_line_fun)(char *),
			   void (*post_file_fun)(char *));
1151 
1152 #endif
1153 
/*
 * If _SC_PAGE_SIZE is defined as a macro but _SC_PAGESIZE is not, make
 * _SC_PAGESIZE an alias for it, so code can use the _SC_PAGESIZE spelling
 * in either case.  Nothing is defined when both or neither exist.
 */
#if defined(_SC_PAGE_SIZE) && !defined(_SC_PAGESIZE)
#define _SC_PAGESIZE _SC_PAGE_SIZE
#endif
1159 
1160 
1161 #ifdef __cplusplus
1162 }
1163 #endif
1164 
1165 #endif
1166