1 //  Judy arrays 13 DEC 2012 (judy64n.c from http://code.google.com/p/judyarray/ )
2 //  This code is public domain.
3 
4 //  Author Karl Malbrain, malbrain AT yahoo.com
5 //  with assistance from Jan Weiss.
6 //  modifications (and any bugs) by Mark Pictor, mpictor at gmail
7 
8 //  Simplified judy arrays for strings and integers
9 //  Adapted from the ideas of Douglas Baskins of HP.
10 
11 //  Map a set of keys to corresponding memory cells (uints).
12 //  Each cell must be set to a non-zero value by the caller.
13 
14 
15 //  Integer mappings are denoted by calling judy_open with the
16 //  Integer depth of the Judy Trie as the second argument.
17 
18 #ifndef STANDALONE
19 #  error must define STANDALONE while compiling this file and judy64.c
20 #endif
21 
22 #include "judy.h"
23 #include "sort.h"
24 
25 unsigned int MaxMem = 0;
26 /**
27    usage:
28    a.out [in-file] [out-file]
29    where all lines of in-file are 32 chars, hexadecimal
30    On linux, a 1M-line file can be created with the following:
31    hexdump -v -e '2/8 "%08x"' -e '"\n"' /dev/urandom |head -n 1000000 >in-file
32 */
main(int argc,char ** argv)33 int main( int argc, char ** argv ) {
34     unsigned char buff[1024];
35     JudySlot max = 0;
36     JudySlot * cell;
37     FILE * in, *out;
38     void * judy;
39     unsigned int len;
40     unsigned int idx;
41 
42     if( argc > 1 ) {
43         in = fopen( argv[1], "rb" );
44     } else {
45         in = stdin;
46     }
47 
48     if( argc > 2 ) {
49         out = fopen( argv[2], "wb" );
50     } else {
51         out = stdout;
52     }
53 
54     setvbuf( out, NULL, _IOFBF, 4096 * 1024 );
55 
56     if( !in ) {
57         fprintf( stderr, "unable to open input file\n" );
58     }
59 
60     if( !out ) {
61         fprintf( stderr, "unable to open output file\n" );
62     }
63 
64     PennyMerge = ( unsigned long long )PennyLine * PennyRecs;
65 
66     judy = judy_open( 1024, 16 / JUDY_key_size );
67 
68     while( fgets( ( char * )buff, sizeof( buff ), in ) ) {
69         judyvalue key[16 / JUDY_key_size];
70         if( len = strlen( ( const char * )buff ) ) {
71             buff[--len] = 0;    // remove LF
72         }
73 #if JUDY_key_size == 4
74         key[3] = strtoul( buff + 24, NULL, 16 );
75         buff[24] = 0;
76         key[2] = strtoul( buff + 16, NULL, 16 );
77         buff[16] = 0;
78         key[1] = strtoul( buff + 8, NULL, 16 );
79         buff[8] = 0;
80         key[0] = strtoul( buff, NULL, 16 );
81 #else
82         key[1] = strtoull( buff + 16, NULL, 16 );
83         buff[16] = 0;
84         key[0] = strtoull( buff, NULL, 16 );
85 #endif
86         *( judy_cell( judy, ( void * )key, 0 ) ) += 1;   // count instances of string
87         max++;
88     }
89 
90     fprintf( stderr, "%" PRIuint " memory used\n", MaxMem );
91 
92     cell = judy_strt( judy, NULL, 0 );
93 
94     if( cell ) do {
95             judyvalue key[16 / JUDY_key_size];
96             len = judy_key( judy, ( void * )key, 0 );
97             for( idx = 0; idx < *cell; idx++ ) {       // spit out duplicates
98 #if JUDY_key_size == 4
99                 fprintf( out, "%.8X", key[0] );
100                 fprintf( out, "%.8X", key[1] );
101                 fprintf( out, "%.8X", key[2] );
102                 fprintf( out, "%.8X", key[3] );
103 #else
104                 fprintf( out, "%.16llX", key[0] );
105                 fprintf( out, "%.16llX", key[1] );
106 #endif
107                 fputc( '\n', out );
108             }
109         } while( cell = judy_nxt( judy ) );
110 
111 #if 0
112     // test deletion all the way to an empty tree
113 
114     if( cell = judy_prv( judy ) )
115         do {
116             max -= *cell;
117         } while( cell = judy_del( judy ) );
118 
119     assert( max == 0 );
120 #endif
121     judy_close( judy );
122     return 0;
123 }
124 
125