1 // Judy arrays 13 DEC 2012 (judy64n.c from http://code.google.com/p/judyarray/ )
2 // This code is public domain.
3
4 // Author Karl Malbrain, malbrain AT yahoo.com
5 // with assistance from Jan Weiss.
6 // modifications (and any bugs) by Mark Pictor, mpictor at gmail
7
8 // Simplified judy arrays for strings and integers
9 // Adapted from the ideas of Douglas Baskins of HP.
10
11 // Map a set of keys to corresponding memory cells (uints).
12 // Each cell must be set to a non-zero value by the caller.
13
14
15 // Integer mappings are denoted by calling judy_open with the
16 // Integer depth of the Judy Trie as the second argument.
17
18 #ifndef STANDALONE
19 # error must define STANDALONE while compiling this file and judy64.c
20 #endif
21
22 #include "judy.h"
23 #include "sort.h"
24
25 unsigned int MaxMem = 0;
26 /**
27 usage:
28 a.out [in-file] [out-file]
29 where all lines of in-file are 32 chars, hexadecimal
30 On linux, a 1M-line file can be created with the following:
31 hexdump -v -e '2/8 "%08x"' -e '"\n"' /dev/urandom |head -n 1000000 >in-file
32 */
main(int argc,char ** argv)33 int main( int argc, char ** argv ) {
34 unsigned char buff[1024];
35 JudySlot max = 0;
36 JudySlot * cell;
37 FILE * in, *out;
38 void * judy;
39 unsigned int len;
40 unsigned int idx;
41
42 if( argc > 1 ) {
43 in = fopen( argv[1], "rb" );
44 } else {
45 in = stdin;
46 }
47
48 if( argc > 2 ) {
49 out = fopen( argv[2], "wb" );
50 } else {
51 out = stdout;
52 }
53
54 setvbuf( out, NULL, _IOFBF, 4096 * 1024 );
55
56 if( !in ) {
57 fprintf( stderr, "unable to open input file\n" );
58 }
59
60 if( !out ) {
61 fprintf( stderr, "unable to open output file\n" );
62 }
63
64 PennyMerge = ( unsigned long long )PennyLine * PennyRecs;
65
66 judy = judy_open( 1024, 16 / JUDY_key_size );
67
68 while( fgets( ( char * )buff, sizeof( buff ), in ) ) {
69 judyvalue key[16 / JUDY_key_size];
70 if( len = strlen( ( const char * )buff ) ) {
71 buff[--len] = 0; // remove LF
72 }
73 #if JUDY_key_size == 4
74 key[3] = strtoul( buff + 24, NULL, 16 );
75 buff[24] = 0;
76 key[2] = strtoul( buff + 16, NULL, 16 );
77 buff[16] = 0;
78 key[1] = strtoul( buff + 8, NULL, 16 );
79 buff[8] = 0;
80 key[0] = strtoul( buff, NULL, 16 );
81 #else
82 key[1] = strtoull( buff + 16, NULL, 16 );
83 buff[16] = 0;
84 key[0] = strtoull( buff, NULL, 16 );
85 #endif
86 *( judy_cell( judy, ( void * )key, 0 ) ) += 1; // count instances of string
87 max++;
88 }
89
90 fprintf( stderr, "%" PRIuint " memory used\n", MaxMem );
91
92 cell = judy_strt( judy, NULL, 0 );
93
94 if( cell ) do {
95 judyvalue key[16 / JUDY_key_size];
96 len = judy_key( judy, ( void * )key, 0 );
97 for( idx = 0; idx < *cell; idx++ ) { // spit out duplicates
98 #if JUDY_key_size == 4
99 fprintf( out, "%.8X", key[0] );
100 fprintf( out, "%.8X", key[1] );
101 fprintf( out, "%.8X", key[2] );
102 fprintf( out, "%.8X", key[3] );
103 #else
104 fprintf( out, "%.16llX", key[0] );
105 fprintf( out, "%.16llX", key[1] );
106 #endif
107 fputc( '\n', out );
108 }
109 } while( cell = judy_nxt( judy ) );
110
111 #if 0
112 // test deletion all the way to an empty tree
113
114 if( cell = judy_prv( judy ) )
115 do {
116 max -= *cell;
117 } while( cell = judy_del( judy ) );
118
119 assert( max == 0 );
120 #endif
121 judy_close( judy );
122 return 0;
123 }
124
125