xref: /minix/usr.bin/sort/sort.h (revision 84d9c625)
1 /*	$NetBSD: sort.h,v 1.34 2011/09/16 15:39:29 joerg Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Ben Harris and Jaromir Dolecek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Peter McIlroy.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  *
63  *	@(#)sort.h	8.1 (Berkeley) 6/6/93
64  */
65 
66 #include <sys/param.h>
67 
68 #include <err.h>
69 #include <errno.h>
70 #include <fcntl.h>
71 #include <limits.h>
72 #include <stddef.h>
73 #include <stdio.h>
74 #include <stdlib.h>
75 #include <string.h>
76 
/* Element count of the per-character tables declared in this header. */
#define NBINS 256
78 
/*
 * Option bits used for masks, weights, and other flags.
 * R and F are also used to index weight_tables[].
 */
#define	R	(1 << 0)	/* field is reversed */
#define	F	(1 << 1)	/* lower and upper case get the same weight */
#define	I	(1 << 2)	/* mask out non-printable characters */
#define	D	(1 << 3)	/* sort alphanumeric characters only */
#define	N	(1 << 4)	/* field is a number */
#define	BI	(1 << 5)	/* ignore blanks in icol */
#define	BT	(1 << 6)	/* ignore blanks in tcol */
#define	L	(1 << 7)	/* sort by field length */
#ifdef __minix
#define	X	(1 << 8)	/* field is a hex number */
#endif /* __minix */
92 
/* Delimiter mask bits: blanks, field separators, and record terminators. */
#define BLANK	(1 << 0)	/* ' ' and '\t'; also '\n' if -R is invoked */
#define FLD_D	(1 << 1)	/* ' ' and '\t' by default; taken from -t otherwise */
#define REC_D_F	(1 << 2)	/* '\n' by default; taken from -R otherwise */
97 
/*
 * Smaller / larger of two values.  An argument may be evaluated twice, so
 * do not pass expressions with side effects.
 */
#define min(a, b)	((b) > (a) ? (a) : (b))
#define max(a, b)	((b) < (a) ? (a) : (b))
100 
/*
 * Close the stdio stream `file`; if fclose() fails, exit with status 2
 * through err(), printing the stream pointer.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and "if (c) FCLOSE(f); else ..." parses as written.
 * Invoke it with a trailing semicolon.  The argument is evaluated exactly
 * once, and the pointer is cast to void * because that is the type "%p"
 * requires.
 */
#define	FCLOSE(file) do {						\
	FILE *fclose_fp_ = (file);					\
	if (EOF == fclose(fclose_fp_))					\
		err(2, "%p", (void *)fclose_fp_);			\
} while (0)
105 
/*
 * Checked fwrite(): write `n` items of `size` bytes from `ptr` to stream
 * `f`, and exit with status 2 through err() unless all items were written.
 *
 * fwrite() signals failure by returning fewer items than requested, so the
 * result is compared with the requested count.  Testing only for a zero
 * return would let a partial write pass unnoticed and would report a
 * legitimate empty write (n == 0 or size == 0, for which fwrite() returns 0)
 * as an error.
 *
 * Each argument is evaluated exactly once.  The do { } while (0) wrapper
 * makes the expansion a single statement; invoke it with a trailing
 * semicolon.
 */
#define	EWRITE(ptr, size, n, f) do {					\
	size_t ewrite_size_ = (size);					\
	size_t ewrite_n_ = (n);						\
	if (fwrite((ptr), ewrite_size_, ewrite_n_, (f)) != ewrite_n_	\
	    && ewrite_size_ != 0)					\
		err(2, NULL);						\
} while (0)
110 
/*
 * Type used for record lengths and offsets.
 *
 * Records are limited to MAXBUFSIZE (8MB), and to less than that if you
 * want to sort in a sane way.  Anyone who wants to sort data records longer
 * than 2GB definitely needs a different program!
 */
typedef unsigned length_t;
116 
117 /* A record is a key/line pair starting at rec.data. It has a total length
118  * and an offset to the start of the line half of the pair.
119  */
120 typedef struct recheader {
121 	length_t length;	/* total length of key and line */
122 	length_t offset;	/* to line */
123 	int      keylen;	/* length of key */
124 	u_char   data[];	/* key then line */
125 } RECHEADER;
126 
/*
 * A column as struct field sees it; enterfield uses it.  During
 * initialization each column is matched with its corresponding coldesc.
 */
struct column {
	struct coldesc *p;
	int num, indent;
};
135 
136 /* a coldesc has a number and pointers to the beginning and end of the
137  * corresponding column in the current line.  This is determined in enterkey.
138  */
139 typedef struct coldesc {
140 	u_char *start;
141 	u_char *end;
142 	int num;
143 } COLDESC;
144 
145 /* A field has an initial and final column; an omitted final column
146  * implies the end of the line.  Flags regulate omission of blanks and
147  * numerical sorts; mask determines which characters are ignored (from -i, -d);
148  * weights determines the sort weights of a character (from -f, -r).
149  *
150  * The first field contain the global flags etc.
151  * The list terminates when icol = 0.
152  */
153 struct field {
154 	struct column icol;
155 	struct column tcol;
156 	u_int flags;
157 	u_char *mask;
158 	u_char *weights;
159 };
160 
/*
 * Wrapper around a set of names.  Neither the pointers nor the characters
 * they point to may be modified through `names`.
 */
struct filelist {
	const char *const *names;
};
164 
165 typedef int (*get_func_t)(FILE *, RECHEADER *, u_char *, struct field *);
166 typedef void (*put_func_t)(const RECHEADER *, FILE *);
167 
168 extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
169 extern u_char *const weight_tables[4];   /* ascii, Rascii, Ftable, RFtable */
170 extern u_char d_mask[NBINS];
171 extern int SINGL_FLD, SEP_FLAG, UNIQUE, REVERSE;
172 extern int posix_sort;
173 extern int REC_D;
174 extern const char *tmpdir;
175 extern struct coldesc *clist;
176 extern int ncols;
177 
/*
 * Non-zero when the bit selected by `ch` is set in debug_flags.  The bit
 * index is the low five bits of `ch`, i.e. (ch) & 31, so the result is the
 * masked bit itself rather than a normalised 0/1.
 *
 * The shifted constant is unsigned: the index can be 31, and shifting a
 * signed 1 into the sign bit of a 32-bit int is undefined behaviour.
 */
#define DEBUG(ch) (debug_flags & (1u << ((ch) & 31)))
extern unsigned int debug_flags;
180 
181 RECHEADER *allocrec(RECHEADER *, size_t);
182 void	 append(RECHEADER **, int, FILE *, void (*)(const RECHEADER *, FILE *));
183 void	 concat(FILE *, FILE *);
184 length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *);
185 void	 fixit(int *, char **, const char *);
186 void	 fldreset(struct field *);
187 FILE	*ftmp(void);
188 void	 fmerge(struct filelist *, int, FILE *, struct field *);
189 void	 save_for_merge(FILE *, get_func_t, struct field *);
190 void	 merge_sort(FILE *, put_func_t, struct field *);
191 void	 fsort(struct filelist *, int, FILE *, struct field *);
192 int	 geteasy(FILE *, RECHEADER *, u_char *, struct field *);
193 int	 makekey(FILE *, RECHEADER *, u_char *, struct field *);
194 int	 makeline(FILE *, RECHEADER *, u_char *, struct field *);
195 void	 makeline_copydown(RECHEADER *);
196 int	 optval(int, int);
197 __dead void	 order(struct filelist *, struct field *);
198 void	 putline(const RECHEADER *, FILE *);
199 void	 putrec(const RECHEADER *, FILE *);
200 void	 putkeydump(const RECHEADER *, FILE *);
201 void	 rd_append(int, int, int, FILE *, u_char *, u_char *);
202 void	 radix_sort(RECHEADER **, RECHEADER **, int);
203 int	 setfield(const char *, struct field *, int);
204 void	 settables(void);
205