1 /* $NetBSD: sort.h,v 1.34 2011/09/16 15:39:29 joerg Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 * 63 * @(#)sort.h 8.1 (Berkeley) 6/6/93 64 */ 65 66 #include <sys/param.h> 67 68 #include <err.h> 69 #include <errno.h> 70 #include <fcntl.h> 71 #include <limits.h> 72 #include <stddef.h> 73 #include <stdio.h> 74 #include <stdlib.h> 75 #include <string.h> 76 77 #define NBINS 256 78 79 /* values for masks, weights, and other flags. */ 80 /* R and F get used to index weight_tables[] */ 81 #define R 0x01 /* Field is reversed */ 82 #define F 0x02 /* weight lower and upper case the same */ 83 #define I 0x04 /* mask out non-printable characters */ 84 #define D 0x08 /* sort alphanumeric characters only */ 85 #define N 0x10 /* Field is a number */ 86 #define BI 0x20 /* ignore blanks in icol */ 87 #define BT 0x40 /* ignore blanks in tcol */ 88 #define L 0x80 /* Sort by field length */ 89 #if defined(__minix) 90 #define X 0x100 /* Field is a hex number */ 91 #endif /* defined(__minix) */ 92 93 /* masks for delimiters: blanks, fields, and termination. */ 94 #define BLANK 1 /* ' ', '\t'; '\n' if -R is invoked */ 95 #define FLD_D 2 /* ' ', '\t' default; from -t otherwise */ 96 #define REC_D_F 4 /* '\n' default; from -R otherwise */ 97 98 #define min(a, b) ((a) < (b) ? (a) : (b)) 99 #define max(a, b) ((a) > (b) ? (a) : (b)) 100 101 #define FCLOSE(file) { \ 102 if (EOF == fclose(file)) \ 103 err(2, "%p", file); \ 104 } 105 106 #define EWRITE(ptr, size, n, f) { \ 107 if (!fwrite(ptr, size, n, f)) \ 108 err(2, NULL); \ 109 } 110 111 /* Records are limited to MAXBUFSIZE (8MB) and less if you want to sort 112 * in a sane way. 113 * Anyone who wants to sort data records longer than 2GB definitely needs a 114 * different program! */ 115 typedef unsigned int length_t; 116 117 /* A record is a key/line pair starting at rec.data. It has a total length 118 * and an offset to the start of the line half of the pair. 119 */ 120 typedef struct recheader { 121 length_t length; /* total length of key and line */ 122 length_t offset; /* to line */ 123 int keylen; /* length of key */ 124 u_char data[]; /* key then line */ 125 } RECHEADER; 126 127 /* This is the column as seen by struct field. It is used by enterfield. 128 * They are matched with corresponding coldescs during initialization. 129 */ 130 struct column { 131 struct coldesc *p; 132 int num; 133 int indent; 134 }; 135 136 /* a coldesc has a number and pointers to the beginning and end of the 137 * corresponding column in the current line. This is determined in enterkey. 138 */ 139 typedef struct coldesc { 140 u_char *start; 141 u_char *end; 142 int num; 143 } COLDESC; 144 145 /* A field has an initial and final column; an omitted final column 146 * implies the end of the line. Flags regulate omission of blanks and 147 * numerical sorts; mask determines which characters are ignored (from -i, -d); 148 * weights determines the sort weights of a character (from -f, -r). 149 * 150 * The first field contain the global flags etc. 151 * The list terminates when icol = 0. 152 */ 153 struct field { 154 struct column icol; 155 struct column tcol; 156 u_int flags; 157 u_char *mask; 158 u_char *weights; 159 }; 160 161 struct filelist { 162 const char * const * names; 163 }; 164 165 typedef int (*get_func_t)(FILE *, RECHEADER *, u_char *, struct field *); 166 typedef void (*put_func_t)(const RECHEADER *, FILE *); 167 168 extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS]; 169 extern u_char *const weight_tables[4]; /* ascii, Rascii, Ftable, RFtable */ 170 extern u_char d_mask[NBINS]; 171 extern int SINGL_FLD, SEP_FLAG, UNIQUE, REVERSE; 172 extern int posix_sort; 173 extern int REC_D; 174 extern const char *tmpdir; 175 extern struct coldesc *clist; 176 extern int ncols; 177 178 #define DEBUG(ch) (debug_flags & (1 << ((ch) & 31))) 179 extern unsigned int debug_flags; 180 181 RECHEADER *allocrec(RECHEADER *, size_t); 182 void append(RECHEADER **, int, FILE *, void (*)(const RECHEADER *, FILE *)); 183 void concat(FILE *, FILE *); 184 length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *); 185 void fixit(int *, char **, const char *); 186 void fldreset(struct field *); 187 FILE *ftmp(void); 188 void fmerge(struct filelist *, int, FILE *, struct field *); 189 void save_for_merge(FILE *, get_func_t, struct field *); 190 void merge_sort(FILE *, put_func_t, struct field *); 191 void fsort(struct filelist *, int, FILE *, struct field *); 192 int geteasy(FILE *, RECHEADER *, u_char *, struct field *); 193 int makekey(FILE *, RECHEADER *, u_char *, struct field *); 194 int makeline(FILE *, RECHEADER *, u_char *, struct field *); 195 void makeline_copydown(RECHEADER *); 196 int optval(int, int); 197 __dead void order(struct filelist *, struct field *); 198 void putline(const RECHEADER *, FILE *); 199 void putrec(const RECHEADER *, FILE *); 200 void putkeydump(const RECHEADER *, FILE *); 201 void rd_append(int, int, int, FILE *, u_char *, u_char *); 202 void radix_sort(RECHEADER **, RECHEADER **, int); 203 int setfield(const char *, struct field *, int); 204 void settables(void); 205