1 /* $NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 /* 65 * Read in a block of records (until 'enough'). 66 * sort, write to temp file. 67 * Merge sort temp files into output file 68 * Small files miss out the temp file stage. 69 * Large files might get multiple merges. 70 */ 71 #include "sort.h" 72 #include "fsort.h" 73 74 __RCSID("$NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $"); 75 76 #include <stdlib.h> 77 #include <string.h> 78 79 #define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1)) 80 81 void 82 fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl) 83 { 84 RECHEADER **keylist; 85 RECHEADER **keypos, **keyp; 86 RECHEADER *buffer; 87 size_t bufsize = DEFBUFSIZE; 88 u_char *bufend; 89 int mfct = 0; 90 int c, nelem; 91 get_func_t get; 92 RECHEADER *crec; 93 RECHEADER *nbuffer; 94 FILE *fp, *tmp_fp; 95 int file_no; 96 int max_recs = DEBUG('m') ? 16 : MAXNUM; 97 98 buffer = allocrec(NULL, bufsize); 99 bufend = (u_char *)buffer + bufsize; 100 /* Allocate double length keymap for radix_sort */ 101 keylist = malloc(2 * max_recs * sizeof(*keylist)); 102 if (buffer == NULL || keylist == NULL) 103 err(2, "failed to malloc initial buffer or keylist"); 104 105 if (SINGL_FLD) 106 /* Key and data are one! */ 107 get = makeline; 108 else 109 /* Key (merged key fields) added before data */ 110 get = makekey; 111 112 file_no = 0; 113 fp = fopen(filelist->names[0], "r"); 114 if (fp == NULL) 115 err(2, "%s", filelist->names[0]); 116 117 /* Loop through reads of chunk of input files that get sorted 118 * and then merged together. */ 119 for (;;) { 120 keypos = keylist; 121 nelem = 0; 122 crec = buffer; 123 makeline_copydown(crec); 124 125 /* Loop reading records */ 126 for (;;) { 127 c = get(fp, crec, bufend, ftbl); 128 /* 'c' is 0, EOF or BUFFEND */ 129 if (c == 0) { 130 /* Save start of key in input buffer */ 131 *keypos++ = crec; 132 if (++nelem == max_recs) { 133 c = BUFFEND; 134 break; 135 } 136 crec = (RECHEADER *)(crec->data + SALIGN(crec->length)); 137 continue; 138 } 139 if (c == EOF) { 140 /* try next file */ 141 if (++file_no >= nfiles) 142 /* no more files */ 143 break; 144 fp = fopen(filelist->names[file_no], "r"); 145 if (fp == NULL) 146 err(2, "%s", filelist->names[file_no]); 147 continue; 148 } 149 if (nelem >= max_recs 150 || (bufsize >= MAXBUFSIZE && nelem > 8)) 151 /* Need to sort and save this lot of data */ 152 break; 153 154 /* c == BUFFEND, and we can process more data */ 155 /* Allocate a larger buffer for this lot of data */ 156 bufsize *= 2; 157 nbuffer = allocrec(buffer, bufsize); 158 if (!nbuffer) { 159 err(2, "failed to realloc buffer to %zu bytes", 160 bufsize); 161 } 162 163 /* patch up keylist[] */ 164 for (keyp = &keypos[-1]; keyp >= keylist; keyp--) 165 *keyp = nbuffer + (*keyp - buffer); 166 167 crec = nbuffer + (crec - buffer); 168 buffer = nbuffer; 169 bufend = (u_char *)buffer + bufsize; 170 } 171 172 /* Sort this set of records */ 173 radix_sort(keylist, keylist + max_recs, nelem); 174 175 if (c == EOF && mfct == 0) { 176 /* all the data is (sorted) in the buffer */ 177 append(keylist, nelem, outfp, 178 DEBUG('k') ? putkeydump : putline); 179 break; 180 } 181 182 /* Save current data to a temporary file for a later merge */ 183 if (nelem != 0) { 184 tmp_fp = ftmp(); 185 append(keylist, nelem, tmp_fp, putrec); 186 save_for_merge(tmp_fp, geteasy, ftbl); 187 } 188 mfct = 1; 189 190 if (c == EOF) { 191 /* merge to output file */ 192 merge_sort(outfp, 193 DEBUG('k') ? putkeydump : putline, ftbl); 194 break; 195 } 196 } 197 198 free(keylist); 199 keylist = NULL; 200 free(buffer); 201 buffer = NULL; 202 } 203