1 /* $NetBSD: fields.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Peter McIlroy. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 /* Subroutines to generate sort keys. */ 40 41 #include "sort.h" 42 43 #ifndef lint 44 __RCSID("$NetBSD: fields.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $"); 45 __SCCSID("@(#)fields.c 8.1 (Berkeley) 6/6/93"); 46 #endif /* not lint */ 47 48 #define blancmange(ptr) { \ 49 if (BLANK & d_mask[*(ptr)]) \ 50 while (BLANK & d_mask[*(++(ptr))]); \ 51 } 52 53 #define NEXTCOL(pos) { \ 54 if (!SEP_FLAG) \ 55 while (BLANK & l_d_mask[*(++pos)]); \ 56 while (!((FLD_D | REC_D_F) & l_d_mask[*++pos])); \ 57 } 58 59 static u_char *enterfield __P((u_char *, u_char *, struct field *, int)); 60 static u_char *number __P((u_char *, u_char *, u_char *, u_char *, int)); 61 62 extern struct coldesc clist[(ND+1)*2]; 63 extern int ncols; 64 65 #define DECIMAL '.' 66 #define OFFSET 128 67 68 u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */ 69 u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */ 70 u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */ 71 u_char fnum[NBINS], rnum[NBINS]; 72 73 /* 74 * constructs sort key with leading recheader, followed by the key, 75 * followed by the original line. 76 */ 77 length_t 78 enterkey(keybuf, line, size, fieldtable) 79 RECHEADER *keybuf; /* pointer to start of key */ 80 DBT *line; 81 int size; 82 struct field fieldtable[]; 83 { 84 int i; 85 u_char *l_d_mask; 86 u_char *lineend, *pos; 87 u_char *endkey, *keypos; 88 struct coldesc *clpos; 89 int col = 1; 90 struct field *ftpos; 91 l_d_mask = d_mask; 92 pos = (u_char *) line->data - 1; 93 lineend = (u_char *) line->data + line->size-1; 94 /* don't include rec_delimiter */ 95 96 for (i = 0; i < ncols; i++) { 97 clpos = clist + i; 98 for (; (col < clpos->num) && (pos < lineend); col++) { 99 NEXTCOL(pos); 100 } 101 if (pos >= lineend) 102 break; 103 clpos->start = SEP_FLAG ? pos + 1 : pos; 104 NEXTCOL(pos); 105 clpos->end = pos; 106 col++; 107 if (pos >= lineend) { 108 clpos->end = lineend; 109 i++; 110 break; 111 } 112 } 113 for (; i <= ncols; i++) 114 clist[i].start = clist[i].end = lineend; 115 if (clist[0].start < (u_char *) line->data) 116 clist[0].start++; 117 118 keypos = keybuf->data; 119 endkey = (u_char *) keybuf + size - line->size; 120 for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) 121 if ((keypos = enterfield(keypos, endkey, ftpos, 122 fieldtable->flags)) == NULL) 123 return (1); 124 125 keybuf->offset = keypos - keybuf->data; 126 keybuf->length = keybuf->offset + line->size; 127 if (keybuf->length + sizeof(TRECHEADER) > size) { 128 /* line too long for buffer */ 129 return (1); 130 } 131 132 /* 133 * Make [s]radixsort() only sort by relevant part of key if: 134 * 1. we want to choose unique items by relevant field[s] 135 * 2. we want stable sort and so the items should be sorted only by 136 * the relevant field[s] 137 */ 138 if (UNIQUE || (stable_sort && keybuf->offset < line->size)) 139 keypos[-1] = REC_D; 140 141 memcpy(keybuf->data + keybuf->offset, line->data, line->size); 142 return (0); 143 } 144 145 /* 146 * constructs a field (as defined by -k) within a key 147 */ 148 static u_char * 149 enterfield(tablepos, endkey, cur_fld, gflags) 150 struct field *cur_fld; 151 u_char *tablepos, *endkey; 152 int gflags; 153 { 154 u_char *start, *end, *lineend, *mask, *lweight; 155 struct column icol, tcol; 156 u_int flags; 157 u_int Rflag; 158 159 icol = cur_fld->icol; 160 tcol = cur_fld->tcol; 161 flags = cur_fld->flags; 162 start = icol.p->start; 163 lineend = clist[ncols].end; 164 if (flags & BI) 165 blancmange(start); 166 start += icol.indent; 167 start = min(start, lineend); 168 169 if (!tcol.num) 170 end = lineend; 171 else { 172 if (tcol.indent) { 173 end = tcol.p->start; 174 if (flags & BT) 175 blancmange(end); 176 end += tcol.indent; 177 end = min(end, lineend); 178 } else 179 end = tcol.p->end; 180 } 181 182 if (flags & N) { 183 Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0; 184 return number(tablepos, endkey, start, end, Rflag); 185 } 186 187 mask = cur_fld->mask; 188 lweight = cur_fld->weights; 189 for (; start < end; start++) 190 if (mask[*start]) { 191 if (*start <= 1) { 192 if (tablepos+2 >= endkey) 193 return (NULL); 194 *tablepos++ = lweight[1]; 195 *tablepos++ = lweight[*start ? 2 : 1]; 196 } else { 197 if (tablepos+1 >= endkey) 198 return (NULL); 199 *tablepos++ = lweight[*start]; 200 } 201 } 202 *tablepos++ = lweight[0]; 203 return (tablepos == endkey ? NULL : tablepos); 204 } 205 206 /* Uses the first bin to assign sign, expsign, 0, and the first 207 * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ). 208 * When sorting in forward order: 209 * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128; 210 * else use (0-99)->(2-102). 211 * If the exponent is >=61, use another byte for each additional 253 212 * in the exponent. Cutoff is at 567. 213 * To avoid confusing the exponent and the mantissa, use a field delimiter 214 * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the 215 * only time a field delimiter can come in that position. 216 * Reverse order is done analagously. 217 */ 218 219 static u_char * 220 number(pos, bufend, line, lineend, Rflag) 221 u_char *line, *pos, *bufend, *lineend; 222 int Rflag; 223 { 224 int or_sign, parity = 0; 225 int expincr = 1, exponent = -1; 226 int bite, expsign = 1, sign = 1; 227 u_char lastvalue, *nonzero, *tline, *C_TENS; 228 u_char *nweights; 229 230 if (Rflag) 231 nweights = rnum; 232 else 233 nweights = fnum; 234 if (pos > bufend - 8) 235 return (NULL); 236 /* 237 * or_sign sets the sort direction: 238 * (-r: +/-)(sign: +/-)(expsign: +/-) 239 */ 240 or_sign = sign ^ expsign ^ Rflag; 241 blancmange(line); 242 if (*line == '-') { /* set the sign */ 243 or_sign ^= 1; 244 sign = 0; 245 line++; 246 } 247 /* eat initial zeroes */ 248 for (; *line == '0' && line < lineend; line++) 249 ; 250 /* calculate exponents < 0 */ 251 if (*line == DECIMAL) { 252 exponent = 1; 253 while (*++line == '0' && line < lineend) 254 exponent++; 255 expincr = 0; 256 expsign = 0; 257 } 258 /* next character better be a digit */ 259 if (*line < '1' || *line > '9' || line >= lineend) { 260 *pos++ = nweights[127]; 261 return (pos); 262 } 263 if (expincr) { 264 for (tline = line-1; *++tline >= '0' && 265 *tline <= '9' && tline < lineend;) 266 exponent++; 267 } 268 if (exponent > 567) { 269 *pos++ = nweights[sign ? (expsign ? 254 : 128) 270 : (expsign ? 0 : 126)]; 271 warnx("exponent out of bounds"); 272 return (pos); 273 } 274 bite = min(exponent, 61); 275 *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite) 276 : (expsign ? 64-bite : 64+bite)]; 277 if (bite >= 61) { 278 do { 279 exponent -= bite; 280 bite = min(exponent, 254); 281 *pos++ = nweights[or_sign ? 254-bite : bite]; 282 } while (bite == 254); 283 } 284 C_TENS = or_sign ? OFF_NTENS : OFF_TENS; 285 for (; line < lineend; line++) { 286 if (*line >= '0' && *line <= '9') { 287 if (parity) { 288 *pos++ = C_TENS[lastvalue] + (or_sign ? - *line 289 : *line); 290 if (pos == bufend) 291 return (NULL); 292 if (*line != '0' || lastvalue != '0') 293 nonzero = pos; 294 } else 295 lastvalue = *line; 296 parity ^= 1; 297 } else if(*line == DECIMAL) { 298 if(!expincr) /* a decimal already occurred once */ 299 break; 300 expincr = 0; 301 } else 302 break; 303 } 304 if (parity && lastvalue != '0') { 305 *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' : 306 OFF_TENS[lastvalue] + '0'; 307 } else 308 pos = nonzero; 309 if (pos > bufend-1) 310 return (NULL); 311 *pos++ = or_sign ? nweights[254] : nweights[0]; 312 return (pos); 313 } 314 315 /* This forces a gap around the record delimiter 316 * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255)); 317 * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0)) 318 */ 319 void 320 num_init() 321 { 322 int i; 323 TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0'; 324 NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0'; 325 OFF_TENS = TENS - '0'; 326 OFF_NTENS = NEGTENS - '0'; 327 for (i = 1; i < 10; i++) { 328 TENS[i] = TENS[i - 1] + 10; 329 NEGTENS[i] = NEGTENS[i - 1] - 10; 330 } 331 for (i = 0; i < REC_D; i++) { 332 fnum[i] = i; 333 rnum[255 - i] = i; 334 } 335 for (i = REC_D; i <255; i++) { 336 fnum[i] = i + 1; 337 rnum[255 - i] = i - 1; 338 } 339 } 340