1 /*- 2 * Copyright (c) 1993 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Peter McIlroy. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #ifndef lint 12 static char sccsid[] = "@(#)fields.c 5.1 (Berkeley) 06/01/93"; 13 #endif /* not lint */ 14 15 /* Subroutines to generate sort keys. */ 16 17 #include "sort.h" 18 19 #define blancmange(ptr) { \ 20 if (BLANK & d_mask[*(ptr)]) \ 21 while (BLANK & d_mask[*(++(ptr))]); \ 22 } 23 24 #define NEXTCOL(pos) { \ 25 if (!SEP_FLAG) \ 26 while (BLANK & l_d_mask[*(++pos)]); \ 27 while (!((FLD_D | REC_D_F) & l_d_mask[*++pos])); \ 28 } 29 30 extern u_char *enterfield __P((u_char *, u_char *, struct field *, int)); 31 32 extern u_char *number __P((u_char *, u_char *, u_char *, u_char *, int)); 33 34 extern struct coldesc clist[(ND+1)*2]; 35 extern int ncols; 36 37 #define DECIMAL '.' 38 #define OFFSET 128 39 40 u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */ 41 u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */ 42 u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */ 43 u_char fnum[NBINS], rnum[NBINS]; 44 45 /* 46 * constructs sort key with leading recheader, followed by the key, 47 * followed by the original line. 48 */ 49 length_t 50 enterkey(keybuf, line, size, fieldtable) 51 struct recheader *keybuf; /* pointer to start of key */ 52 DBT *line; 53 int size; 54 struct field fieldtable[]; 55 { 56 int i; 57 register u_char *l_d_mask; 58 register u_char *lineend, *pos; 59 u_char *endkey, *keypos; 60 register struct coldesc *clpos; 61 register int col = 1; 62 struct field *ftpos; 63 l_d_mask = d_mask; 64 pos = (u_char *) line->data - 1; 65 lineend = (u_char *) line->data + line->size-1; 66 /* don't include rec_delimiter */ 67 keypos = keybuf->data; 68 69 for (i = 0; i < ncols; i++) { 70 clpos = clist + i; 71 for (; (col < clpos->num) && (pos < lineend); col++) 72 { NEXTCOL(pos); } 73 if (pos >= lineend) 74 break; 75 clpos->start = SEP_FLAG ? pos + 1 : pos; 76 NEXTCOL(pos); 77 clpos->end = pos; 78 col++; 79 if (pos >= lineend) { 80 clpos->end = lineend; 81 ++i; 82 break; 83 } 84 } 85 for (; i <= ncols; i++) 86 clist[i].start = clist[i].end = lineend; 87 if (clist[0].start < (u_char *) line->data) 88 ++clist[0].start; 89 endkey = (u_char *) keybuf + size - line->size; 90 for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) 91 if ((keypos = enterfield(keypos, endkey, ftpos, 92 fieldtable->flags)) == NULL) 93 return (1); 94 95 if (UNIQUE) 96 *(keypos-1) = REC_D; 97 keybuf->offset = keypos - keybuf->data; 98 keybuf->length = keybuf->offset + line->size; 99 if (keybuf->length + sizeof(TRECHEADER) > size) 100 return (1); /* line too long for buffer */ 101 memcpy(keybuf->data + keybuf->offset, line->data, line->size); 102 return (0); 103 } 104 105 /* 106 * constructs a field (as defined by -k) within a key 107 */ 108 u_char * 109 enterfield(tablepos, endkey, cur_fld, gflags) 110 struct field *cur_fld; 111 register u_char *tablepos, *endkey; 112 int gflags; 113 { 114 register u_char *start, *end, *lineend, *mask, *lweight; 115 struct column icol, tcol; 116 register u_int flags; 117 u_int Rflag; 118 icol = cur_fld->icol; 119 tcol = cur_fld->tcol; 120 flags = cur_fld->flags; 121 start = icol.p->start; 122 lineend = clist[ncols].end; 123 if (flags & BI) 124 blancmange(start); 125 start += icol.indent; 126 start = min(start, lineend); 127 if (!tcol.num) 128 end = lineend; 129 else { 130 if (tcol.indent) { 131 end = tcol.p->start; 132 if (flags & BT) blancmange(end); 133 end += tcol.indent; 134 end = min(end, lineend); 135 } else 136 end = tcol.p->end; 137 } 138 if (flags & N) { 139 Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0; 140 tablepos = number(tablepos, endkey, start, end, Rflag); 141 return (tablepos); 142 } 143 mask = alltable; 144 mask = cur_fld->mask; 145 lweight = cur_fld->weights; 146 for (; start < end; start++) 147 if (mask[*start]) { 148 if (*start <= 1) { 149 if (tablepos+2 >= endkey) 150 return (NULL); 151 *tablepos++ = lweight[1]; 152 *tablepos++ = lweight[*start ? 2 : 1]; 153 } else { 154 *tablepos++ = lweight[*start]; 155 if (tablepos == endkey) 156 return (NULL); 157 } 158 } 159 *tablepos++ = lweight[0]; 160 return (tablepos == endkey ? NULL : tablepos); 161 } 162 163 /* Uses the first bin to assign sign, expsign, 0, and the first 164 * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ). 165 * When sorting in forward order: 166 * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128; 167 * else use (0-99)->(2-102). 168 * If the exponent is >=61, use another byte for each additional 253 169 * in the exponent. Cutoff is at 567. 170 * To avoid confusing the exponent and the mantissa, use a field delimiter 171 * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the 172 * only time a field delimiter can come in that position. 173 * Reverse order is done analagously. 174 */ 175 176 u_char * 177 number(pos, bufend, line, lineend, Rflag) 178 register u_char *line, *pos, *bufend, *lineend; 179 int Rflag; 180 { 181 register int or_sign, parity = 0; 182 register int expincr = 1, exponent = -1; 183 int bite, expsign = 1, sign = 1; 184 register u_char lastvalue, *nonzero, *tline, *C_TENS; 185 u_char *nweights; 186 187 if (Rflag) 188 nweights = rnum; 189 else 190 nweights = fnum; 191 if (pos > bufend - 8) 192 return (NULL); 193 /* or_sign sets the sort direction: 194 * (-r: +/-)(sign: +/-)(expsign: +/-) */ 195 or_sign = sign ^ expsign ^ Rflag; 196 blancmange(line); 197 if (*line == '-') { /* set the sign */ 198 or_sign ^= 1; 199 sign = 0; 200 line++; 201 } 202 /* eat initial zeroes */ 203 for (; *line == '0' && line < lineend; line++); 204 /* calculate exponents < 0 */ 205 if (*line == DECIMAL) { 206 exponent = 1; 207 while (*++line == '0' && line < lineend) 208 exponent++; 209 expincr = 0; 210 expsign = 0; 211 } 212 /* next character better be a digit */ 213 if (*line < '1' || *line > '9' || line >= lineend) { 214 *pos++ = nweights[127]; 215 return (pos); 216 } 217 if (expincr) { 218 for (tline = line-1; *++tline >= '0' && 219 *tline <= '9' && tline < lineend;) 220 exponent++; 221 } 222 if (exponent > 567) { 223 *pos++ = nweights[sign ? (expsign ? 254 : 128) 224 : (expsign ? 0 : 126)]; 225 warnx("exponent out of bounds"); 226 return (pos); 227 } 228 bite = min(exponent, 61); 229 *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite) 230 : (expsign ? 64-bite : 64+bite)]; 231 if (bite >= 61) { 232 do { 233 exponent -= bite; 234 bite = min(exponent, 254); 235 *pos++ = nweights[or_sign ? 254-bite : bite]; 236 } while (bite == 254); 237 } 238 C_TENS = or_sign ? OFF_NTENS : OFF_TENS; 239 for (; line < lineend; line++) { 240 if (*line >= '0' && *line <= '9') { 241 if (parity) { 242 *pos++ = C_TENS[lastvalue] + (or_sign ? - *line 243 : *line); 244 if (pos == bufend) 245 return (NULL); 246 if (*line != '0' || lastvalue != '0') 247 nonzero = pos; 248 } else 249 lastvalue = *line; 250 parity ^= 1; 251 } else if(*line == DECIMAL) { 252 if(!expincr) /* a decimal already occurred once */ 253 break; 254 expincr = 0; 255 } else 256 break; 257 } 258 if (parity && lastvalue != '0') { 259 *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' : 260 OFF_TENS[lastvalue] + '0'; 261 } else 262 pos = nonzero; 263 if (pos > bufend-1) 264 return (NULL); 265 *pos++ = or_sign ? nweights[254] : nweights[0]; 266 return (pos); 267 } 268 269 /* This forces a gap around the record delimiter 270 * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255)); 271 * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0)) 272 */ 273 void 274 num_init() 275 { 276 int i; 277 TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0'; 278 NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0'; 279 OFF_TENS = TENS - '0'; 280 OFF_NTENS = NEGTENS - '0'; 281 for (i = 1; i < 10; i++) { 282 TENS[i] = TENS[i-1] + 10; 283 NEGTENS[i] = NEGTENS[i-1] - 10; 284 } 285 for (i = 0; i < REC_D; i++) { 286 fnum[i] = i; 287 rnum[255-i] = i; 288 } 289 for (i = REC_D; i <255; i++) { 290 fnum[i] = i+1; 291 rnum[255-i] = i-1; 292 } 293 } 294