1 /* $NetBSD: uniq.c,v 1.6 2014/06/21 17:48:07 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Christos Zoulas. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 #include <sys/cdefs.h> 32 __RCSID("$NetBSD: uniq.c,v 1.6 2014/06/21 17:48:07 christos Exp $"); 33 34 #include <stdio.h> 35 #include <string.h> 36 #include <stdlib.h> 37 #include <db.h> 38 #include <err.h> 39 #include <util.h> 40 #include <ctype.h> 41 #include <fcntl.h> 42 43 #include "extern.h" 44 45 static const HASHINFO hinfo = { 46 .bsize = 256, 47 .ffactor = 4, 48 .nelem = 32768, 49 .cachesize = 1024, 50 .hash = NULL, 51 .lorder = 0 52 }; 53 54 static int comp(const char *, char **, size_t *); 55 56 /* 57 * Preserve only unique content lines in a file. Input lines that have 58 * content [alphanumeric characters before a comment] are white-space 59 * normalized and have their comments removed. Then they are placed 60 * in a hash table, and only the first instance of them is printed. 61 * Comment lines without any alphanumeric content are always printed 62 * since they are there to make the file "pretty". Comment lines with 63 * alphanumeric content are also placed into the hash table and only 64 * printed once. 65 */ 66 void 67 uniq(const char *fname) 68 { 69 DB *db; 70 DBT key; 71 static const DBT data = { NULL, 0 }; 72 FILE *fp; 73 char *line; 74 size_t len; 75 76 if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL) 77 err(1, "Cannot create in memory database"); 78 79 fp = efopen(fname, "r"); 80 while ((line = fgetln(fp, &len)) != NULL) { 81 size_t complen = len; 82 char *compline; 83 if (!comp(line, &compline, &complen)) { 84 (void)fprintf(stdout, "%*.*s", (int)len, (int)len, 85 line); 86 continue; 87 } 88 key.data = compline; 89 key.size = complen; 90 switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) { 91 case 0: 92 (void)fprintf(stdout, "%*.*s", (int)len, (int)len, 93 line); 94 break; 95 case 1: 96 break; 97 case -1: 98 err(1, "put"); 99 /*NOTREACHED*/ 100 default: 101 abort(); 102 break; 103 } 104 } 105 (void)fflush(stdout); 106 exit(0); 107 } 108 109 /* 110 * normalize whitespace in the original line and place a new string 111 * with whitespace converted to a single space in compline. If the line 112 * contains just comments, we preserve them. If it contains data and 113 * comments, we kill the comments. Return 1 if the line had actual 114 * contents, or 0 if it was just a comment without alphanumeric characters. 115 */ 116 static int 117 comp(const char *origline, char **compline, size_t *len) 118 { 119 const unsigned char *p; 120 unsigned char *q; 121 char *cline; 122 size_t l = *len, complen; 123 int hasalnum, iscomment; 124 125 /* Eat leading space */ 126 for (p = (const unsigned char *)origline; l && *p && isspace(*p); 127 p++, l--) 128 continue; 129 cline = emalloc(l + 1); 130 (void)memcpy(cline, p, l); 131 cline[l] = '\0'; 132 if (*cline == '\0') 133 return 0; 134 135 complen = 0; 136 hasalnum = 0; 137 iscomment = 0; 138 139 for (q = (unsigned char *)cline; l && *p; p++, l--) { 140 if (isspace(*p)) { 141 if (complen && isspace(q[-1])) 142 continue; 143 *q++ = ' '; 144 complen++; 145 } else { 146 if (!iscomment && *p == '#') { 147 if (hasalnum) 148 break; 149 iscomment = 1; 150 } else 151 hasalnum |= isalnum(*p); 152 *q++ = *p; 153 complen++; 154 } 155 } 156 157 /* Eat trailing space */ 158 while (complen && isspace(q[-1])) { 159 --q; 160 --complen; 161 } 162 *q = '\0'; 163 *compline = cline; 164 *len = complen; 165 return hasalnum; 166 } 167