xref: /netbsd/usr.sbin/makemandb/apropos.c (revision cbcd1f0c)
1*cbcd1f0cSgutteridge /*	$NetBSD: apropos.c,v 1.26 2022/05/19 04:08:03 gutteridge Exp $	*/
218810962Sjoerg /*-
318810962Sjoerg  * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay@gmail.com>
418810962Sjoerg  * All rights reserved.
518810962Sjoerg  *
618810962Sjoerg  * This code was developed as part of Google's Summer of Code 2011 program.
718810962Sjoerg  *
818810962Sjoerg  * Redistribution and use in source and binary forms, with or without
918810962Sjoerg  * modification, are permitted provided that the following conditions
1018810962Sjoerg  * are met:
1118810962Sjoerg  *
1218810962Sjoerg  * 1. Redistributions of source code must retain the above copyright
1318810962Sjoerg  *    notice, this list of conditions and the following disclaimer.
1418810962Sjoerg  * 2. Redistributions in binary form must reproduce the above copyright
1518810962Sjoerg  *    notice, this list of conditions and the following disclaimer in
1618810962Sjoerg  *    the documentation and/or other materials provided with the
1718810962Sjoerg  *    distribution.
1818810962Sjoerg  *
1918810962Sjoerg  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2018810962Sjoerg  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2118810962Sjoerg  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
2218810962Sjoerg  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
2318810962Sjoerg  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2418810962Sjoerg  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
2518810962Sjoerg  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
2618810962Sjoerg  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
2718810962Sjoerg  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
2818810962Sjoerg  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
2918810962Sjoerg  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3018810962Sjoerg  * SUCH DAMAGE.
3118810962Sjoerg  */
3218810962Sjoerg 
3318810962Sjoerg #include <sys/cdefs.h>
34*cbcd1f0cSgutteridge __RCSID("$NetBSD: apropos.c,v 1.26 2022/05/19 04:08:03 gutteridge Exp $");
3518810962Sjoerg 
3618810962Sjoerg #include <err.h>
37*cbcd1f0cSgutteridge #include <signal.h>
3818810962Sjoerg #include <stdio.h>
3918810962Sjoerg #include <stdlib.h>
4018810962Sjoerg #include <string.h>
4118810962Sjoerg #include <unistd.h>
4218810962Sjoerg #include <util.h>
4318810962Sjoerg 
4418810962Sjoerg #include "apropos-utils.h"
4518810962Sjoerg 
4618810962Sjoerg typedef struct apropos_flags {
4754703944Sabhinav 	char **sections;
4818810962Sjoerg 	int nresults;
4918810962Sjoerg 	int pager;
5018810962Sjoerg 	int no_context;
51be3d6103Schristos 	query_format format;
5262c18948Schristos 	int legacy;
5318810962Sjoerg 	const char *machine;
5480158110Sabhinav 	const char *manconf;
5518810962Sjoerg } apropos_flags;
5618810962Sjoerg 
5718810962Sjoerg typedef struct callback_data {
5818810962Sjoerg 	int count;
5918810962Sjoerg 	FILE *out;
6018810962Sjoerg 	apropos_flags *aflags;
6118810962Sjoerg } callback_data;
6218810962Sjoerg 
6318810962Sjoerg static char *remove_stopwords(const char *);
649e944563Sabhinav static int query_callback(query_callback_args *);
6518810962Sjoerg __dead static void usage(void);
6618810962Sjoerg 
6718810962Sjoerg #define _PATH_PAGER	"/usr/bin/more -s"
6854703944Sabhinav #define SECTIONS_ARGS_LENGTH 4;
6918810962Sjoerg 
7062c18948Schristos static void
parseargs(int argc,char ** argv,struct apropos_flags * aflags)7162c18948Schristos parseargs(int argc, char **argv, struct apropos_flags *aflags)
7262c18948Schristos {
7362c18948Schristos 	int ch;
7454703944Sabhinav 	size_t sections_offset = 0;
7554703944Sabhinav 	size_t sections_size = 0;
7654703944Sabhinav 	char **sections = NULL;
7754703944Sabhinav 	char *section;
7880158110Sabhinav 	aflags->manconf = MANCONF;
790842e310Schristos 
8054703944Sabhinav #define RESIZE_SECTIONS(newsize) \
8154703944Sabhinav 	if (sections == NULL || sections_offset > sections_size - 1) { \
8254703944Sabhinav 		sections_size += newsize; \
8354703944Sabhinav 		sections = erealloc(sections, sections_size * sizeof(*sections)); \
8454703944Sabhinav 	}
8554703944Sabhinav 
8680158110Sabhinav 	while ((ch = getopt(argc, argv, "123456789C:hilMmn:PprS:s:")) != -1) {
8762c18948Schristos 		switch (ch) {
8862c18948Schristos 		case '1':
8962c18948Schristos 		case '2':
9062c18948Schristos 		case '3':
9162c18948Schristos 		case '4':
9262c18948Schristos 		case '5':
9362c18948Schristos 		case '6':
9462c18948Schristos 		case '7':
9562c18948Schristos 		case '8':
9662c18948Schristos 		case '9':
9754703944Sabhinav 			section = emalloc(2);
9854703944Sabhinav 			section[0] = ch;
9954703944Sabhinav 			section[1] = 0;
10054703944Sabhinav 			RESIZE_SECTIONS(SECTIONS_ARGS_LENGTH)
10154703944Sabhinav 			sections[sections_offset++] = section;
10262c18948Schristos 			break;
10362c18948Schristos 		case 'C':
10480158110Sabhinav 			aflags->manconf = optarg;
10562c18948Schristos 			break;
106be3d6103Schristos 		case 'h':
107be3d6103Schristos 			aflags->format = APROPOS_HTML;
108be3d6103Schristos 			break;
10962c18948Schristos 		case 'i':
110be3d6103Schristos 			aflags->format = APROPOS_TERM;
11162c18948Schristos 			break;
11262c18948Schristos 		case 'l':
11362c18948Schristos 			aflags->legacy = 1;
11462c18948Schristos 			aflags->no_context = 1;
115be3d6103Schristos 			aflags->format = APROPOS_NONE;
11662c18948Schristos 			break;
11780158110Sabhinav 		case 'M':
11880158110Sabhinav 			aflags->no_context = 1;
11980158110Sabhinav 			break;
12080158110Sabhinav 		case 'm':
12180158110Sabhinav 			aflags->no_context = 0;
12280158110Sabhinav 			break;
12362c18948Schristos 		case 'n':
12462c18948Schristos 			aflags->nresults = atoi(optarg);
12562c18948Schristos 			break;
12662c18948Schristos 		case 'p':	// user wants a pager
12762c18948Schristos 			aflags->pager = 1;
128be3d6103Schristos 			/*FALLTHROUGH*/
129be3d6103Schristos 		case 'P':
130be3d6103Schristos 			aflags->format = APROPOS_PAGER;
13162c18948Schristos 			break;
13262c18948Schristos 		case 'r':
133be3d6103Schristos 			aflags->format = APROPOS_NONE;
13462c18948Schristos 			break;
13562c18948Schristos 		case 'S':
13662c18948Schristos 			aflags->machine = optarg;
13762c18948Schristos 			break;
13862c18948Schristos 		case 's':
13954703944Sabhinav 			RESIZE_SECTIONS(SECTIONS_ARGS_LENGTH)
14054703944Sabhinav 			sections[sections_offset++] = estrdup(optarg);
14162c18948Schristos 			break;
14262c18948Schristos 		case '?':
14362c18948Schristos 		default:
14462c18948Schristos 			usage();
14562c18948Schristos 		}
14662c18948Schristos 	}
14754703944Sabhinav 	if (sections) {
14854703944Sabhinav 		RESIZE_SECTIONS(1)
14954703944Sabhinav 		sections[sections_offset] = NULL;
15054703944Sabhinav 	}
15154703944Sabhinav 	aflags->sections = sections;
15262c18948Schristos }
15362c18948Schristos 
15418810962Sjoerg int
main(int argc,char * argv[])15518810962Sjoerg main(int argc, char *argv[])
15618810962Sjoerg {
15718810962Sjoerg 	query_args args;
15818810962Sjoerg 	char *query = NULL;	// the user query
15918810962Sjoerg 	char *errmsg = NULL;
16018810962Sjoerg 	char *str;
1613c2f6c78Sgutteridge 	int pc = 0;
16262c18948Schristos 	int rc = 0;
16354703944Sabhinav 	size_t i;
16487ca7dcaSjoerg 	int s;
16518810962Sjoerg 	callback_data cbdata;
16618810962Sjoerg 	cbdata.out = stdout;		// the default output stream
16718810962Sjoerg 	cbdata.count = 0;
16818810962Sjoerg 	apropos_flags aflags;
16954703944Sabhinav 	aflags.sections = NULL;
17018810962Sjoerg 	cbdata.aflags = &aflags;
17118810962Sjoerg 	sqlite3 *db;
17218810962Sjoerg 	setprogname(argv[0]);
17318810962Sjoerg 	if (argc < 2)
17418810962Sjoerg 		usage();
17518810962Sjoerg 
17618810962Sjoerg 	memset(&aflags, 0, sizeof(aflags));
17718810962Sjoerg 
17862c18948Schristos 	if (!isatty(STDOUT_FILENO))
179be3d6103Schristos 		aflags.format = APROPOS_NONE;
180be3d6103Schristos 	else
181be3d6103Schristos 		aflags.format = APROPOS_TERM;
18262c18948Schristos 
18362c18948Schristos 	if ((str = getenv("APROPOS")) != NULL) {
18462c18948Schristos 		char **ptr = emalloc((strlen(str) + 2) * sizeof(*ptr));
18562c18948Schristos #define WS "\t\n\r "
18662c18948Schristos 		ptr[0] = __UNCONST(getprogname());
18762c18948Schristos 		for (s = 1, str = strtok(str, WS); str;
18862c18948Schristos 		    str = strtok(NULL, WS), s++)
18962c18948Schristos 			ptr[s] = str;
19062c18948Schristos 		ptr[s] = NULL;
19162c18948Schristos 		parseargs(s, ptr, &aflags);
19262c18948Schristos 		free(ptr);
19362c18948Schristos 		optreset = 1;
19462c18948Schristos 		optind = 1;
19518810962Sjoerg 	}
19662c18948Schristos 
19762c18948Schristos 	parseargs(argc, argv, &aflags);
19862c18948Schristos 
19918810962Sjoerg 	argc -= optind;
20018810962Sjoerg 	argv += optind;
20118810962Sjoerg 
20218810962Sjoerg 	if (!argc)
20318810962Sjoerg 		usage();
20418810962Sjoerg 
20518810962Sjoerg 	str = NULL;
20618810962Sjoerg 	while (argc--)
20718810962Sjoerg 		concat(&str, *argv++);
20818810962Sjoerg 	query = remove_stopwords(lower(str));
20918810962Sjoerg 
21060488a8dSchristos 	/*
21160488a8dSchristos 	 * If the query consisted only of stopwords and we removed all of
21260488a8dSchristos 	 * them, use the original query.
21360488a8dSchristos 	 */
21418810962Sjoerg 	if (query == NULL)
21560488a8dSchristos 		query = str;
21660488a8dSchristos 	else
21760488a8dSchristos 		free(str);
21818810962Sjoerg 
21980158110Sabhinav 	if ((db = init_db(MANDB_READONLY, aflags.manconf)) == NULL)
22018810962Sjoerg 		exit(EXIT_FAILURE);
22118810962Sjoerg 
22218810962Sjoerg 	/* If user wants to page the output, then set some settings */
22318810962Sjoerg 	if (aflags.pager) {
22418810962Sjoerg 		const char *pager = getenv("PAGER");
22518810962Sjoerg 		if (pager == NULL)
22618810962Sjoerg 			pager = _PATH_PAGER;
227*cbcd1f0cSgutteridge 
228*cbcd1f0cSgutteridge 		/* Don't get killed by a broken pipe */
229*cbcd1f0cSgutteridge 		signal(SIGPIPE, SIG_IGN);
230*cbcd1f0cSgutteridge 
23118810962Sjoerg 		/* Open a pipe to the pager */
23218810962Sjoerg 		if ((cbdata.out = popen(pager, "w")) == NULL) {
23318810962Sjoerg 			close_db(db);
23418810962Sjoerg 			err(EXIT_FAILURE, "pipe failed");
23518810962Sjoerg 		}
23618810962Sjoerg 	}
23718810962Sjoerg 
23818810962Sjoerg 	args.search_str = query;
23954703944Sabhinav 	args.sections = aflags.sections;
24062c18948Schristos 	args.legacy = aflags.legacy;
241dd6188ceSchristos 	args.nrec = aflags.nresults ? aflags.nresults : -1;
24218810962Sjoerg 	args.offset = 0;
24318810962Sjoerg 	args.machine = aflags.machine;
24418810962Sjoerg 	args.callback = &query_callback;
24518810962Sjoerg 	args.callback_data = &cbdata;
24618810962Sjoerg 	args.errmsg = &errmsg;
2479870a313Schristos 
248be3d6103Schristos 	if (aflags.format == APROPOS_HTML) {
249be3d6103Schristos 		fprintf(cbdata.out, "<html>\n<header>\n<title>apropos results "
250be3d6103Schristos 		    "for %s</title></header>\n<body>\n<table cellpadding=\"4\""
251be3d6103Schristos 		    "style=\"border: 1px solid #000000; border-collapse:"
252be3d6103Schristos 		    "collapse;\" border=\"1\">\n", query);
253be3d6103Schristos 	}
254be3d6103Schristos 	rc = run_query(db, aflags.format, &args);
255be3d6103Schristos 	if (aflags.format == APROPOS_HTML)
256be3d6103Schristos 		fprintf(cbdata.out, "</table>\n</body>\n</html>\n");
25718810962Sjoerg 
2583c2f6c78Sgutteridge 	if (aflags.pager)
2593c2f6c78Sgutteridge 		pc = pclose(cbdata.out);
26018810962Sjoerg 	free(query);
26154703944Sabhinav 
26254703944Sabhinav 	if (aflags.sections) {
26354703944Sabhinav 		for(i = 0; aflags.sections[i]; i++)
26454703944Sabhinav 			free(aflags.sections[i]);
26554703944Sabhinav 		free(aflags.sections);
26654703944Sabhinav 	}
26754703944Sabhinav 
26818810962Sjoerg 	close_db(db);
26918810962Sjoerg 	if (errmsg) {
27018810962Sjoerg 		warnx("%s", errmsg);
27118810962Sjoerg 		free(errmsg);
27218810962Sjoerg 		exit(EXIT_FAILURE);
27318810962Sjoerg 	}
27418810962Sjoerg 
2753c2f6c78Sgutteridge 	if (pc == -1)
2763c2f6c78Sgutteridge 		err(EXIT_FAILURE, "pclose error");
2773c2f6c78Sgutteridge 
278*cbcd1f0cSgutteridge 	/*
279*cbcd1f0cSgutteridge 	 * Something wrong with the database, writing output, or a non-existent
280*cbcd1f0cSgutteridge 	 * pager.
281*cbcd1f0cSgutteridge 	 */
282*cbcd1f0cSgutteridge 	if (rc < 0)
28318810962Sjoerg 		exit(EXIT_FAILURE);
28418810962Sjoerg 
28518810962Sjoerg 	if (cbdata.count == 0) {
28618810962Sjoerg 		warnx("No relevant results obtained.\n"
28787ca7dcaSjoerg 		    "Please make sure that you spelled all the terms correctly "
288da68a3bbSjmcneill 		    "or try using different keywords.");
28918810962Sjoerg 	}
29018810962Sjoerg 	return 0;
29118810962Sjoerg }
29218810962Sjoerg 
29318810962Sjoerg /*
29418810962Sjoerg  * query_callback --
29518810962Sjoerg  *  Callback function for run_query.
296*cbcd1f0cSgutteridge  *  It simply outputs the results from run_query. If the user specified the -p
29718810962Sjoerg  *  option, then the output is sent to a pager, otherwise stdout is the default
29818810962Sjoerg  *  output stream.
29918810962Sjoerg  */
30018810962Sjoerg static int
query_callback(query_callback_args * qargs)3019e944563Sabhinav query_callback(query_callback_args *qargs)
30218810962Sjoerg {
3039e944563Sabhinav 	callback_data *cbdata = (callback_data *) qargs->other_data;
30418810962Sjoerg 	FILE *out = cbdata->out;
30518810962Sjoerg 	cbdata->count++;
306be3d6103Schristos 	if (cbdata->aflags->format != APROPOS_HTML) {
307dd6188ceSchristos 	    fprintf(out, cbdata->aflags->legacy ? "%s(%s) - %s\n" :
3089e944563Sabhinav 		"%s (%s)\t%s\n", qargs->name, qargs->section, qargs->name_desc);
30918810962Sjoerg 	    if (cbdata->aflags->no_context == 0)
3109e944563Sabhinav 		    fprintf(out, "%s\n\n", qargs->snippet);
311be3d6103Schristos 	} else {
3129e944563Sabhinav 	    fprintf(out, "<tr><td>%s(%s)</td><td>%s</td></tr>\n", qargs->name,
3139e944563Sabhinav 		qargs->section, qargs->name_desc);
314be3d6103Schristos 	    if (cbdata->aflags->no_context == 0)
3159e944563Sabhinav 		    fprintf(out, "<tr><td colspan=2>%s</td></tr>\n", qargs->snippet);
316be3d6103Schristos 	}
31718810962Sjoerg 
318*cbcd1f0cSgutteridge 	return fflush(out);
31918810962Sjoerg }
32018810962Sjoerg 
32118810962Sjoerg #include "stopwords.c"
32218810962Sjoerg 
32318810962Sjoerg /*
32418810962Sjoerg  * remove_stopwords--
32518810962Sjoerg  *  Scans the query and removes any stop words from it.
32618810962Sjoerg  *  Returns the modified query or NULL, if it contained only stop words.
32718810962Sjoerg  */
32818810962Sjoerg 
32918810962Sjoerg static char *
remove_stopwords(const char * query)33018810962Sjoerg remove_stopwords(const char *query)
33118810962Sjoerg {
33218810962Sjoerg 	size_t len, idx;
33318810962Sjoerg 	char *output, *buf;
33418810962Sjoerg 	const char *sep, *next;
33518810962Sjoerg 
33618810962Sjoerg 	output = buf = emalloc(strlen(query) + 1);
33718810962Sjoerg 
33818810962Sjoerg 	for (; query[0] != '\0'; query = next) {
33918810962Sjoerg 		sep = strchr(query, ' ');
34018810962Sjoerg 		if (sep == NULL) {
34118810962Sjoerg 			len = strlen(query);
34218810962Sjoerg 			next = query + len;
34318810962Sjoerg 		} else {
34418810962Sjoerg 			len = sep - query;
34518810962Sjoerg 			next = sep + 1;
34618810962Sjoerg 		}
34718810962Sjoerg 		if (len == 0)
34818810962Sjoerg 			continue;
34918810962Sjoerg 		idx = stopwords_hash(query, len);
35018810962Sjoerg 		if (memcmp(stopwords[idx], query, len) == 0 &&
35118810962Sjoerg 		    stopwords[idx][len] == '\0')
35218810962Sjoerg 			continue;
35318810962Sjoerg 		memcpy(buf, query, len);
35418810962Sjoerg 		buf += len;
35518810962Sjoerg 		*buf++ = ' ';
35618810962Sjoerg 	}
35718810962Sjoerg 
35818810962Sjoerg 	if (output == buf) {
35918810962Sjoerg 		free(output);
36018810962Sjoerg 		return NULL;
36118810962Sjoerg 	}
36218810962Sjoerg 	buf[-1] = '\0';
36318810962Sjoerg 	return output;
36418810962Sjoerg }
36518810962Sjoerg 
/*
 * usage --
 *	print usage message and die
 *
 *	The option list mirrors the getopt(3) string used by parseargs().
 */
static void
usage(void)
{
	fprintf(stderr, "Usage: %s [-123456789hilMmPpr] [-C path] [-n results] "
	    "[-S machine] [-s section] query\n",
	    getprogname());
	exit(1);
}
37718810962Sjoerg }
378