1 /*
2  *	index.c
3  *
4  *	routines that deal with the index file
5  *
6  *	$Id: index.c,v 1.97 2012/05/04 19:46:26 conrads Exp $
7  *
8  *****************************************************************************/
9 
10 #include <limits.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <sys/param.h>
16 #include <sys/stat.h>
17 #include <sys/utsname.h>
18 
19 #include "mkreadmes.h"
20 
char	*index_field[NUMINDEXFIELDS];	/* pointers to the individual fields of
											the most recently split index line;
											filled in by split_index_line(),
											and the strings they point to are
											overwritten by the next call
										 */
char	*index_buf = NULL;				/* pointer to the index file buffer;
											set by load_index(), and turned
											into a series of nul-terminated
											lines by set_index_categories()
										 */

extern	char *(*index_categories)[];	/* pointer to an array of pointers
											to the first line of each
											category's section within the
											index buffer; allocated and
											NULL-terminated by
											set_index_categories()
										 */
extern	char   *next_index;	/* pointer to the next index line to search from;
								advanced by search_index() after each match */
extern	char   *next_cat;	/* pointer to the first line of next category (the
								search cutoff point); may be NULL when
								searching within the last category */

extern	char	ports_dir[PATH_MAX];	/* top-level directory */

extern	size_t	num_categories;		/* number of categories found in top-level
										Makefile; determines the size of the
										index_categories array
									 */
/* prototypes for global functions (defined in this file) */

int		load_index(void);
int		search_index(const char *path);
int		set_index_categories(void);

/* prototypes for routines in other source files */

extern	char  *load_file(const char *path);						   /* util.c */
extern	char  *skip_leading(const char* str, const char* pattern); /* util.c */
49 
50 /*****************************************************************************/
51 
split_index_line(const char * line)52 static inline void split_index_line(const char *line)
53 /*
54 	convert the index line pointed to by 'line' into an array of nul-terminated
55 	strings stored in index_line[], saving pointers to each field in the global
56 	index_field[] array
57  */
58 {
59 	static char index_line[MAXINDEXLINE];
60 
61 	char *ptr = index_line;
62 	int i = 0;						/* index into index_field[] array */
63 
64 	while (*line)					/* do until end of input line */
65 	{
66 		index_field[i++] = ptr;		/* save a pointer to this field */
67 
68 		while(*line)	/* copy field data */
69 		{
70 			 if (*line == FIELDSEPCHAR)
71 				 break;
72 			*ptr++ = *line++;
73 		}
74 
75 		*ptr++ = '\0';				/* mark end of field */
76 
77 		if (*line)					/* end of line? */
78 			++line;
79 	}
80 }
81 
82 /*****************************************************************************/
83 
search_index(const char * path)84 int search_index(const char *path)
85 /*
86 	Find a port's entry in the index buffer by first converting its real path
87 	to the canonical form used in the index file's PORT field, then comparing
88 	it to the PORT index field, and if found, convert its index fields into an
89 	array of strings stored in index_line[], saving pointers to each field in
90 	the index_field[] array
91 
92 	On entry, the global variable 'next_index' will already be pointing to the
93 	first line in the index buffer to check, and next_cat will be pointing to
94 	the first index line of the next category (the search cutoff point)
95 
96 	If called from make_readme_category(), this function relies on the category
97 	Makefile being properly sorted.  Missorted SUBDIR entries will cause
98 	searches for some ports to fail, as after a successful search for the
99 	missorted item, the next_index variable will be incremented to the next
100 	line in the index, but this will not match what the Makefile says the next
101 	entry should be.
102 
103 	Example (this really happened just recently; I did a send-pr afterwards re:
104 	the missorted item):
105 
106 	Let's say that lang/Makefile contains the following series of entries:
107 
108 	SUBDIR += perl5.8
109 	SUBDIR += perl5.10
110 	SUBDIR += perl5.12
111 	SUBDIR += perl5.14
112 
113 	(the "perl5.8" item is missorted, and should come after the other three
114 	entries)
115 
116 	The first search, for perl5.8, will succeed, incrementing next_index to
117 	point to the next index entry after perl5.8's, which, unfortunately, will
118 	not be for perl5.10, but for the port that should have been after perl5.8
119 	in the Makefile (which at the time of this writing is lang/petite-chez).
120 	So then, none of the other perl5* ports will be found, as their index file
121 	entries actually precede perl5.8's and have already been passed over by the
122 	next_index pointer.
123 
124 	A case could be made for not relying on the sorting of the category
125 	Makefiles, but the performance benefit from using this method is
126 	significant, and no real harm results if the assumption turns out to be
127 	wrong.  Besides which, the Makefiles should be properly sorted, so
128 	mkreadmes actually turns out to be a handy tool for detecting these
129 	missorted items!
130 
131 	Returns 1 if found, 0 otherwise
132 */
133 {
134 	char   *port;		/* pointer to PORT field in current index line */
135 	size_t	len;		/* length of PORT field */
136 
137 	char	port_path[PATH_MAX] = PORTSTOP;	/* the canonical "/usr/ports" */
138 	char   *index_ptr = next_index;	/* pointer into the index buffer */
139 
140 	/* scan the index file buffer one line at a time, starting at *next_index,
141 		comparing the port's path against the current index line's PORT field
142 
143 		the search will terminate with failure if index_ptr runs into the start
144 		of the next category's section in the index buffer (or the end of the
145 		buffer, if we're searching within the last category in the index)
146 	 */
147 
148 	/* append category/port from path to PORTSTOP in port_path */
149 	strcat(port_path, skip_leading(path, ports_dir));
150 
151 	/* Do NOT use an "(index_ptr < next_cat)" test here, as next_cat
152 		may equal NULL (if we're in the last category)
153 	 */
154 	while (index_ptr != next_cat)	/* only search one category */
155 	{
156 		/* skip past the first FIELDSEPCHAR and set 'port' to point to
157 			the PORT field
158 		 */
159 		port = index_ptr;
160 
161 		while (*port++ != FIELDSEPCHAR);
162 
163 		/* get length of PORT field */
164 		len = strcspn(port, FIELDSEPSTR);
165 
166 		/* compare port_path to the PORT field */
167 		if ((len == strlen(port_path)) && (strncmp(port, port_path, len) == 0))
168 		{
169 			/* we have a match
170 
171 				break the index line into its individual fields for later use,
172 				storing pointers to each field in the global index_field[] array
173 			 */
174 			split_index_line(index_ptr);
175 
176 			/* bump the next_index pointer to the next line in the index buffer,
177 				for any subsequent searches within the same category
178 			 */
179 			next_index = index_ptr + strlen(index_ptr) + 1;
180 			return 1;
181 		}
182 
183 		index_ptr += strlen(index_ptr) + 1;	/* bump pointer to next line and
184 												try again */
185 	}
186 
187 	/* falling through to here means no match found for this port */
188 	return 0;
189 }
190 
191 /*****************************************************************************/
192 
get_index_line_category(char category[],char * line)193 static inline char *get_index_line_category(char category[], char *line)
194 /*
195 	find the category field within an index line, chop off all but the first
196 	category name, and copy it into category[]
197 
198 	returns a pointer to category
199 */
200 {
201 	int		field;
202 	char   *ptr = category;
203 
204 	/* locate the category field */
205 
206 	for (field = PKG; field < CATEGORY; ++field)
207 	{
208 		line = strchr(line, FIELDSEPCHAR) + 1;
209 	}
210 
211 	/* OK, now we're pointing at the first character of the CATEGORY field;
212 		copy the first category name only
213 	 */
214 	while ((*line != ' ') && (*line != '|'))
215 	{
216 			*ptr++ = *line++;
217 	}
218 
219 	*ptr = '\0';			/* and nul-terminate the category */
220 	return category;
221 }
222 
223 /*****************************************************************************/
224 
validate_index_line(const char * line)225 static inline int validate_index_line(const char *line)
226 /*
227 	Make sure all of the fields we need in an index line actually contain data
228 
229 	Returns 1 if line is valid, 0 if any of the tested fields are empty
230  */
231 {
232 	split_index_line(line);
233 
234 	/* a very simple test; just make sure the first character of each field
235 		is not nul; much faster than using strlen() as we were earlier
236 
237 		only tests the fields which *must* contain something
238 	 */
239 	return (*index_field[PKG] &&
240 		 	*index_field[PORT] &&
241 			*index_field[COMMENT] &&
242 			*index_field[DESCR] &&
243 			*index_field[EMAIL] &&
244 			*index_field[CATEGORY]
245 			) ? 1 : 0;
246 }
247 
248 /*****************************************************************************/
249 
set_index_categories(void)250 int set_index_categories(void)
251 /*
252 	setup an array of pointers, cross-indexed against the categories list,
253 	to the first line of each category's section within the index buffer.
254 
255 	this will allow us to easily find the start of a category within the index
256 	buffer by name, and speed up port searches quite dramatically by limiting
257 	our searches to only the category to which a given port belongs.
258 
259 	on entry, the index buffer will contain a series of lines, each terminated
260 	with a newline character, with a single nul end-of-file marker
261 
262 	this routine will replace each newline with a nul character, transforming
263 	the buffer into an array of strings, with a single extra nul end-of-file
264 	marker at the end
265 
266 	each index line will be validated as well, to ensure all of the needed
267 	fields actually contain data, returning an error if an improperly formatted
268 	line is encountered
269 
270 	returns 0 on success, -1 on error
271 */
272 {
273 	char	this_category[MAXCATNAME];	/* current category */
274 	char	next_category[MAXCATNAME];	/* next category */
275 	size_t	len;						/* length of current index line */
276 
277 	char   *index_ptr = index_buf;	/* pointer into index_buf */
278 	int		i = 0;					/* index into index_categories[] array */
279 	int		line_no = 1;			/* track line numbers as we go, in case we
280 										need to report an improperly formatted
281 										index line
282 									 */
283 
284 	/* allocate memory for the array where we'll save pointers to the first
285 		line of each category's section within the index buffer
286 	 */
287 	if ((index_categories = calloc((num_categories + 1),
288 									sizeof(char *))) == NULL)
289 	{
290 		perror("calloc() failure");
291 		return -1;
292 	}
293 
294 	/* scan the index buffer one newline-terminated line at a time,
295 		replacing the newline with nul.
296 		whenever a new category is found, add a pointer to the line
297 		to the index_categories[] array
298 	 */
299 	while (*index_ptr)
300 	{
301 		/* whenever we're at the top of this loop, index_ptr will be pointing
302 			at the start of a new category section in the index buffer (or end
303 			of buffer)
304 		 */
305 		/* replace newline with nul, setting len while we're at it */
306 		index_ptr[len = strcspn(index_ptr, "\n")]= '\0';
307 
308 		/* Make sure the line is properly formatted */
309 		if (!validate_index_line(index_ptr))
310 		{
311 			fprintf(stderr, "Error in index file line %d: line = \"%s\"\n",
312 				line_no, index_ptr);
313 			return -1;
314 		}
315 
316 		(*index_categories)[i++] = index_ptr;	/* save a pointer to this
317 													category's section
318 												 */
319 		/* on all but the first pass, next_category will already contain
320 			the name of the new category, so we can avoid a needless
321 			duplication of effort here */
322 		if (index_ptr == index_buf)
323 			/* copy this line's category into this_category */
324 			get_index_line_category(this_category, index_ptr);
325 		else
326 			/* copy the category we already picked up on the last pass */
327 			strcpy(this_category, next_category);
328 
329 		/* now keep bumping index_ptr line by line until we hit a
330 			new category, or reach end of file
331 		 */
332 		while (*(index_ptr += len + 1))
333 		{
334 			++line_no;					/* bump the line number counter */
335 
336 			/* replace newline with nul, setting len while we're at it */
337 			index_ptr[len = strcspn(index_ptr, "\n")]= '\0';
338 
339 			/* Make sure the line is properly formatted */
340 			if (!validate_index_line(index_ptr))
341 			{
342 				fprintf(stderr, "Error in index file line %d: line = \"%s\"\n",
343 					line_no, index_ptr);
344 				return -1;
345 			}
346 
347 			/* get this line's category and compare it to current category */
348 			get_index_line_category(next_category, index_ptr);
349 
350 			if (strcmp(this_category, next_category) != 0)
351 				break;	/* we've hit a new category,
352 							back to top of outer loop */
353 		}
354 	}
355 
356 	(*index_categories)[i] = NULL;	/* mark end of list */
357 
358 	return 0;
359 }
360 
361 /*****************************************************************************/
362 
gen_index_path(char * index_path)363 static int gen_index_path(char *index_path)
364 /*
365  *	generate the fully qualified pathname to the index file,
366  *	storing it back into index_path[]
367  *
368  *	returns 0 on success, or -1 on error
369  */
370 {
371 	struct	utsname	uname_struct;		/* information returned by uname() */
372 
373 	/* get the OS release number */
374 	if (uname(&uname_struct) == -1)
375 	{
376 		perror("uname()");
377 		return -1;
378 	}
379 
380 	/* set index_path to ports_dir/INDEXNAME */
381 	strcpy(index_path, ports_dir);
382 	strcat(index_path, INDEXNAME);	/* index name minus OS release number */
383 
384 	/* append release number to pathname */
385 	strncat(index_path, uname_struct.release,
386 		strspn(uname_struct.release, "0123456789"));
387 
388 	return 0;
389 }
390 
391 /*****************************************************************************/
392 
load_index(void)393 int load_index(void)
394 /*
395 	Load the index file into a dynamically allocated buffer which will be used
396 	throughout the lifetime of the program to ensure the fastest possible
397 	access to the file's data.
398 
399 	A pointer to the file's location in memory will be stored in the global
400 	variable index_buf.
401 
402 	Returns 0 on success, or -1 on failure
403 */
404 {
405 	char	index_path[PATH_MAX];
406 
407 	/* get the fully qualified pathname into index_path */
408 	if (gen_index_path(index_path) != 0)
409 		return -1;
410 
411 	/* load the file into memory, saving its address in the global index_buf */
412 	if ((index_buf = load_file(index_path)) == NULL)
413 		return -1;
414 
415 	return 0;
416 }
417 
418