xref: /netbsd/external/gpl2/texinfo/dist/info/search.c (revision dc174305)
1 /*	$NetBSD: search.c,v 1.1.1.1 2016/01/14 00:11:29 christos Exp $	*/
2 
3 /* search.c -- searching large bodies of text.
4    Id: search.c,v 1.3 2004/04/11 17:56:46 karl Exp
5 
6    Copyright (C) 1993, 1997, 1998, 2002, 2004 Free Software Foundation, Inc.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2, or (at your option)
11    any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, write to the Free Software
20    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 
22    Written by Brian Fox (bfox@ai.mit.edu). */
23 
24 #include "info.h"
25 
26 #include "search.h"
27 #include "nodes.h"
28 
29 /* The search functions take two arguments:
30 
31      1) a string to search for, and
32 
33      2) a pointer to a SEARCH_BINDING which contains the buffer, start,
34         and end of the search.
35 
36    They return a long, which is the offset from the start of the buffer
37    at which the match was found.  An offset of -1 indicates failure. */
38 
39 /* A function which makes a binding with buffer and bounds. */
40 SEARCH_BINDING *
make_binding(char * buffer,long int start,long int end)41 make_binding (char *buffer, long int start, long int end)
42 {
43   SEARCH_BINDING *binding;
44 
45   binding = (SEARCH_BINDING *)xmalloc (sizeof (SEARCH_BINDING));
46   binding->buffer = buffer;
47   binding->start = start;
48   binding->end = end;
49   binding->flags = 0;
50 
51   return (binding);
52 }
53 
54 /* Make a copy of BINDING without duplicating the data. */
55 SEARCH_BINDING *
copy_binding(SEARCH_BINDING * binding)56 copy_binding (SEARCH_BINDING *binding)
57 {
58   SEARCH_BINDING *copy;
59 
60   copy = make_binding (binding->buffer, binding->start, binding->end);
61   copy->flags = binding->flags;
62   return (copy);
63 }
64 
65 
66 /* **************************************************************** */
67 /*                                                                  */
68 /*                 The Actual Searching Functions                   */
69 /*                                                                  */
70 /* **************************************************************** */
71 
72 /* Search forwards or backwards for the text delimited by BINDING.
73    The search is forwards if BINDING->start is greater than BINDING->end. */
74 long
search(char * string,SEARCH_BINDING * binding)75 search (char *string, SEARCH_BINDING *binding)
76 {
77   long result;
78 
79   /* If the search is backwards, then search backwards, otherwise forwards. */
80   if (binding->start > binding->end)
81     result = search_backward (string, binding);
82   else
83     result = search_forward (string, binding);
84 
85   return (result);
86 }
87 
88 /* Search forwards for STRING through the text delimited in BINDING. */
89 long
search_forward(char * string,SEARCH_BINDING * binding)90 search_forward (char *string, SEARCH_BINDING *binding)
91 {
92   register int c, i, len;
93   register char *buff, *end;
94   char *alternate = (char *)NULL;
95 
96   len = strlen (string);
97 
98   /* We match characters in the search buffer against STRING and ALTERNATE.
99      ALTERNATE is a case reversed version of STRING; this is cheaper than
100      case folding each character before comparison.   Alternate is only
101      used if the case folding bit is turned on in the passed BINDING. */
102 
103   if (binding->flags & S_FoldCase)
104     {
105       alternate = xstrdup (string);
106 
107       for (i = 0; i < len; i++)
108         {
109           if (islower (alternate[i]))
110             alternate[i] = toupper (alternate[i]);
111           else if (isupper (alternate[i]))
112             alternate[i] = tolower (alternate[i]);
113         }
114     }
115 
116   buff = binding->buffer + binding->start;
117   end = binding->buffer + binding->end + 1;
118 
119   while (buff < (end - len))
120     {
121       for (i = 0; i < len; i++)
122         {
123           c = buff[i];
124 
125           if ((c != string[i]) && (!alternate || c != alternate[i]))
126             break;
127         }
128 
129       if (!string[i])
130         {
131           if (alternate)
132             free (alternate);
133           if (binding->flags & S_SkipDest)
134             buff += len;
135           return ((long) (buff - binding->buffer));
136         }
137 
138       buff++;
139     }
140 
141   if (alternate)
142     free (alternate);
143 
144   return ((long) -1);
145 }
146 
147 /* Search for STRING backwards through the text delimited in BINDING. */
148 long
search_backward(char * input_string,SEARCH_BINDING * binding)149 search_backward (char *input_string, SEARCH_BINDING *binding)
150 {
151   register int c, i, len;
152   register char *buff, *end;
153   char *string;
154   char *alternate = (char *)NULL;
155 
156   len = strlen (input_string);
157 
158   /* Reverse the characters in the search string. */
159   string = (char *)xmalloc (1 + len);
160   for (c = 0, i = len - 1; input_string[c]; c++, i--)
161     string[i] = input_string[c];
162 
163   string[c] = '\0';
164 
165   /* We match characters in the search buffer against STRING and ALTERNATE.
166      ALTERNATE is a case reversed version of STRING; this is cheaper than
167      case folding each character before comparison.   ALTERNATE is only
168      used if the case folding bit is turned on in the passed BINDING. */
169 
170   if (binding->flags & S_FoldCase)
171     {
172       alternate = xstrdup (string);
173 
174       for (i = 0; i < len; i++)
175         {
176           if (islower (alternate[i]))
177             alternate[i] = toupper (alternate[i]);
178           else if (isupper (alternate[i]))
179             alternate[i] = tolower (alternate[i]);
180         }
181     }
182 
183   buff = binding->buffer + binding->start - 1;
184   end = binding->buffer + binding->end;
185 
186   while (buff > (end + len))
187     {
188       for (i = 0; i < len; i++)
189         {
190           c = *(buff - i);
191 
192           if (c != string[i] && (!alternate || c != alternate[i]))
193             break;
194         }
195 
196       if (!string[i])
197         {
198           free (string);
199           if (alternate)
200             free (alternate);
201 
202           if (binding->flags & S_SkipDest)
203             buff -= len;
204           return ((long) (1 + (buff - binding->buffer)));
205         }
206 
207       buff--;
208     }
209 
210   free (string);
211   if (alternate)
212     free (alternate);
213 
214   return ((long) -1);
215 }
216 
217 /* Find STRING in LINE, returning the offset of the end of the string.
218    Return an offset of -1 if STRING does not appear in LINE.  The search
219    is bound by the end of the line (i.e., either NEWLINE or 0). */
220 int
string_in_line(char * string,char * line)221 string_in_line (char *string, char *line)
222 {
223   register int end;
224   SEARCH_BINDING binding;
225 
226   /* Find the end of the line. */
227   for (end = 0; line[end] && line[end] != '\n'; end++);
228 
229   /* Search for STRING within these confines. */
230   binding.buffer = line;
231   binding.start = 0;
232   binding.end = end;
233   binding.flags = S_FoldCase | S_SkipDest;
234 
235   return (search_forward (string, &binding));
236 }
237 
238 /* Return non-zero if STRING is the first text to appear at BINDING. */
239 int
looking_at(char * string,SEARCH_BINDING * binding)240 looking_at (char *string, SEARCH_BINDING *binding)
241 {
242   long search_end;
243 
244   search_end = search (string, binding);
245 
246   /* If the string was not found, SEARCH_END is -1.  If the string was found,
247      but not right away, SEARCH_END is != binding->start.  Otherwise, the
248      string was found at binding->start. */
249   return (search_end == binding->start);
250 }
251 
252 /* **************************************************************** */
253 /*                                                                  */
254 /*                    Small String Searches                         */
255 /*                                                                  */
256 /* **************************************************************** */
257 
258 /* Function names that start with "skip" are passed a string, and return
259    an offset from the start of that string.  Function names that start
260    with "find" are passed a SEARCH_BINDING, and return an absolute position
261    marker of the item being searched for.  "Find" functions return a value
262    of -1 if the item being looked for couldn't be found. */
263 
264 /* Return the index of the first non-whitespace character in STRING. */
265 int
skip_whitespace(char * string)266 skip_whitespace (char *string)
267 {
268   register int i;
269 
270   for (i = 0; string && whitespace (string[i]); i++);
271   return (i);
272 }
273 
274 /* Return the index of the first non-whitespace or newline character in
275    STRING. */
276 int
skip_whitespace_and_newlines(char * string)277 skip_whitespace_and_newlines (char *string)
278 {
279   register int i;
280 
281   for (i = 0; string && whitespace_or_newline (string[i]); i++);
282   return (i);
283 }
284 
285 /* Return the index of the first whitespace character in STRING. */
286 int
skip_non_whitespace(char * string)287 skip_non_whitespace (char *string)
288 {
289   register int i;
290 
291   for (i = 0; string && string[i] && !whitespace (string[i]); i++);
292   return (i);
293 }
294 
295 /* Return the index of the first non-node character in STRING.  Note that
296    this function contains quite a bit of hair to ignore periods in some
297    special cases.  This is because we here at GNU ship some info files which
298    contain nodenames that contain periods.  No such nodename can start with
299    a period, or continue with whitespace, newline, or ')' immediately following
300    the period.  If second argument NEWLINES_OKAY is non-zero, newlines should
301    be skipped while parsing out the nodename specification. */
302 int
skip_node_characters(char * string,int newlines_okay)303 skip_node_characters (char *string, int newlines_okay)
304 {
305   register int c, i = 0;
306   int paren_seen = 0;
307   int paren = 0;
308 
309   /* Handle special case.  This is when another function has parsed out the
310      filename component of the node name, and we just want to parse out the
311      nodename proper.  In that case, a period at the start of the nodename
312      indicates an empty nodename. */
313   if (string && *string == '.')
314     return (0);
315 
316   if (string && *string == '(')
317     {
318       paren++;
319       paren_seen++;
320       i++;
321     }
322 
323   for (; string && (c = string[i]); i++)
324     {
325       if (paren)
326         {
327           if (c == '(')
328             paren++;
329           else if (c == ')')
330             paren--;
331 
332           continue;
333         }
334 
335       /* If the character following the close paren is a space or period,
336          then this node name has no more characters associated with it. */
337       if (c == '\t' ||
338           c == ','  ||
339           c == INFO_TAGSEP ||
340           ((!newlines_okay) && (c == '\n')) ||
341           ((paren_seen && string[i - 1] == ')') &&
342            (c == ' ' || c == '.')) ||
343           (c == '.' &&
344            (
345 #if 0
346 /* This test causes a node name ending in a period, like `This.', not to
347    be found.  The trailing . is stripped.  This occurs in the jargon
348    file (`I see no X here.' is a node name).  */
349            (!string[i + 1]) ||
350 #endif
351             (whitespace_or_newline (string[i + 1])) ||
352             (string[i + 1] == ')'))))
353         break;
354     }
355   return (i);
356 }
357 
358 
359 /* **************************************************************** */
360 /*                                                                  */
361 /*                   Searching FILE_BUFFER's                        */
362 /*                                                                  */
363 /* **************************************************************** */
364 
365 /* Return the absolute position of the first occurence of a node separator in
366    BINDING-buffer.  The search starts at BINDING->start.  Return -1 if no node
367    separator was found. */
368 long
find_node_separator(SEARCH_BINDING * binding)369 find_node_separator (SEARCH_BINDING *binding)
370 {
371   register long i;
372   char *body;
373 
374   body = binding->buffer;
375 
376   /* A node is started by [^L]^_[^L]\n.  That is to say, the C-l's are
377      optional, but the DELETE and NEWLINE are not.  This separator holds
378      true for all separated elements in an Info file, including the tags
379      table (if present) and the indirect tags table (if present). */
380   for (i = binding->start; i < binding->end - 1; i++)
381     if (((body[i] == INFO_FF && body[i + 1] == INFO_COOKIE) &&
382          (body[i + 2] == '\n' ||
383           (body[i + 2] == INFO_FF && body[i + 3] == '\n'))) ||
384         ((body[i] == INFO_COOKIE) &&
385          (body[i + 1] == '\n' ||
386           (body[i + 1] == INFO_FF && body[i + 2] == '\n'))))
387       return (i);
388   return (-1);
389 }
390 
391 /* Return the length of the node separator characters that BODY is
392    currently pointing at. */
393 int
skip_node_separator(char * body)394 skip_node_separator (char *body)
395 {
396   register int i;
397 
398   i = 0;
399 
400   if (body[i] == INFO_FF)
401     i++;
402 
403   if (body[i++] != INFO_COOKIE)
404     return (0);
405 
406   if (body[i] == INFO_FF)
407     i++;
408 
409   if (body[i++] != '\n')
410     return (0);
411 
412   return (i);
413 }
414 
415 /* Return the number of characters from STRING to the start of
416    the next line. */
417 int
skip_line(char * string)418 skip_line (char *string)
419 {
420   register int i;
421 
422   for (i = 0; string && string[i] && string[i] != '\n'; i++);
423 
424   if (string[i] == '\n')
425     i++;
426 
427   return (i);
428 }
429 
430 /* Return the absolute position of the beginning of a tags table in this
431    binding starting the search at binding->start. */
432 long
find_tags_table(SEARCH_BINDING * binding)433 find_tags_table (SEARCH_BINDING *binding)
434 {
435   SEARCH_BINDING tmp_search;
436   long position;
437 
438   tmp_search.buffer = binding->buffer;
439   tmp_search.start = binding->start;
440   tmp_search.end = binding->end;
441   tmp_search.flags = S_FoldCase;
442 
443   while ((position = find_node_separator (&tmp_search)) != -1 )
444     {
445       tmp_search.start = position;
446       tmp_search.start += skip_node_separator (tmp_search.buffer
447           + tmp_search.start);
448 
449       if (looking_at (TAGS_TABLE_BEG_LABEL, &tmp_search))
450         return (position);
451     }
452   return (-1);
453 }
454 
455 /* Return the absolute position of the node named NODENAME in BINDING.
456    This is a brute force search, and we wish to avoid it when possible.
457    This function is called when a tag (indirect or otherwise) doesn't
458    really point to the right node.  It returns the absolute position of
459    the separator preceding the node. */
460 long
find_node_in_binding(char * nodename,SEARCH_BINDING * binding)461 find_node_in_binding (char *nodename, SEARCH_BINDING *binding)
462 {
463   long position;
464   int offset, namelen;
465   SEARCH_BINDING tmp_search;
466 
467   namelen = strlen (nodename);
468 
469   tmp_search.buffer = binding->buffer;
470   tmp_search.start = binding->start;
471   tmp_search.end = binding->end;
472   tmp_search.flags = 0;
473 
474   while ((position = find_node_separator (&tmp_search)) != -1)
475     {
476       tmp_search.start = position;
477       tmp_search.start += skip_node_separator
478         (tmp_search.buffer + tmp_search.start);
479 
480       offset = string_in_line
481         (INFO_NODE_LABEL, tmp_search.buffer + tmp_search.start);
482 
483       if (offset == -1)
484         continue;
485 
486       tmp_search.start += offset;
487       tmp_search.start += skip_whitespace (tmp_search.buffer + tmp_search.start);
488       offset = skip_node_characters
489         (tmp_search.buffer + tmp_search.start, DONT_SKIP_NEWLINES);
490 
491       /* Notice that this is an exact match.  You cannot grovel through
492          the buffer with this function looking for random nodes. */
493        if ((offset == namelen) &&
494            (tmp_search.buffer[tmp_search.start] == nodename[0]) &&
495            (strncmp (tmp_search.buffer + tmp_search.start, nodename, offset) == 0))
496          return (position);
497     }
498   return (-1);
499 }
500