1 /*
 * find_pos.c contains the following procedures:
 * 	find_string_pos_in_buffer
 *	find_line_pos
 *	textPosToLineColumn
 *	back_textPosToLineColumn
 * They are used in the asedit program.
7  *
8  * Last changes: 14 April 1994
9  *
10  */
11 
12 /*
13  * Copyright 1991 - 1994,  Andrzej Stochniol, London, UK
14  *
15  * ASEDIT text editor, both binary and source (hereafter, Software) is
16  * copyrighted by Andrzej Stochniol (hereafter, AS) and ownership remains
17  * with AS.
18  *
19  * AS grants you (hereafter, Licensee) a license to use the Software
20  * for academic, research and internal business purposes only, without a
21  * fee.  Licensee may distribute the binary and source code (if released)
22  * to third parties provided that the copyright notice and this statement
23  * appears on all copies and that no charge is associated with such copies.
24  *
25  * Licensee may make derivative works.  However, if Licensee distributes
26  * any derivative work based on or derived from the Software, then
27  * Licensee will:
28  * (1) notify AS regarding its distribution of the derivative work, and
29  * (2) clearly notify users that such derivative work is a modified version
30  *      and not the original ASEDIT distributed by AS.
31  *
32  * Any Licensee wishing to make commercial use of the Software should
33  * contact AS to negotiate an appropriate license for such commercial use.
34  * Commercial use includes:
35  * (1) integration of all or part of the source code into a product for sale
36  *     or license by or on behalf of Licensee to third parties, or
37  * (2) distribution of the binary code or source code to third parties that
38  *     need it to utilize a commercial product sold or licensed by or on
39  *     behalf of Licensee.
40  *
41  * A. STOCHNIOL MAKES NO REPRESENTATIONS ABOUT THE SUITABILITY OF THIS
42  * SOFTWARE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
43  * IMPLIED WARRANTY.  IN NO EVENT SHALL A. STOCHNIOL BE LIABLE FOR ANY
44  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
45  * RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
46  * CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
47  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
48  *
49  * By using or copying this Software, Licensee agrees to abide by the
50  * copyright law and all other applicable laws, and the terms of this
51  * license.
52  * AS shall have the right to terminate this license immediately by
53  * written notice upon Licensee's breach of, or non-compliance with, any
54  * of its terms.  Licensee may be held legally responsible for any
55  * copyright infringement that is caused or encouraged by Licensee's
56  * failure to abide by the terms of this license.
57  *
58  *
59  * 	Andrzej Stochniol	(A.Stochniol@ic.ac.uk)
60  * 	30 Hatch Road
61  * 	London SW16 4PN
62  * 	UK
63  */
64 
65 
66 #include <stdio.h>
67 #include <string.h>
68 #include <ctype.h>		/* to define toupper function */
69 #include <X11/Intrinsic.h>	/* defines Boolean type */
70 
71 
72 /* find_string_pos_in_buffer returns position of the found string in the character buffer;
73    if the text has not been found (or in case of error) returns negative number;
74    The search is started from startpos and is carried forward when "forward"
75    is True (when "forward" is False the search direction is backward).
76    The search is case sensitive for case_sensitive=True and only whole
77    words are found when whole_words_only=True.
78    The startpos and endpos should contain the range where the string should
79    be searched for. When startpos or endpos is < 0 the default value will be
80    assigned for it inside the procedure (i.e. beginnning or end of the file );
81  */
82 
83 #ifdef _NO_PROTO
find_string_pos_in_buffer(searchstring,buf,startpos,endpos,forward,case_sensitive,whole_words_only)84 long find_string_pos_in_buffer(searchstring, buf,
85 			startpos, endpos, forward,
86 			case_sensitive, whole_words_only)
87 char *searchstring;
88 char *buf;
89 long startpos;
90 long endpos;
91 Boolean forward;
92 Boolean case_sensitive;
93 Boolean whole_words_only;
94 #else  /* _NO_PROTO */
95 long find_string_pos_in_buffer(char *searchstring, char *buf,
96 			long startpos, long endpos, Boolean forward,
97 			Boolean case_sensitive, Boolean whole_words_only)
98 #endif
99 {
100     long pos = -1L;
101     long searchlen = strlen(searchstring);	/* length of the search string
102 						   (should be > 0)*/
103     long buf_size = (long) strlen(buf);
104     register long i, n;
105     char *s1, *s2;
106     int  c1, c2;	/* integers used for not case-sensitive search (toupper
107 				is defined for integers; we use them to speed
108 				the process of search)*/
109 
110     if(searchlen <= 0L) return(-3L);
111     if(buf_size <= 0L)      return(-2L);
112 
113     /* setting the default value for negative startpos and/or endpos */
114     if(startpos < 0L)
115     {
116 	if(forward) startpos = 0L;
117 	else	    startpos = buf_size;
118     }
119     if(endpos < 0L)
120     {
121 	if(forward) endpos   = buf_size;
122 	else	    endpos   = 0L;
123     }
124 
125     /* test (and correct) the search range */
126     if(endpos > buf_size)     endpos=buf_size;
127     if(startpos > buf_size) startpos=buf_size;
128 
129     /* there are two major cases case-sensitive and not (they are split to
130        speed the process of the search */
131     if(case_sensitive)
132     {
133       if(forward)
134       {
135 	/* search forward starting from startpos position */
136 
137 	for( i = startpos; i < endpos; i++)
138 	{
139 	  n = searchlen;
140 	  s1 = &buf[i];
141 	  s2 = searchstring;
142 	  while (--n >= 0L && *s1++ == *s2++);
143 	  if(n < 0L)
144 	  {
145 		pos = i;
146 		/* additional check for whole words only - check both ends*/
147 		if(whole_words_only)
148 		{
149 			if((i>0L && (isalnum((unsigned char)buf[i-1]) || buf[i-1] == '_')) ||
150 			    (i+searchlen<buf_size && (isalnum((unsigned char)buf[i+searchlen]) || buf[i+searchlen] == '_') ))
151 				pos = -1;	/* in one or another end there is a letter or digit
152 					   or underscore - do not use that occurence */
153 			else	break;	/* on both ends there is no letter and no digit */
154 		}
155 		else break; /* string found: now "i" shows position of the search string
156 				in the buffer relatively to the buffer beginning */
157 	  }
158 	}
159       }
160       else
161       {
162 	/* search backward starting from startpos-1 position */
163 	for( i = startpos-1; i >= endpos; i--)
164 	{
165 	  n = searchlen;
166 	  s1 = &buf[i];
167 	  s2 = searchstring;
168 	  while (--n >= 0L && *s1++ == *s2++);
169 	  if(n < 0L)
170 	  {
171 		pos = i;
172 		if(whole_words_only)  	/* as above ... */
173 		{
174 			if((i>0L && (isalnum((unsigned char)buf[i-1]) || buf[i-1] == '_')) ||
175 			    (i+searchlen<buf_size && (isalnum((unsigned char)buf[i+searchlen]) || buf[i+searchlen] == '_') ) )
176 				pos = -1;
177 			else	break;
178 		}
179 		else break; /* string found: now "i" shows position of the search string
180 				in the buffer relatively to the buffer beginning */
181 	  }
182 	}
183       }
184     }
185     else
186     {	/* not case-sensitive ... */
187       if(forward)
188       {
189 	/* search forward starting from startpos position */
190 
191 	for( i = startpos; i < endpos; i++)
192 	{
193 	  n = searchlen;
194 	  s1 = &buf[i];
195 	  s2 = searchstring;
196 	  c1 = toupper((unsigned char)*s1);	c2=toupper((unsigned char)*s2);
197 	  while (--n >= 0L && c1 == c2)
198 	  {
199 		c1 = toupper((unsigned char) *(++s1));
200 		c2 = toupper((unsigned char) *(++s2));
201 	  }
202 	  if(n < 0L)
203 	  {
204 		pos = i;
205 		if(whole_words_only)  	/* as above ... */
206 		{
207 			if((i>0L && (isalnum((unsigned char)buf[i-1]) || buf[i-1] == '_')) ||
208 			    (i+searchlen<buf_size && (isalnum((unsigned char)buf[i+searchlen]) || buf[i+searchlen] == '_') ))
209 				pos = -1;	/* in one end or another there is a letter, or a digit
210 					   or the underscore - do not use that occurence */
211 			else	break;	/* on both ends there is no letter and no digit */
212 		}
213 		else break;
214 	  }
215 	}
216       }
217       else
218       {
219 	/* search backward starting from startpos-1 position */
220 	for( i = startpos-1; i >= endpos; i--)
221 	{
222 	  n = searchlen;
223 	  s1 = &buf[i];
224 	  s2 = searchstring;
225 	  c1 = toupper((unsigned char)*s1);	c2=toupper((unsigned char)*s2);
226 	  while (--n >= 0L && c1 == c2)
227 	  {
228 		c1 = toupper((unsigned char) *(++s1));
229 		c2 = toupper((unsigned char) *(++s2));
230 	  }
231 	  if(n < 0L)
232 	  {
233 		pos = i;
234 		if(whole_words_only)  	/* as above ... */
235 		{
236 			if((i>0L && (isalnum((unsigned char)buf[i-1]) || buf[i-1] == '_')) ||
237 			    (i+searchlen<buf_size && (isalnum((unsigned char)buf[i+searchlen]) || buf[i+searchlen] == '_') ) )
238 				pos = -1;
239 			else	break;
240 		}
241 		else break;
242 	  }
243 	}
244       }
245 
246     }
247     /****** TEMP  - for backward DEBUG!!!!!!!
248     	fprintf(stderr, "find_string_pos_in_buffer, startpos , endpos, pos: %ld %ld %ld \n", startpos, endpos, pos);
249     *******/
250     return(pos);
251 
252 }   /* find_string_pos_in_buffer */
253 
254 
255 
/*
 * find_line_pos - return the position, in the NUL-terminated buffer buf, of
 * the first character of the line number "line" (lines are counted from 1).
 *
 * Parameters:
 *   line         number of the requested line
 *   buf          NUL-terminated character buffer
 *   found_lines  output, intended for message purposes
 *
 * Returns the position of the line, or -1 when such a line has not been
 * found (also when buf is NULL).
 *
 * *found_lines is meaningful only when -1 is returned: it then holds the
 * number of lines in the buffer (the number of line feeds, plus one when the
 * text does not end with a line feed; an empty buffer counts as one line).
 * For line == 1 the buffer is not examined: 0 is returned and *found_lines
 * is set to 0.
 */

#ifdef _NO_PROTO
long find_line_pos(line, buf, found_lines)
long line;
char *buf;
long *found_lines;
#else  /* _NO_PROTO */
long find_line_pos(long line, char *buf, long *found_lines)
#endif
{
    long pos = -1L;
    long size;
    long i;

    *found_lines = 0L;

    if(line == 1L) return(0L);	/* the first line always starts at 0 */
    if(buf == NULL) return(-1L);	/* nothing to search in */

    size = (long) strlen(buf);

    for(i = 0L; i < size; i++)
    {
	if(buf[i] != '\n') continue;

	(*found_lines)++;
	if(*found_lines == line - 1L)	/* line-1 line feeds passed, so ... */
	{
	    pos = i + 1L;		/* ... the requested line starts here */
	    break;
	}
    }

    /* extra check for the last line: when the text does not end with a
       line feed there is one more (unterminated) line.  The size test keeps
       an empty buffer from being indexed at -1. */
    if(size == 0L || buf[size - 1L] != '\n') (*found_lines)++;

    return(pos);
}   /* find_line_pos */
296 
297 
/*
 * textPosToLineColumn - convert a text position in the buffer to the
 * corresponding line and column values.
 *
 * Parameters:
 *   buf          character buffer; it need not be NUL terminated because
 *                its length is passed explicitly
 *   buf_size     number of characters in buf (simply (long) strlen(buf)
 *                for NUL-terminated strings)
 *   pos          text position to convert; values greater than buf_size
 *                are treated as buf_size
 *   tab_size     how many columns one tab stop spans; it should be the same
 *                value as used by the text widget.  A value below 1 would
 *                lead to a division by zero, so 8 is used instead.
 *   line         returns the line number of pos (counted from 1)
 *   column       returns the column of pos (counted from 1) with the effect
 *                of the tab character(s) included
 *   column_ntab  returns the column of pos (counted from 1) with every tab
 *                counted as a single character
 *
 * For pos <= 0 (or buf == NULL) all three results are 1.
 */


#ifdef _NO_PROTO
void textPosToLineColumn(buf, buf_size, pos, tab_size, line, column, column_ntab)
char *buf;
long buf_size;
long pos;
int  tab_size;
long *line;
long *column;
long *column_ntab;
#else  /* _NO_PROTO */
void textPosToLineColumn(char *buf, long buf_size, long pos, int tab_size, long *line, long *column,
		long *column_ntab)
#endif
{
    long i, col;
    long latest_lf_pos = -1L;	/* position of the latest line feed found
				   before pos (-1: none) */

    *line = 1L;
    *column_ntab = *column = 1L;

    if(buf == NULL) return;

    /* never look at characters beyond the end of the buffer */
    if(pos > buf_size) pos = buf_size;
    if(pos <= 0L) return;

    if(tab_size < 1) tab_size = 8;

    /* one pass over the characters in front of pos: count the lines and
       keep the tab-expanded column of the current line */
    col = 1L;
    for(i = 0L; i < pos; i++)
    {
	if(buf[i] == '\n')
	{
	    (*line)++;
	    latest_lf_pos = i;
	    col = 1L;			/* a new line starts in column 1 */
	}
	else if(buf[i] == '\t')
	    col = ((col + tab_size - 1) / tab_size) * tab_size + 1L;	/* next tab stop */
	else
	    col++;
    }

    *column_ntab = pos - latest_lf_pos;	/* tabs disregarded */
    *column = col;			/* tabs taken into account */
}   /* textPosToLineColumn */
367 
368 
/*
 * back_textPosToLineColumn - variant of textPosToLineColumn that looks from
 * the text position towards the END of the buffer when counting lines.
 *
 * Parameters:
 *   buf          character buffer; it need not be NUL terminated because
 *                its length is passed explicitly
 *   buf_size     number of characters in buf (simply (long) strlen(buf)
 *                for NUL-terminated strings)
 *   pos          text position to convert; negative values are treated as 0
 *   tab_size     how many columns one tab stop spans.  A value below 1 would
 *                lead to a division by zero, so 8 is used instead.
 *   line         returns the number of line feeds found from pos (inclusive)
 *                up to the end of the buffer - NOT a line number
 *   column       returns the column of pos (counted from 1) with the effect
 *                of the tab character(s) included
 *   column_ntab  returns the column of pos (counted from 1) with every tab
 *                counted as a single character
 *
 * When pos is greater than buf_size (or buf == NULL) nothing is examined:
 * *line is 0 and both columns are 1.
 */


#ifdef _NO_PROTO
void back_textPosToLineColumn(buf, buf_size, pos, tab_size, line, column, column_ntab)
char *buf;
long buf_size;
long pos;
int  tab_size;
long *line;
long *column;
long *column_ntab;
#else  /* _NO_PROTO */
void back_textPosToLineColumn(char *buf, long buf_size, long pos, int tab_size, long *line, long *column,
		long *column_ntab)
#endif
{
    long i, col;
    long latest_lf_pos = -1L;	/* position of the latest line feed in front
				   of pos (-1: none) */

    *line = 0L;
    *column_ntab = *column = 1L;

    if(buf == NULL) return;
    if(pos < 0L) pos = 0L;		/* keeps the column values >= 1 */
    if(pos > buf_size) return;

    if(tab_size < 1) tab_size = 8;

    /* count the line feeds from pos to the end of the buffer; buf[buf_size]
       itself is not a part of the buffer and is never read */
    for(i = pos; i < buf_size; i++)
    {
	if(buf[i] == '\n') (*line)++;
    }

    /* go backward from pos until a line feed is found ... */
    for(i = pos - 1L; i >= 0L; i--)
    {
	if(buf[i] == '\n')
	{
	    latest_lf_pos = i;
	    break;
	}
    }

    /* the column number without taking tab characters into account */
    *column_ntab = pos - latest_lf_pos;

    /* ... and the column with tab characters taken into account */
    col = 1L;
    for(i = latest_lf_pos + 1L; i < pos; i++)
    {
	if(buf[i] == '\t') col = ((col + tab_size - 1) / tab_size) * tab_size + 1L;
	else		   col++;
    }
    *column = col;
}   /* back_textPosToLineColumn */
437 
438 
439 
440