xref: /minix/external/bsd/nvi/dist/vi/v_word.c (revision 0a6a1f1d)
1 /*	$NetBSD: v_word.c,v 1.3 2014/01/26 21:43:45 christos Exp $ */
2 /*-
3  * Copyright (c) 1992, 1993, 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 1992, 1993, 1994, 1995, 1996
6  *	Keith Bostic.  All rights reserved.
7  *
8  * See the LICENSE file for redistribution information.
9  */
10 
11 #include "config.h"
12 
13 #include <sys/cdefs.h>
14 #if 0
15 #ifndef lint
16 static const char sccsid[] = "Id: v_word.c,v 10.6 2001/06/25 15:19:36 skimo Exp  (Berkeley) Date: 2001/06/25 15:19:36 ";
17 #endif /* not lint */
18 #else
19 __RCSID("$NetBSD: v_word.c,v 1.3 2014/01/26 21:43:45 christos Exp $");
20 #endif
21 
22 #include <sys/types.h>
23 #include <sys/queue.h>
24 #include <sys/time.h>
25 
26 #include <bitstring.h>
27 #include <ctype.h>
28 #include <limits.h>
29 #include <stdio.h>
30 
31 #include "../common/common.h"
32 #include "vi.h"
33 
34 /*
35  * There are two types of "words".  Bigwords are easy -- groups of anything
36  * delimited by whitespace.  Normal words are trickier.  They are either a
37  * group of characters, numbers and underscores, or a group of anything but,
38  * delimited by whitespace.  When searching for a word, if you're in whitespace, it's
39  * easy, just remove the whitespace and go to the beginning or end of the
40  * word.  Otherwise, figure out if the next character is in a different group.
41  * If it is, go to the beginning or end of that group, otherwise, go to the
42  * beginning or end of the current group.  The historic version of vi didn't
43  * get this right, so, for example, there were cases where "4e" was not the
44  * same as "eeee" -- in particular, single character words, and commands that
45  * began in whitespace were almost always handled incorrectly.  To get it right
46  * you have to resolve the cursor after each search so that the look-ahead to
47  * figure out what type of "word" the cursor is in will be correct.
48  *
49  * Empty lines, and lines that consist of only white-space characters count
50  * as a single word, and the beginning and end of the file counts as an
51  * infinite number of words.
52  *
53  * Movements associated with commands are different than movement commands.
54  * For example, in "abc  def", with the cursor on the 'a', "cw" is from
55  * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
56  * space is discarded from the change movement.  Another example is that,
57  * in the same string, a "cw" on any white space character replaces that
58  * single character, and nothing else.  Ain't nothin' in here that's easy.
59  *
60  * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
61  * would treat groups of empty lines as individual words, i.e. the command
62  * would move the cursor to each new empty line.  The 'e' and 'E' commands
63  * would treat groups of empty lines as a single word, i.e. the first use
64  * would move past the group of lines.  The 'b' command would just beep at
65  * you, or, if you did it from the start of the line as part of a motion
66  * command, go absolutely nuts.  If the lines contained only white-space
67  * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
68  * 'b', 'E' and 'e' commands would treat the group as a single word, and
69  * the 'B' and 'b' commands will treat the lines as individual words.  This
70  * implementation treats all of these cases as a single white-space word.
71  */
72 
73 enum which {BIGWORD, LITTLEWORD};
74 
75 static int bword __P((SCR *, VICMD *, enum which));
76 static int eword __P((SCR *, VICMD *, enum which));
77 static int fword __P((SCR *, VICMD *, enum which));
78 
79 /*
80  * v_wordW -- [count]W
81  *	Move forward a bigword at a time.
82  *
83  * PUBLIC: int v_wordW __P((SCR *, VICMD *));
84  */
85 int
v_wordW(SCR * sp,VICMD * vp)86 v_wordW(SCR *sp, VICMD *vp)
87 {
88 	return (fword(sp, vp, BIGWORD));
89 }
90 
91 /*
92  * v_wordw -- [count]w
93  *	Move forward a word at a time.
94  *
95  * PUBLIC: int v_wordw __P((SCR *, VICMD *));
96  */
97 int
v_wordw(SCR * sp,VICMD * vp)98 v_wordw(SCR *sp, VICMD *vp)
99 {
100 	return (fword(sp, vp, LITTLEWORD));
101 }
102 
103 /*
104  * fword --
105  *	Move forward by words.
106  */
107 static int
fword(SCR * sp,VICMD * vp,enum which type)108 fword(SCR *sp, VICMD *vp, enum which type)
109 {
110 	enum { INWORD, NOTWORD } state;
111 	VCS cs;
112 	u_long cnt;
113 
114 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
115 	cs.cs_lno = vp->m_start.lno;
116 	cs.cs_cno = vp->m_start.cno;
117 	if (cs_init(sp, &cs))
118 		return (1);
119 
120 	/*
121 	 * If in white-space:
122 	 *	If the count is 1, and it's a change command, we're done.
123 	 *	Else, move to the first non-white-space character, which
124 	 *	counts as a single word move.  If it's a motion command,
125 	 *	don't move off the end of the line.
126 	 */
127 	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK2(cs.cs_ch))) {
128 		if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
129 			if (ISCMD(vp->rkp, 'c'))
130 				return (0);
131 			if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
132 				if (cs_fspace(sp, &cs))
133 					return (1);
134 				goto ret;
135 			}
136 		}
137 		if (cs_fblank(sp, &cs))
138 			return (1);
139 		--cnt;
140 	}
141 
142 	/*
143 	 * Cyclically move to the next word -- this involves skipping
144 	 * over word characters and then any trailing non-word characters.
145 	 * Note, for the 'w' command, the definition of a word keeps
146 	 * switching.
147 	 */
148 	if (type == BIGWORD)
149 		while (cnt--) {
150 			for (;;) {
151 				if (cs_next(sp, &cs))
152 					return (1);
153 				if (cs.cs_flags == CS_EOF)
154 					goto ret;
155 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
156 					break;
157 			}
158 			/*
159 			 * If a motion command and we're at the end of the
160 			 * last word, we're done.  Delete and yank eat any
161 			 * trailing blanks, but we don't move off the end
162 			 * of the line regardless.
163 			 */
164 			if (cnt == 0 && ISMOTION(vp)) {
165 				if ((ISCMD(vp->rkp, 'd') ||
166 				    ISCMD(vp->rkp, 'y')) &&
167 				    cs_fspace(sp, &cs))
168 					return (1);
169 				break;
170 			}
171 
172 			/* Eat whitespace characters. */
173 			if (cs_fblank(sp, &cs))
174 				return (1);
175 			if (cs.cs_flags == CS_EOF)
176 				goto ret;
177 		}
178 	else
179 		while (cnt--) {
180 			state = cs.cs_flags == 0 &&
181 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
182 			for (;;) {
183 				if (cs_next(sp, &cs))
184 					return (1);
185 				if (cs.cs_flags == CS_EOF)
186 					goto ret;
187 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
188 					break;
189 				if (state == INWORD) {
190 					if (!inword(cs.cs_ch))
191 						break;
192 				} else
193 					if (inword(cs.cs_ch))
194 						break;
195 			}
196 			/* See comment above. */
197 			if (cnt == 0 && ISMOTION(vp)) {
198 				if ((ISCMD(vp->rkp, 'd') ||
199 				    ISCMD(vp->rkp, 'y')) &&
200 				    cs_fspace(sp, &cs))
201 					return (1);
202 				break;
203 			}
204 
205 			/* Eat whitespace characters. */
206 			if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
207 				if (cs_fblank(sp, &cs))
208 					return (1);
209 			if (cs.cs_flags == CS_EOF)
210 				goto ret;
211 		}
212 
213 	/*
214 	 * If we didn't move, we must be at EOF.
215 	 *
216 	 * !!!
217 	 * That's okay for motion commands, however.
218 	 */
219 ret:	if (!ISMOTION(vp) &&
220 	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
221 		v_eof(sp, &vp->m_start);
222 		return (1);
223 	}
224 
225 	/* Adjust the end of the range for motion commands. */
226 	vp->m_stop.lno = cs.cs_lno;
227 	vp->m_stop.cno = cs.cs_cno;
228 	if (ISMOTION(vp) && cs.cs_flags == 0)
229 		--vp->m_stop.cno;
230 
231 	/*
232 	 * Non-motion commands move to the end of the range.  Delete
233 	 * and yank stay at the start, ignore others.
234 	 */
235 	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
236 	return (0);
237 }
238 
239 /*
240  * v_wordE -- [count]E
241  *	Move forward to the end of the bigword.
242  *
243  * PUBLIC: int v_wordE __P((SCR *, VICMD *));
244  */
245 int
v_wordE(SCR * sp,VICMD * vp)246 v_wordE(SCR *sp, VICMD *vp)
247 {
248 	return (eword(sp, vp, BIGWORD));
249 }
250 
251 /*
252  * v_worde -- [count]e
253  *	Move forward to the end of the word.
254  *
255  * PUBLIC: int v_worde __P((SCR *, VICMD *));
256  */
257 int
v_worde(SCR * sp,VICMD * vp)258 v_worde(SCR *sp, VICMD *vp)
259 {
260 	return (eword(sp, vp, LITTLEWORD));
261 }
262 
263 /*
264  * eword --
265  *	Move forward to the end of the word.
266  */
267 static int
eword(SCR * sp,VICMD * vp,enum which type)268 eword(SCR *sp, VICMD *vp, enum which type)
269 {
270 	enum { INWORD, NOTWORD } state;
271 	VCS cs;
272 	u_long cnt;
273 
274 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
275 	cs.cs_lno = vp->m_start.lno;
276 	cs.cs_cno = vp->m_start.cno;
277 	if (cs_init(sp, &cs))
278 		return (1);
279 
280 	/*
281 	 * !!!
282 	 * If in whitespace, or the next character is whitespace, move past
283 	 * it.  (This doesn't count as a word move.)  Stay at the character
284 	 * past the current one, it sets word "state" for the 'e' command.
285 	 */
286 	if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) {
287 		if (cs_next(sp, &cs))
288 			return (1);
289 		if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch))
290 			goto start;
291 	}
292 	if (cs_fblank(sp, &cs))
293 		return (1);
294 
295 	/*
296 	 * Cyclically move to the next word -- this involves skipping
297 	 * over word characters and then any trailing non-word characters.
298 	 * Note, for the 'e' command, the definition of a word keeps
299 	 * switching.
300 	 */
301 start:	if (type == BIGWORD)
302 		while (cnt--) {
303 			for (;;) {
304 				if (cs_next(sp, &cs))
305 					return (1);
306 				if (cs.cs_flags == CS_EOF)
307 					goto ret;
308 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
309 					break;
310 			}
311 			/*
312 			 * When we reach the start of the word after the last
313 			 * word, we're done.  If we changed state, back up one
314 			 * to the end of the previous word.
315 			 */
316 			if (cnt == 0) {
317 				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
318 					return (1);
319 				break;
320 			}
321 
322 			/* Eat whitespace characters. */
323 			if (cs_fblank(sp, &cs))
324 				return (1);
325 			if (cs.cs_flags == CS_EOF)
326 				goto ret;
327 		}
328 	else
329 		while (cnt--) {
330 			state = cs.cs_flags == 0 &&
331 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
332 			for (;;) {
333 				if (cs_next(sp, &cs))
334 					return (1);
335 				if (cs.cs_flags == CS_EOF)
336 					goto ret;
337 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
338 					break;
339 				if (state == INWORD) {
340 					if (!inword(cs.cs_ch))
341 						break;
342 				} else
343 					if (inword(cs.cs_ch))
344 						break;
345 			}
346 			/* See comment above. */
347 			if (cnt == 0) {
348 				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
349 					return (1);
350 				break;
351 			}
352 
353 			/* Eat whitespace characters. */
354 			if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
355 				if (cs_fblank(sp, &cs))
356 					return (1);
357 			if (cs.cs_flags == CS_EOF)
358 				goto ret;
359 		}
360 
361 	/*
362 	 * If we didn't move, we must be at EOF.
363 	 *
364 	 * !!!
365 	 * That's okay for motion commands, however.
366 	 */
367 ret:	if (!ISMOTION(vp) &&
368 	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
369 		v_eof(sp, &vp->m_start);
370 		return (1);
371 	}
372 
373 	/* Set the end of the range for motion commands. */
374 	vp->m_stop.lno = cs.cs_lno;
375 	vp->m_stop.cno = cs.cs_cno;
376 
377 	/*
378 	 * Non-motion commands move to the end of the range.
379 	 * Delete and yank stay at the start, ignore others.
380 	 */
381 	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
382 	return (0);
383 }
384 
385 /*
386  * v_WordB -- [count]B
387  *	Move backward a bigword at a time.
388  *
389  * PUBLIC: int v_wordB __P((SCR *, VICMD *));
390  */
391 int
v_wordB(SCR * sp,VICMD * vp)392 v_wordB(SCR *sp, VICMD *vp)
393 {
394 	return (bword(sp, vp, BIGWORD));
395 }
396 
397 /*
398  * v_wordb -- [count]b
399  *	Move backward a word at a time.
400  *
401  * PUBLIC: int v_wordb __P((SCR *, VICMD *));
402  */
403 int
v_wordb(SCR * sp,VICMD * vp)404 v_wordb(SCR *sp, VICMD *vp)
405 {
406 	return (bword(sp, vp, LITTLEWORD));
407 }
408 
409 /*
410  * bword --
411  *	Move backward by words.
412  */
413 static int
bword(SCR * sp,VICMD * vp,enum which type)414 bword(SCR *sp, VICMD *vp, enum which type)
415 {
416 	enum { INWORD, NOTWORD } state;
417 	VCS cs;
418 	u_long cnt;
419 
420 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
421 	cs.cs_lno = vp->m_start.lno;
422 	cs.cs_cno = vp->m_start.cno;
423 	if (cs_init(sp, &cs))
424 		return (1);
425 
426 	/*
427 	 * !!!
428 	 * If in whitespace, or the previous character is whitespace, move
429 	 * past it.  (This doesn't count as a word move.)  Stay at the
430 	 * character before the current one, it sets word "state" for the
431 	 * 'b' command.
432 	 */
433 	if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) {
434 		if (cs_prev(sp, &cs))
435 			return (1);
436 		if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch))
437 			goto start;
438 	}
439 	if (cs_bblank(sp, &cs))
440 		return (1);
441 
442 	/*
443 	 * Cyclically move to the beginning of the previous word -- this
444 	 * involves skipping over word characters and then any trailing
445 	 * non-word characters.  Note, for the 'b' command, the definition
446 	 * of a word keeps switching.
447 	 */
448 start:	if (type == BIGWORD)
449 		while (cnt--) {
450 			for (;;) {
451 				if (cs_prev(sp, &cs))
452 					return (1);
453 				if (cs.cs_flags == CS_SOF)
454 					goto ret;
455 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
456 					break;
457 			}
458 			/*
459 			 * When we reach the end of the word before the last
460 			 * word, we're done.  If we changed state, move forward
461 			 * one to the end of the next word.
462 			 */
463 			if (cnt == 0) {
464 				if (cs.cs_flags == 0 && cs_next(sp, &cs))
465 					return (1);
466 				break;
467 			}
468 
469 			/* Eat whitespace characters. */
470 			if (cs_bblank(sp, &cs))
471 				return (1);
472 			if (cs.cs_flags == CS_SOF)
473 				goto ret;
474 		}
475 	else
476 		while (cnt--) {
477 			state = cs.cs_flags == 0 &&
478 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
479 			for (;;) {
480 				if (cs_prev(sp, &cs))
481 					return (1);
482 				if (cs.cs_flags == CS_SOF)
483 					goto ret;
484 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
485 					break;
486 				if (state == INWORD) {
487 					if (!inword(cs.cs_ch))
488 						break;
489 				} else
490 					if (inword(cs.cs_ch))
491 						break;
492 			}
493 			/* See comment above. */
494 			if (cnt == 0) {
495 				if (cs.cs_flags == 0 && cs_next(sp, &cs))
496 					return (1);
497 				break;
498 			}
499 
500 			/* Eat whitespace characters. */
501 			if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
502 				if (cs_bblank(sp, &cs))
503 					return (1);
504 			if (cs.cs_flags == CS_SOF)
505 				goto ret;
506 		}
507 
508 	/* If we didn't move, we must be at SOF. */
509 ret:	if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
510 		v_sof(sp, &vp->m_start);
511 		return (1);
512 	}
513 
514 	/* Set the end of the range for motion commands. */
515 	vp->m_stop.lno = cs.cs_lno;
516 	vp->m_stop.cno = cs.cs_cno;
517 
518 	/*
519 	 * All commands move to the end of the range.  Motion commands
520 	 * adjust the starting point to the character before the current
521 	 * one.
522 	 *
523 	 * !!!
524 	 * The historic vi didn't get this right -- the `yb' command yanked
525 	 * the right stuff and even updated the cursor value, but the cursor
526 	 * was not actually updated on the screen.
527 	 */
528 	vp->m_final = vp->m_stop;
529 	if (ISMOTION(vp))
530 		--vp->m_start.cno;
531 	return (0);
532 }
533