1 /* $NetBSD: v_word.c,v 1.3 2014/01/26 21:43:45 christos Exp $ */ 2 /*- 3 * Copyright (c) 1992, 1993, 1994 4 * The Regents of the University of California. All rights reserved. 5 * Copyright (c) 1992, 1993, 1994, 1995, 1996 6 * Keith Bostic. All rights reserved. 7 * 8 * See the LICENSE file for redistribution information. 9 */ 10 11 #include "config.h" 12 13 #include <sys/cdefs.h> 14 #if 0 15 #ifndef lint 16 static const char sccsid[] = "Id: v_word.c,v 10.6 2001/06/25 15:19:36 skimo Exp (Berkeley) Date: 2001/06/25 15:19:36 "; 17 #endif /* not lint */ 18 #else 19 __RCSID("$NetBSD: v_word.c,v 1.3 2014/01/26 21:43:45 christos Exp $"); 20 #endif 21 22 #include <sys/types.h> 23 #include <sys/queue.h> 24 #include <sys/time.h> 25 26 #include <bitstring.h> 27 #include <ctype.h> 28 #include <limits.h> 29 #include <stdio.h> 30 31 #include "../common/common.h" 32 #include "vi.h" 33 34 /* 35 * There are two types of "words". Bigwords are easy -- groups of anything 36 * delimited by whitespace. Normal words are trickier. They are either a 37 * group of characters, numbers and underscores, or a group of anything but, 38 * delimited by whitespace. When for a word, if you're in whitespace, it's 39 * easy, just remove the whitespace and go to the beginning or end of the 40 * word. Otherwise, figure out if the next character is in a different group. 41 * If it is, go to the beginning or end of that group, otherwise, go to the 42 * beginning or end of the current group. The historic version of vi didn't 43 * get this right, so, for example, there were cases where "4e" was not the 44 * same as "eeee" -- in particular, single character words, and commands that 45 * began in whitespace were almost always handled incorrectly. To get it right 46 * you have to resolve the cursor after each search so that the look-ahead to 47 * figure out what type of "word" the cursor is in will be correct. 48 * 49 * Empty lines, and lines that consist of only white-space characters count 50 * as a single word, and the beginning and end of the file counts as an 51 * infinite number of words. 52 * 53 * Movements associated with commands are different than movement commands. 54 * For example, in "abc def", with the cursor on the 'a', "cw" is from 55 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white 56 * space is discarded from the change movement. Another example is that, 57 * in the same string, a "cw" on any white space character replaces that 58 * single character, and nothing else. Ain't nothin' in here that's easy. 59 * 60 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands 61 * would treat groups of empty lines as individual words, i.e. the command 62 * would move the cursor to each new empty line. The 'e' and 'E' commands 63 * would treat groups of empty lines as a single word, i.e. the first use 64 * would move past the group of lines. The 'b' command would just beep at 65 * you, or, if you did it from the start of the line as part of a motion 66 * command, go absolutely nuts. If the lines contained only white-space 67 * characters, the 'w' and 'W' commands would just beep at you, and the 'B', 68 * 'b', 'E' and 'e' commands would treat the group as a single word, and 69 * the 'B' and 'b' commands will treat the lines as individual words. This 70 * implementation treats all of these cases as a single white-space word. 71 */ 72 73 enum which {BIGWORD, LITTLEWORD}; 74 75 static int bword __P((SCR *, VICMD *, enum which)); 76 static int eword __P((SCR *, VICMD *, enum which)); 77 static int fword __P((SCR *, VICMD *, enum which)); 78 79 /* 80 * v_wordW -- [count]W 81 * Move forward a bigword at a time. 82 * 83 * PUBLIC: int v_wordW __P((SCR *, VICMD *)); 84 */ 85 int 86 v_wordW(SCR *sp, VICMD *vp) 87 { 88 return (fword(sp, vp, BIGWORD)); 89 } 90 91 /* 92 * v_wordw -- [count]w 93 * Move forward a word at a time. 94 * 95 * PUBLIC: int v_wordw __P((SCR *, VICMD *)); 96 */ 97 int 98 v_wordw(SCR *sp, VICMD *vp) 99 { 100 return (fword(sp, vp, LITTLEWORD)); 101 } 102 103 /* 104 * fword -- 105 * Move forward by words. 106 */ 107 static int 108 fword(SCR *sp, VICMD *vp, enum which type) 109 { 110 enum { INWORD, NOTWORD } state; 111 VCS cs; 112 u_long cnt; 113 114 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 115 cs.cs_lno = vp->m_start.lno; 116 cs.cs_cno = vp->m_start.cno; 117 if (cs_init(sp, &cs)) 118 return (1); 119 120 /* 121 * If in white-space: 122 * If the count is 1, and it's a change command, we're done. 123 * Else, move to the first non-white-space character, which 124 * counts as a single word move. If it's a motion command, 125 * don't move off the end of the line. 126 */ 127 if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK2(cs.cs_ch))) { 128 if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) { 129 if (ISCMD(vp->rkp, 'c')) 130 return (0); 131 if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) { 132 if (cs_fspace(sp, &cs)) 133 return (1); 134 goto ret; 135 } 136 } 137 if (cs_fblank(sp, &cs)) 138 return (1); 139 --cnt; 140 } 141 142 /* 143 * Cyclically move to the next word -- this involves skipping 144 * over word characters and then any trailing non-word characters. 145 * Note, for the 'w' command, the definition of a word keeps 146 * switching. 147 */ 148 if (type == BIGWORD) 149 while (cnt--) { 150 for (;;) { 151 if (cs_next(sp, &cs)) 152 return (1); 153 if (cs.cs_flags == CS_EOF) 154 goto ret; 155 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 156 break; 157 } 158 /* 159 * If a motion command and we're at the end of the 160 * last word, we're done. Delete and yank eat any 161 * trailing blanks, but we don't move off the end 162 * of the line regardless. 163 */ 164 if (cnt == 0 && ISMOTION(vp)) { 165 if ((ISCMD(vp->rkp, 'd') || 166 ISCMD(vp->rkp, 'y')) && 167 cs_fspace(sp, &cs)) 168 return (1); 169 break; 170 } 171 172 /* Eat whitespace characters. */ 173 if (cs_fblank(sp, &cs)) 174 return (1); 175 if (cs.cs_flags == CS_EOF) 176 goto ret; 177 } 178 else 179 while (cnt--) { 180 state = cs.cs_flags == 0 && 181 inword(cs.cs_ch) ? INWORD : NOTWORD; 182 for (;;) { 183 if (cs_next(sp, &cs)) 184 return (1); 185 if (cs.cs_flags == CS_EOF) 186 goto ret; 187 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 188 break; 189 if (state == INWORD) { 190 if (!inword(cs.cs_ch)) 191 break; 192 } else 193 if (inword(cs.cs_ch)) 194 break; 195 } 196 /* See comment above. */ 197 if (cnt == 0 && ISMOTION(vp)) { 198 if ((ISCMD(vp->rkp, 'd') || 199 ISCMD(vp->rkp, 'y')) && 200 cs_fspace(sp, &cs)) 201 return (1); 202 break; 203 } 204 205 /* Eat whitespace characters. */ 206 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 207 if (cs_fblank(sp, &cs)) 208 return (1); 209 if (cs.cs_flags == CS_EOF) 210 goto ret; 211 } 212 213 /* 214 * If we didn't move, we must be at EOF. 215 * 216 * !!! 217 * That's okay for motion commands, however. 218 */ 219 ret: if (!ISMOTION(vp) && 220 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 221 v_eof(sp, &vp->m_start); 222 return (1); 223 } 224 225 /* Adjust the end of the range for motion commands. */ 226 vp->m_stop.lno = cs.cs_lno; 227 vp->m_stop.cno = cs.cs_cno; 228 if (ISMOTION(vp) && cs.cs_flags == 0) 229 --vp->m_stop.cno; 230 231 /* 232 * Non-motion commands move to the end of the range. Delete 233 * and yank stay at the start, ignore others. 234 */ 235 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 236 return (0); 237 } 238 239 /* 240 * v_wordE -- [count]E 241 * Move forward to the end of the bigword. 242 * 243 * PUBLIC: int v_wordE __P((SCR *, VICMD *)); 244 */ 245 int 246 v_wordE(SCR *sp, VICMD *vp) 247 { 248 return (eword(sp, vp, BIGWORD)); 249 } 250 251 /* 252 * v_worde -- [count]e 253 * Move forward to the end of the word. 254 * 255 * PUBLIC: int v_worde __P((SCR *, VICMD *)); 256 */ 257 int 258 v_worde(SCR *sp, VICMD *vp) 259 { 260 return (eword(sp, vp, LITTLEWORD)); 261 } 262 263 /* 264 * eword -- 265 * Move forward to the end of the word. 266 */ 267 static int 268 eword(SCR *sp, VICMD *vp, enum which type) 269 { 270 enum { INWORD, NOTWORD } state; 271 VCS cs; 272 u_long cnt; 273 274 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 275 cs.cs_lno = vp->m_start.lno; 276 cs.cs_cno = vp->m_start.cno; 277 if (cs_init(sp, &cs)) 278 return (1); 279 280 /* 281 * !!! 282 * If in whitespace, or the next character is whitespace, move past 283 * it. (This doesn't count as a word move.) Stay at the character 284 * past the current one, it sets word "state" for the 'e' command. 285 */ 286 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) { 287 if (cs_next(sp, &cs)) 288 return (1); 289 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) 290 goto start; 291 } 292 if (cs_fblank(sp, &cs)) 293 return (1); 294 295 /* 296 * Cyclically move to the next word -- this involves skipping 297 * over word characters and then any trailing non-word characters. 298 * Note, for the 'e' command, the definition of a word keeps 299 * switching. 300 */ 301 start: if (type == BIGWORD) 302 while (cnt--) { 303 for (;;) { 304 if (cs_next(sp, &cs)) 305 return (1); 306 if (cs.cs_flags == CS_EOF) 307 goto ret; 308 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 309 break; 310 } 311 /* 312 * When we reach the start of the word after the last 313 * word, we're done. If we changed state, back up one 314 * to the end of the previous word. 315 */ 316 if (cnt == 0) { 317 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 318 return (1); 319 break; 320 } 321 322 /* Eat whitespace characters. */ 323 if (cs_fblank(sp, &cs)) 324 return (1); 325 if (cs.cs_flags == CS_EOF) 326 goto ret; 327 } 328 else 329 while (cnt--) { 330 state = cs.cs_flags == 0 && 331 inword(cs.cs_ch) ? INWORD : NOTWORD; 332 for (;;) { 333 if (cs_next(sp, &cs)) 334 return (1); 335 if (cs.cs_flags == CS_EOF) 336 goto ret; 337 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 338 break; 339 if (state == INWORD) { 340 if (!inword(cs.cs_ch)) 341 break; 342 } else 343 if (inword(cs.cs_ch)) 344 break; 345 } 346 /* See comment above. */ 347 if (cnt == 0) { 348 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 349 return (1); 350 break; 351 } 352 353 /* Eat whitespace characters. */ 354 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 355 if (cs_fblank(sp, &cs)) 356 return (1); 357 if (cs.cs_flags == CS_EOF) 358 goto ret; 359 } 360 361 /* 362 * If we didn't move, we must be at EOF. 363 * 364 * !!! 365 * That's okay for motion commands, however. 366 */ 367 ret: if (!ISMOTION(vp) && 368 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 369 v_eof(sp, &vp->m_start); 370 return (1); 371 } 372 373 /* Set the end of the range for motion commands. */ 374 vp->m_stop.lno = cs.cs_lno; 375 vp->m_stop.cno = cs.cs_cno; 376 377 /* 378 * Non-motion commands move to the end of the range. 379 * Delete and yank stay at the start, ignore others. 380 */ 381 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 382 return (0); 383 } 384 385 /* 386 * v_WordB -- [count]B 387 * Move backward a bigword at a time. 388 * 389 * PUBLIC: int v_wordB __P((SCR *, VICMD *)); 390 */ 391 int 392 v_wordB(SCR *sp, VICMD *vp) 393 { 394 return (bword(sp, vp, BIGWORD)); 395 } 396 397 /* 398 * v_wordb -- [count]b 399 * Move backward a word at a time. 400 * 401 * PUBLIC: int v_wordb __P((SCR *, VICMD *)); 402 */ 403 int 404 v_wordb(SCR *sp, VICMD *vp) 405 { 406 return (bword(sp, vp, LITTLEWORD)); 407 } 408 409 /* 410 * bword -- 411 * Move backward by words. 412 */ 413 static int 414 bword(SCR *sp, VICMD *vp, enum which type) 415 { 416 enum { INWORD, NOTWORD } state; 417 VCS cs; 418 u_long cnt; 419 420 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 421 cs.cs_lno = vp->m_start.lno; 422 cs.cs_cno = vp->m_start.cno; 423 if (cs_init(sp, &cs)) 424 return (1); 425 426 /* 427 * !!! 428 * If in whitespace, or the previous character is whitespace, move 429 * past it. (This doesn't count as a word move.) Stay at the 430 * character before the current one, it sets word "state" for the 431 * 'b' command. 432 */ 433 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) { 434 if (cs_prev(sp, &cs)) 435 return (1); 436 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) 437 goto start; 438 } 439 if (cs_bblank(sp, &cs)) 440 return (1); 441 442 /* 443 * Cyclically move to the beginning of the previous word -- this 444 * involves skipping over word characters and then any trailing 445 * non-word characters. Note, for the 'b' command, the definition 446 * of a word keeps switching. 447 */ 448 start: if (type == BIGWORD) 449 while (cnt--) { 450 for (;;) { 451 if (cs_prev(sp, &cs)) 452 return (1); 453 if (cs.cs_flags == CS_SOF) 454 goto ret; 455 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 456 break; 457 } 458 /* 459 * When we reach the end of the word before the last 460 * word, we're done. If we changed state, move forward 461 * one to the end of the next word. 462 */ 463 if (cnt == 0) { 464 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 465 return (1); 466 break; 467 } 468 469 /* Eat whitespace characters. */ 470 if (cs_bblank(sp, &cs)) 471 return (1); 472 if (cs.cs_flags == CS_SOF) 473 goto ret; 474 } 475 else 476 while (cnt--) { 477 state = cs.cs_flags == 0 && 478 inword(cs.cs_ch) ? INWORD : NOTWORD; 479 for (;;) { 480 if (cs_prev(sp, &cs)) 481 return (1); 482 if (cs.cs_flags == CS_SOF) 483 goto ret; 484 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 485 break; 486 if (state == INWORD) { 487 if (!inword(cs.cs_ch)) 488 break; 489 } else 490 if (inword(cs.cs_ch)) 491 break; 492 } 493 /* See comment above. */ 494 if (cnt == 0) { 495 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 496 return (1); 497 break; 498 } 499 500 /* Eat whitespace characters. */ 501 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 502 if (cs_bblank(sp, &cs)) 503 return (1); 504 if (cs.cs_flags == CS_SOF) 505 goto ret; 506 } 507 508 /* If we didn't move, we must be at SOF. */ 509 ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 510 v_sof(sp, &vp->m_start); 511 return (1); 512 } 513 514 /* Set the end of the range for motion commands. */ 515 vp->m_stop.lno = cs.cs_lno; 516 vp->m_stop.cno = cs.cs_cno; 517 518 /* 519 * All commands move to the end of the range. Motion commands 520 * adjust the starting point to the character before the current 521 * one. 522 * 523 * !!! 524 * The historic vi didn't get this right -- the `yb' command yanked 525 * the right stuff and even updated the cursor value, but the cursor 526 * was not actually updated on the screen. 527 */ 528 vp->m_final = vp->m_stop; 529 if (ISMOTION(vp)) 530 --vp->m_start.cno; 531 return (0); 532 } 533