1 /* vifm
2 * Copyright (C) 2013 xaizek.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
17 */
18
19 #include "escape.h"
20
21 #include <regex.h>
22
23 #include <curses.h>
24
25 #include <assert.h> /* assert() */
26 #include <ctype.h> /* iscntrl() isdigit() */
27 #include <stddef.h> /* NULL size_t */
28 #include <stdlib.h> /* free() malloc() realloc() strtol() */
29 #include <string.h> /* memcpy() memset() strchr() strcpy() strdup() strlen()
30 strncpy() */
31
32 #include "../cfg/config.h"
33 #include "../compat/reallocarray.h"
34 #include "../utils/test_helpers.h"
35 #include "../utils/str.h"
36 #include "../utils/utf8.h"
37 #include "../utils/utils.h"
38 #include "ui.h"
39
/* Forward declarations of helpers defined later in this file. */

/* Pattern highlighting helpers. */
static char * add_pattern_highlights(const char line[], size_t len,
		const char no_esc[], const int offsets[], const regex_t *re);
static size_t correct_offset(const char line[], const int offsets[],
		size_t offset);
static size_t count_substr_chars(const char line[], regmatch_t *match);
static char * add_highlighted_substr(const char sub[], size_t sub_len,
		char out[]);
static char * add_highlighted_sym(const char sym[], size_t sym_width,
		char out[]);
/* Parsing and printing of strings that contain escape sequences. */
TSTATIC size_t get_char_width_esc(const char str[]);
static void print_char_esc(WINDOW *win, const char str[], esc_state *state);
TSTATIC void esc_state_update(esc_state *state, const char str[]);
static void esc_state_process_attr(esc_state *state, int n);
static void esc_state_set_attr(esc_state *state, int n);
TSTATIC const char * strchar2str(const char str[], int pos,
		size_t *screen_width);

/* Escape sequence which starts block of highlighted symbols.  In
 * esc_state_set_attr() attribute 7 turns reverse on and 1 turns bold on. */
static const char INV_START[] = "\033[7,1m";
/* Escape sequence which ends block of highlighted symbols.  In
 * esc_state_set_attr() attribute 27 turns reverse off and 22 turns off bold
 * (along with several other attributes). */
static const char INV_END[] = "\033[27,22m";
/* Number of extra characters added to highlight one string object (lengths of
 * both sequences above without their terminating null characters). */
static const size_t INV_OVERHEAD = sizeof(INV_START) - 1 + sizeof(INV_END) - 1;
63
/* Makes a copy of the str in which all escape sequences are dropped.  Returns
 * newly allocated string or NULL if the str could not be duplicated. */
char *
esc_remove(const char str[])
{
	char *const result = strdup(str);
	if(result == NULL)
	{
		return NULL;
	}

	/* Only parts of the input are copied, so the duplicate is always big enough
	 * to be reused as the output buffer. */
	char *dst = result;
	const char *src = str;
	while(*src != '\0')
	{
		const size_t width = get_char_width_esc(src);
		if(*src != '\033')
		{
			memcpy(dst, src, width);
			dst += width;
		}
		src += width;
	}
	*dst = '\0';

	return result;
}
85
/* Computes total number of bytes of the str that are occupied by escape
 * sequences.  Returns that number. */
size_t
esc_str_overhead(const char str[])
{
	size_t esc_bytes = 0U;
	const char *cur;
	size_t width;

	for(cur = str; *cur != '\0'; cur += width)
	{
		width = get_char_width_esc(cur);
		if(*cur == '\033')
		{
			esc_bytes += width;
		}
	}

	return esc_bytes;
}
101
/* Highlights matches of the re regular expression in the line, which may
 * contain escape sequences.  Matching is performed against a copy of the line
 * with escape sequences removed.  Returns newly allocated string: the
 * highlighted line when highlighting was added or a plain copy of the line
 * otherwise (including the case of failed allocation of temporary buffers).
 * The result is NULL only if even the copy could not be allocated. */
char *
esc_highlight_pattern(const char line[], const regex_t *re)
{
	char *processed;
	const size_t len = strlen(line);

	/* offsets[i] is position in the line of the character to which i-th byte of
	 * no_esc belongs. */
	int *const offsets = reallocarray(NULL, len + 1, sizeof(int));

	char *const no_esc = malloc(len + 1);
	char *no_esc_sym = no_esc;
	int no_esc_sym_pos = 0;

	const char *src_sym = line;

	if(offsets == NULL || no_esc == NULL)
	{
		/* Can't build auxiliary data, so give up on highlighting. */
		free(offsets);
		free(no_esc);
		return strdup(line);
	}

	/* Fill no_esc and offsets. */
	while(*src_sym != '\0')
	{
		const size_t char_width_esc = get_char_width_esc(src_sym);
		if(*src_sym != '\033')
		{
			size_t i;
			const int offset = src_sym - line;
			/* Each offset value is filled over whole character width. */
			for(i = 0U; i < char_width_esc; ++i)
			{
				offsets[no_esc_sym_pos + i] = offset;
			}
			no_esc_sym_pos += char_width_esc;

			memcpy(no_esc_sym, src_sym, char_width_esc);
			no_esc_sym += char_width_esc;
		}
		src_sym += char_width_esc;
	}
	/* Entry for the terminating null character. */
	offsets[no_esc_sym_pos] = src_sym - line;
	*no_esc_sym = '\0';
	assert(no_esc_sym_pos == no_esc_sym - no_esc);

	processed = add_pattern_highlights(line, len, no_esc, offsets, re);

	free(offsets);
	free(no_esc);

	return (processed == NULL) ? strdup(line) : processed;
}
146
/* Forms new line with highlights of matches of the re regular expression using
 * escape sequences that invert colors.  The line is the original string of
 * length len, no_esc is the string against which re is matched and offsets
 * is indexed by positions in no_esc and yields positions in the line.  Returns
 * newly allocated string or NULL when no match found, only empty matches were
 * found or memory allocation error occurred. */
static char *
add_pattern_highlights(const char line[], size_t len, const char no_esc[],
		const int offsets[], const regex_t *re)
{
	/* XXX: this might benefit from a rewrite, logic of when escape sequences are
	 *      copied is unclear (sometimes along with first matched character,
	 *      sometimes before the match). */

	regmatch_t match;
	/* Position in processed at which output continues. */
	char *next;
	/* Output buffer, grows with every non-empty match. */
	char *processed;
	/* Position in no_esc up to which matches were already handled. */
	int no_esc_pos = 0;
	/* Number of bytes of highlighting sequences accounted for so far. */
	int overhead = 0;
	int offset;

	if(regexec(re, no_esc, 1, &match, 0) != 0)
	{
		return NULL;
	}
	if((processed = malloc(len + 1)) == NULL)
	{
		return NULL;
	}

	/* Before the first match. */
	if(match.rm_so != 0 && no_esc[match.rm_so] == '\0')
	{
		/* This is needed to handle possibility of immediate break from the loop
		 * below. */
		offset = correct_offset(line, offsets, match.rm_so);
	}
	else
	{
		offset = offsets[match.rm_so];
	}
	strncpy(processed, line, offset);
	next = processed + offset;

	/* All matches. */
	do
	{
		int so_offset;
		void *ptr;
		const int empty_match = (match.rm_so == match.rm_eo);

		/* regexec() was run on no_esc + no_esc_pos, make match offsets relative to
		 * the beginning of no_esc. */
		match.rm_so += no_esc_pos;
		match.rm_eo += no_esc_pos;

		/* Position of the start of the match in the line. */
		so_offset = offsets[match.rm_so];

		if(empty_match)
		{
			/* Empty match at the end of the string, nothing more to process. */
			if(no_esc[match.rm_eo] == '\0')
			{
				no_esc_pos = match.rm_eo;
				break;
			}
		}

		/* Between matches. */
		if(no_esc_pos != 0)
		{
			const int corrected = correct_offset(line, offsets, no_esc_pos);
			strncpy(next, line + corrected, so_offset - corrected);
		}

		if(empty_match)
		{
			/* Copy single character after the match to advance forward. */

			/* Position inside the line string. */
			const int esc_pos = (no_esc_pos == 0)
			                  ? (size_t)(next - processed)
			                  : correct_offset(line, offsets, no_esc_pos);
			/* Number of characters to copy from the line string.  Note that this
			 * local shadows the len parameter inside this branch. */
			const int len = (match.rm_so == 0)
			              ? utf8_chrw(no_esc)
			              : correct_offset(line, offsets, match.rm_so + 1) - esc_pos;
			strncpy(next, line + esc_pos, len);
			next += len;
			no_esc_pos += utf8_chrw(&no_esc[no_esc_pos]);
		}
		else
		{
			size_t new_overhead;
			size_t match_len;

			/* Every character of the match is wrapped individually, so the buffer
			 * has to grow by INV_OVERHEAD bytes per matched character. */
			new_overhead = INV_OVERHEAD*count_substr_chars(no_esc, &match);
			len += new_overhead;
			if((ptr = realloc(processed, len + 1)) == NULL)
			{
				free(processed);
				return NULL;
			}
			processed = ptr;

			match_len = correct_offset(line, offsets, match.rm_eo) - so_offset;
			/* The next pointer is recomputed from the new value of processed, the
			 * old one could have been invalidated by realloc(). */
			next = processed + so_offset + overhead;
			next = add_highlighted_substr(line + so_offset, match_len, next);

			no_esc_pos = match.rm_eo;
			overhead += new_overhead;
		}
	}
	while(regexec(re, no_esc + no_esc_pos, 1, &match, 0) == 0);

	/* Abort if there were no non-empty matches. */
	if(overhead == 0)
	{
		free(processed);
		/* Zero is used as a null pointer here. */
		return 0;
	}

	/* After the last match. */
	strcpy(next, line +
			(no_esc_pos == 0 ? (size_t)(next - processed) :
			 correct_offset(line, offsets, no_esc_pos)));

	return processed;
}
270
/* Adjusts offset so that the result points inside the line right past the
 * character that precedes the offset rather than at the beginning of the
 * character at the offset.  The offset must not be zero.  Returns adjusted
 * position inside the line. */
static size_t
correct_offset(const char line[], const int offsets[], size_t offset)
{
	assert(offset != 0U && "Offset has to be greater than zero.");

	/* Where the preceding character starts in the line. */
	const int prev_start = offsets[offset - 1];
	const size_t prev_width = utf8_chrw(line + prev_start);

	return prev_start + prev_width;
}
281
/* Counts number of logical characters (as measured by get_char_width_esc())
 * inside the part of the line that is covered by the match.  Returns the
 * count. */
static size_t
count_substr_chars(const char line[], regmatch_t *match)
{
	const char *const start = line + match->rm_so;
	const size_t nbytes = match->rm_eo - match->rm_so;

	size_t nchars = 0;
	size_t pos;
	for(pos = 0; pos < nbytes; pos += get_char_width_esc(start + pos))
	{
		++nchars;
	}

	return nchars;
}
299
/* Appends every symbol of the first sub_len bytes of the sub to the out buffer
 * passing each one through add_highlighted_sym().  Returns position in the out
 * buffer right after the appended data. */
static char *
add_highlighted_substr(const char sub[], size_t sub_len, char out[])
{
	size_t done;
	size_t width;

	for(done = 0; done < sub_len; done += width)
	{
		const char *const sym = sub + done;
		width = get_char_width_esc(sym);
		out = add_highlighted_sym(sym, width, out);
	}

	return out;
}
315
316 /* Adds one symbol pointed to by the sym parameter of the length sym_width to
317 * the out buffer with highlight effect applied. Returns next position in the
318 * out buffer. */
319 static char *
add_highlighted_sym(const char sym[],size_t sym_width,char out[])320 add_highlighted_sym(const char sym[], size_t sym_width, char out[])
321 {
322 if(sym[0] != '\033')
323 {
324 memcpy(out, INV_START, sizeof(INV_START) - 1);
325 out += sizeof(INV_START) - 1;
326 }
327
328 strncpy(out, sym, sym_width);
329 out += sym_width;
330
331 if(sym[0] != '\033')
332 {
333 memcpy(out, INV_END, sizeof(INV_END) - 1);
334 out += sizeof(INV_END) - 1;
335 }
336
337 return out;
338 }
339
340 int
esc_print_line(const char line[],WINDOW * win,int column,int row,int max_width,int dry_run,int truncated,esc_state * state,int * printed)341 esc_print_line(const char line[], WINDOW *win, int column, int row,
342 int max_width, int dry_run, int truncated, esc_state *state, int *printed)
343 {
344 const char *curr = line;
345 size_t pos = 0U;
346 checked_wmove(win, row, column);
347
348 /* Attributes are set at the beginning of each line and after state change. */
349 col_attr_t col = { .fg = state->fg, .bg = state->bg, .attr = state->attrs };
350 ui_set_attr(win, &col, -1);
351
352 while(pos <= (size_t)max_width && *curr != '\0')
353 {
354 size_t screen_width;
355 const char *const char_str = strchar2str(curr, pos, &screen_width);
356 pos += screen_width;
357 if(pos <= (size_t)max_width)
358 {
359 if(!dry_run || screen_width == 0)
360 {
361 /* Compute real screen width by how much cursor was moved. Sometimes
362 * character width differs from what it should be. */
363 int old_x = getcurx(win);
364
365 print_char_esc(win, char_str, state);
366
367 int new_x = getcurx(win);
368 if(new_x < old_x)
369 {
370 new_x += getmaxx(win);
371 }
372 pos += (new_x - old_x) - screen_width;
373 }
374
375 if(*curr == '\b')
376 {
377 if(!dry_run)
378 {
379 int y, x;
380 getyx(win, y, x);
381 if(x > 0)
382 {
383 checked_wmove(win, y, x - 1);
384 }
385 }
386
387 if(pos > 0)
388 {
389 pos--;
390 }
391 }
392
393 curr += get_char_width_esc(curr);
394 }
395 }
396 *printed = pos;
397
398 if(truncated)
399 {
400 /* Process remaining escape sequences of the line in order to preserve all
401 * elements of highlighting even when lines are not fully drawn. */
402 const char *tail = curr - 1;
403 while((tail = strchr(tail + 1, '\033')) != NULL)
404 {
405 size_t screen_width;
406 const char *const char_str = strchar2str(tail, 0, &screen_width);
407 print_char_esc(win, char_str, state);
408 }
409 }
410
411 return curr - line;
412 }
413
414 /* Returns number of characters at the beginning of the str which form one
415 * logical symbol. Takes UTF-8 encoding and terminal escape sequences into
416 * account. */
417 TSTATIC size_t
get_char_width_esc(const char str[])418 get_char_width_esc(const char str[])
419 {
420 if(*str != '\033')
421 {
422 return utf8_chrw(str);
423 }
424
425 /* Skip prefix. */
426 const char *p = str + 1;
427 if(*p == '[')
428 {
429 ++p;
430 }
431
432 /* And sequences of numbers separated by ";" or ",". */
433 while(isdigit(*p))
434 {
435 do
436 {
437 ++p;
438 }
439 while(isdigit(*p));
440
441 if(*p == ';' || *p == ',')
442 {
443 ++p;
444 }
445 }
446 return (*p == '\0' ? p - str : p - str + 1);
447 }
448
449 /* Prints the leading character of the str to the win window parsing terminal
450 * escape sequences. */
451 static void
print_char_esc(WINDOW * win,const char str[],esc_state * state)452 print_char_esc(WINDOW *win, const char str[], esc_state *state)
453 {
454 if(str[0] == '\033')
455 {
456 esc_state_update(state, str);
457 col_attr_t col = { .fg = state->fg, .bg = state->bg, .attr = state->attrs };
458 ui_set_attr(win, &col, -1);
459 }
460 else
461 {
462 wprint(win, str);
463 }
464 }
465
466 /* Handles escape sequence. Applies whole escape sequence specified by the str
467 * to the state. */
468 TSTATIC void
esc_state_update(esc_state * state,const char str[])469 esc_state_update(esc_state *state, const char str[])
470 {
471 str++;
472 do
473 {
474 int n = 0;
475 if(isdigit(str[1]))
476 {
477 char *end;
478 n = strtol(str + 1, &end, 10);
479 str = end;
480 }
481 else
482 {
483 str++;
484 }
485
486 esc_state_process_attr(state, n);
487 }
488 while(str[0] == ';');
489 }
490
491 /* Processes one escape sequence attribute at a time. Handles both standard and
492 * extended (xterm256) escape sequences. */
493 static void
esc_state_process_attr(esc_state * state,int n)494 esc_state_process_attr(esc_state *state, int n)
495 {
496 switch(state->mode)
497 {
498 case ESM_SHORT:
499 switch(n)
500 {
501 case 38:
502 state->mode = ESM_GOT_FG_PREFIX;
503 break;
504 case 48:
505 state->mode = ESM_GOT_BG_PREFIX;
506 break;
507 default:
508 esc_state_set_attr(state, n);
509 break;
510 }
511 break;
512 case ESM_GOT_FG_PREFIX:
513 state->mode = (n == 5) ? ESM_WAIT_FG_COLOR : ESM_SHORT;
514 break;
515 case ESM_GOT_BG_PREFIX:
516 state->mode = (n == 5) ? ESM_WAIT_BG_COLOR : ESM_SHORT;
517 break;
518 case ESM_WAIT_FG_COLOR:
519 if(n < state->max_colors)
520 {
521 state->fg = n;
522 }
523 state->mode = ESM_SHORT;
524 break;
525 case ESM_WAIT_BG_COLOR:
526 if(n < state->max_colors)
527 {
528 state->bg = n;
529 }
530 state->mode = ESM_SHORT;
531 break;
532 }
533 }
534
535 /* Applies one escape sequence attribute (the n parameter) to the state at a
536 * time. */
537 static void
esc_state_set_attr(esc_state * state,int n)538 esc_state_set_attr(esc_state *state, int n)
539 {
540 #ifdef HAVE_A_ITALIC_DECL
541 const int italic_attr = A_ITALIC;
542 #else
543 /* If A_ITALIC is missing (it's an extension), use A_REVERSE instead. */
544 const int italic_attr = A_REVERSE;
545 #endif
546
547 switch(n)
548 {
549 case 0:
550 state->attrs = state->defaults.attr;
551 state->fg = state->defaults.fg;
552 state->bg = state->defaults.bg;
553 break;
554 case 1:
555 state->attrs |= A_BOLD;
556 break;
557 case 2:
558 state->attrs |= A_DIM;
559 break;
560 case 3:
561 state->attrs |= italic_attr;
562 break;
563 case 4:
564 state->attrs |= A_UNDERLINE;
565 break;
566 case 5: case 6:
567 state->attrs |= A_BLINK;
568 break;
569 case 7:
570 state->attrs |= A_REVERSE;
571 break;
572 case 22:
573 state->attrs &= ~(A_BOLD | A_UNDERLINE | A_BLINK | A_REVERSE | A_DIM);
574 break;
575 case 23:
576 state->attrs &= ~italic_attr;
577 break;
578 case 24:
579 state->attrs &= ~A_UNDERLINE;
580 break;
581 case 25:
582 state->attrs &= ~A_BLINK;
583 break;
584 case 27:
585 state->attrs &= ~A_REVERSE;
586 break;
587 case 30: case 31: case 32: case 33: case 34: case 35: case 36: case 37:
588 state->fg = n - 30;
589 break;
590 case 39:
591 state->fg = -1;
592 break;
593 case 40: case 41: case 42: case 43: case 44: case 45: case 46: case 47:
594 state->bg = n - 40;
595 break;
596 case 49:
597 state->bg = -1;
598 break;
599 }
600 }
601
602 void
esc_state_init(esc_state * state,const col_attr_t * defaults,int max_colors)603 esc_state_init(esc_state *state, const col_attr_t *defaults, int max_colors)
604 {
605 state->mode = ESM_SHORT;
606 state->attrs = defaults->attr;
607 state->fg = defaults->fg;
608 state->bg = defaults->bg;
609 state->defaults = *defaults;
610 state->max_colors = max_colors;
611 }
612
613 /* Converts the leading character of the str string to a printable string. Puts
614 * number of screen character positions taken by the resulting string
615 * representation of a character into *screen_width. Returns pointer to a
616 * statically allocated buffer. */
617 TSTATIC const char *
strchar2str(const char str[],int pos,size_t * screen_width)618 strchar2str(const char str[], int pos, size_t *screen_width)
619 {
620 static char buf[128];
621
622 const size_t char_width = utf8_chrw(str);
623 if(char_width != 1 || (unsigned char)str[0] >= (unsigned char)' ')
624 {
625 if(char_width == 1 && (size_t)str[0] >= 0x80 && (size_t)str[0] < 0x100)
626 {
627 const char *name = keyname((unsigned char)str[0]);
628 if(name != NULL)
629 {
630 strcpy(buf, name);
631 *screen_width = strlen(name);
632 return buf;
633 }
634 }
635
636 memcpy(buf, str, char_width);
637 buf[char_width] = '\0';
638 *screen_width = vifm_wcwidth(get_first_wchar(str));
639 }
640 else if(str[0] == '\n')
641 {
642 buf[0] = '\0';
643 *screen_width = 0;
644 }
645 else if(str[0] == '\b')
646 {
647 strcpy(buf, "");
648 *screen_width = 0;
649 }
650 else if(str[0] == '\r')
651 {
652 strcpy(buf, "<cr>");
653 *screen_width = 4;
654 }
655 else if(str[0] == '\t')
656 {
657 const size_t space_count = cfg.tab_stop - pos%cfg.tab_stop;
658 memset(buf, ' ', space_count);
659 buf[space_count] = '\0';
660 *screen_width = space_count;
661 }
662 else if(str[0] == '\033')
663 {
664 size_t len = get_char_width_esc(str);
665 if(len >= sizeof(buf) || (len > 0 && str[len - 1] != 'm'))
666 {
667 buf[0] = '\0';
668 }
669 else
670 {
671 copy_str(buf, len + 1, str);
672 }
673 *screen_width = 0;
674 }
675 else if(iscntrl((unsigned char)str[0]))
676 {
677 buf[0] = '^';
678 buf[1] = str[0] ^ 64;
679 buf[2] = '\0';
680 *screen_width = 2;
681 }
682 else
683 {
684 /* XXX: is this code completely unreachable? */
685 buf[0] = str[0];
686 buf[1] = '\0';
687 *screen_width = 1;
688 }
689 return buf;
690 }
691
692 /* vim: set tabstop=2 softtabstop=2 shiftwidth=2 noexpandtab cinoptions-=(0 : */
693 /* vim: set cinoptions+=t0 filetype=c : */
694