1 /* vifm
2  * Copyright (C) 2013 xaizek.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
17  */
18 
19 #include "escape.h"
20 
21 #include <regex.h>
22 
23 #include <curses.h>
24 
25 #include <assert.h> /* assert() */
26 #include <ctype.h> /* iscntrl() isdigit() */
27 #include <stddef.h> /* NULL size_t */
28 #include <stdlib.h> /* free() malloc() realloc() strtol() */
29 #include <string.h> /* memcpy() memset() strchr() strcpy() strdup() strlen()
30                        strncpy() */
31 
32 #include "../cfg/config.h"
33 #include "../compat/reallocarray.h"
34 #include "../utils/test_helpers.h"
35 #include "../utils/str.h"
36 #include "../utils/utf8.h"
37 #include "../utils/utils.h"
38 #include "ui.h"
39 
40 static char * add_pattern_highlights(const char line[], size_t len,
41 		const char no_esc[], const int offsets[], const regex_t *re);
42 static size_t correct_offset(const char line[], const int offsets[],
43 		size_t offset);
44 static size_t count_substr_chars(const char line[], regmatch_t *match);
45 static char * add_highlighted_substr(const char sub[], size_t sub_len,
46 		char out[]);
47 static char * add_highlighted_sym(const char sym[], size_t sym_width,
48 		char out[]);
49 TSTATIC size_t get_char_width_esc(const char str[]);
50 static void print_char_esc(WINDOW *win, const char str[], esc_state *state);
51 TSTATIC void esc_state_update(esc_state *state, const char str[]);
52 static void esc_state_process_attr(esc_state *state, int n);
53 static void esc_state_set_attr(esc_state *state, int n);
54 TSTATIC const char * strchar2str(const char str[], int pos,
55 		size_t *screen_width);
56 
/* Escape sequence that is put in front of every highlighted symbol. */
static const char INV_START[] = "\033[7,1m";
/* Escape sequence that is put right after every highlighted symbol. */
static const char INV_END[] = "\033[27,22m";
/* Number of extra bytes needed to highlight a single symbol: lengths of both
 * sequences above without their terminating null characters. */
static const size_t INV_OVERHEAD =
	(sizeof(INV_START) - 1) + (sizeof(INV_END) - 1);
63 
/* Makes a copy of the str with all escape sequences dropped.  Returns newly
 * allocated string or NULL if memory allocation has failed. */
char *
esc_remove(const char str[])
{
	/* The duplicate only reserves enough space, its contents are overwritten
	 * below with those symbols of the input that aren't escape sequences. */
	char *const result = strdup(str);
	if(result == NULL)
	{
		return NULL;
	}

	char *dst = result;
	const char *src;
	size_t width;
	for(src = str; *src != '\0'; src += width)
	{
		width = get_char_width_esc(src);
		if(*src == '\033')
		{
			/* Escape sequence: skip it without copying. */
			continue;
		}

		memcpy(dst, src, width);
		dst += width;
	}
	*dst = '\0';

	return result;
}
85 
/* Computes how many bytes of the str are occupied by escape sequences.  Returns
 * that number. */
size_t
esc_str_overhead(const char str[])
{
	size_t esc_bytes = 0U;
	const char *sym = str;
	while(*sym != '\0')
	{
		const size_t width = get_char_width_esc(sym);
		/* Only symbols that start with the escape character are counted. */
		esc_bytes += (*sym == '\033') ? width : 0U;
		sym += width;
	}
	return esc_bytes;
}
101 
/* Highlights all matches of the re regular expression in the line, which may
 * contain escape sequences.  Matching is performed on the line with escape
 * sequences removed.  Returns newly allocated string, which is a plain copy of
 * the line if nothing was highlighted or if auxiliary memory couldn't be
 * allocated (NULL if even the copy can't be allocated). */
char *
esc_highlight_pattern(const char line[], const regex_t *re)
{
	char *processed;
	const size_t len = strlen(line);

	/* offsets[i] is the position in the line at which the character containing
	 * i-th byte of no_esc starts; there is one extra element for the end of the
	 * string. */
	int *const offsets = reallocarray(NULL, len + 1, sizeof(*offsets));
	/* The line without escape sequences, it can't be longer than the line. */
	char *const no_esc = malloc(len + 1);
	if(offsets == NULL || no_esc == NULL)
	{
		/* Can't build auxiliary data, so return the line unchanged, same as when
		 * highlighting fails for other reasons. */
		free(offsets);
		free(no_esc);
		return strdup(line);
	}

	char *no_esc_sym = no_esc;
	int no_esc_sym_pos = 0;

	/* Fill no_esc and offsets. */
	const char *src_sym = line;
	while(*src_sym != '\0')
	{
		const size_t char_width_esc = get_char_width_esc(src_sym);
		if(*src_sym != '\033')
		{
			size_t i;
			const int offset = src_sym - line;
			/* Each offset value is filled over whole character width. */
			for(i = 0U; i < char_width_esc; ++i)
			{
				offsets[no_esc_sym_pos + i] = offset;
			}
			no_esc_sym_pos += char_width_esc;

			memcpy(no_esc_sym, src_sym, char_width_esc);
			no_esc_sym += char_width_esc;
		}
		src_sym += char_width_esc;
	}
	offsets[no_esc_sym_pos] = src_sym - line;
	*no_esc_sym = '\0';
	assert(no_esc_sym_pos == no_esc_sym - no_esc);

	processed = add_pattern_highlights(line, len, no_esc, offsets, re);

	free(offsets);
	free(no_esc);

	return (processed == NULL) ? strdup(line) : processed;
}
146 
147 /* Forms new line with highlights of matcher of the re regular expression using
148  * escape sequences that invert colors.  Returns NULL when no match found or
149  * memory allocation error occurred. */
150 static char *
add_pattern_highlights(const char line[],size_t len,const char no_esc[],const int offsets[],const regex_t * re)151 add_pattern_highlights(const char line[], size_t len, const char no_esc[],
152 		const int offsets[], const regex_t *re)
153 {
154 	/* XXX: this might benefit from a rewrite, logic of when escape sequences are
155 	 *      copied is unclear (sometimes along with first matched character,
156 	 *      sometimes before the match). */
157 
158 	regmatch_t match;
159 	char *next;
160 	char *processed;
161 	int no_esc_pos = 0;
162 	int overhead = 0;
163 	int offset;
164 
165 	if(regexec(re, no_esc, 1, &match, 0) != 0)
166 	{
167 		return NULL;
168 	}
169 	if((processed = malloc(len + 1)) == NULL)
170 	{
171 		return NULL;
172 	}
173 
174 	/* Before the first match. */
175 	if(match.rm_so != 0 && no_esc[match.rm_so] == '\0')
176 	{
177 		/* This is needed to handle possibility of immediate break from the loop
178 		 * below. */
179 		offset = correct_offset(line, offsets, match.rm_so);
180 	}
181 	else
182 	{
183 		offset = offsets[match.rm_so];
184 	}
185 	strncpy(processed, line, offset);
186 	next = processed + offset;
187 
188 	/* All matches. */
189 	do
190 	{
191 		int so_offset;
192 		void *ptr;
193 		const int empty_match = (match.rm_so == match.rm_eo);
194 
195 		match.rm_so += no_esc_pos;
196 		match.rm_eo += no_esc_pos;
197 
198 		so_offset = offsets[match.rm_so];
199 
200 		if(empty_match)
201 		{
202 			if(no_esc[match.rm_eo] == '\0')
203 			{
204 				no_esc_pos = match.rm_eo;
205 				break;
206 			}
207 		}
208 
209 		/* Between matches. */
210 		if(no_esc_pos != 0)
211 		{
212 			const int corrected = correct_offset(line, offsets, no_esc_pos);
213 			strncpy(next, line + corrected, so_offset - corrected);
214 		}
215 
216 		if(empty_match)
217 		{
218 			/* Copy single character after the match to advance forward. */
219 
220 			/* Position inside the line string. */
221 			const int esc_pos = (no_esc_pos == 0)
222 			                  ? (size_t)(next - processed)
223 			                  : correct_offset(line, offsets, no_esc_pos);
224 			/* Number of characters to copy from the line string. */
225 			const int len = (match.rm_so == 0)
226 			              ? utf8_chrw(no_esc)
227 			              : correct_offset(line, offsets, match.rm_so + 1) - esc_pos;
228 			strncpy(next, line + esc_pos, len);
229 			next += len;
230 			no_esc_pos += utf8_chrw(&no_esc[no_esc_pos]);
231 		}
232 		else
233 		{
234 			size_t new_overhead;
235 			size_t match_len;
236 
237 			new_overhead = INV_OVERHEAD*count_substr_chars(no_esc, &match);
238 			len += new_overhead;
239 			if((ptr = realloc(processed, len + 1)) == NULL)
240 			{
241 				free(processed);
242 				return NULL;
243 			}
244 			processed = ptr;
245 
246 			match_len = correct_offset(line, offsets, match.rm_eo) - so_offset;
247 			next = processed + so_offset + overhead;
248 			next = add_highlighted_substr(line + so_offset, match_len, next);
249 
250 			no_esc_pos = match.rm_eo;
251 			overhead += new_overhead;
252 		}
253 	}
254 	while(regexec(re, no_esc + no_esc_pos, 1, &match, 0) == 0);
255 
256 	/* Abort if there were no non-empty matches. */
257 	if(overhead == 0)
258 	{
259 		free(processed);
260 		return 0;
261 	}
262 
263 	/* After the last match. */
264 	strcpy(next, line +
265 			(no_esc_pos == 0 ? (size_t)(next - processed) :
266 			 correct_offset(line, offsets, no_esc_pos)));
267 
268 	return processed;
269 }
270 
/* Translates the offset inside the escape-free string into a position in the
 * line that is right past the character preceding the offset, rather than at
 * the beginning of the character at the offset.  The offset must not be zero.
 * Returns the position. */
static size_t
correct_offset(const char line[], const int offsets[], size_t offset)
{
	assert(offset != 0U && "Offset has to be greater than zero.");

	/* Start of the previous character in the line. */
	const size_t prev_idx = offset - 1U;
	const int prev_start = offsets[prev_idx];
	return prev_start + (size_t)utf8_chrw(&line[prev_start]);
}
281 
/* Counts logical symbols (as measured by get_char_width_esc()) that make up
 * the part of the line covered by the match of a regular expression.  Returns
 * the count. */
static size_t
count_substr_chars(const char line[], regmatch_t *match)
{
	const char *const sub = line + match->rm_so;
	const size_t sub_len = match->rm_eo - match->rm_so;
	size_t nsyms = 0;
	size_t pos;
	for(pos = 0; pos < sub_len; pos += get_char_width_esc(sub + pos))
	{
		++nsyms;
	}
	return nsyms;
}
299 
/* Writes first sub_len bytes of the sub string into the out buffer symbol by
 * symbol applying highlight effect to each of them.  Returns position in the
 * out buffer right past the written data. */
static char *
add_highlighted_substr(const char sub[], size_t sub_len, char out[])
{
	size_t pos;
	size_t width;
	for(pos = 0; pos < sub_len; pos += width)
	{
		width = get_char_width_esc(&sub[pos]);
		out = add_highlighted_sym(&sub[pos], width, out);
	}
	return out;
}
315 
316 /* Adds one symbol pointed to by the sym parameter of the length sym_width to
317  * the out buffer with highlight effect applied.  Returns next position in the
318  * out buffer. */
319 static char *
add_highlighted_sym(const char sym[],size_t sym_width,char out[])320 add_highlighted_sym(const char sym[], size_t sym_width, char out[])
321 {
322 	if(sym[0] != '\033')
323 	{
324 		memcpy(out, INV_START, sizeof(INV_START) - 1);
325 		out += sizeof(INV_START) - 1;
326 	}
327 
328 	strncpy(out, sym, sym_width);
329 	out += sym_width;
330 
331 	if(sym[0] != '\033')
332 	{
333 		memcpy(out, INV_END, sizeof(INV_END) - 1);
334 		out += sizeof(INV_END) - 1;
335 	}
336 
337 	return out;
338 }
339 
340 int
esc_print_line(const char line[],WINDOW * win,int column,int row,int max_width,int dry_run,int truncated,esc_state * state,int * printed)341 esc_print_line(const char line[], WINDOW *win, int column, int row,
342 		int max_width, int dry_run, int truncated, esc_state *state, int *printed)
343 {
344 	const char *curr = line;
345 	size_t pos = 0U;
346 	checked_wmove(win, row, column);
347 
348 	/* Attributes are set at the beginning of each line and after state change. */
349 	col_attr_t col = { .fg = state->fg, .bg = state->bg, .attr = state->attrs };
350 	ui_set_attr(win, &col, -1);
351 
352 	while(pos <= (size_t)max_width && *curr != '\0')
353 	{
354 		size_t screen_width;
355 		const char *const char_str = strchar2str(curr, pos, &screen_width);
356 		pos += screen_width;
357 		if(pos <= (size_t)max_width)
358 		{
359 			if(!dry_run || screen_width == 0)
360 			{
361 				/* Compute real screen width by how much cursor was moved.  Sometimes
362 				 * character width differs from what it should be. */
363 				int old_x = getcurx(win);
364 
365 				print_char_esc(win, char_str, state);
366 
367 				int new_x = getcurx(win);
368 				if(new_x < old_x)
369 				{
370 					new_x += getmaxx(win);
371 				}
372 				pos += (new_x - old_x) - screen_width;
373 			}
374 
375 			if(*curr == '\b')
376 			{
377 				if(!dry_run)
378 				{
379 					int y, x;
380 					getyx(win, y, x);
381 					if(x > 0)
382 					{
383 						checked_wmove(win, y, x - 1);
384 					}
385 				}
386 
387 				if(pos > 0)
388 				{
389 					pos--;
390 				}
391 			}
392 
393 			curr += get_char_width_esc(curr);
394 		}
395 	}
396 	*printed = pos;
397 
398 	if(truncated)
399 	{
400 		/* Process remaining escape sequences of the line in order to preserve all
401 		 * elements of highlighting even when lines are not fully drawn. */
402 		const char *tail = curr - 1;
403 		while((tail = strchr(tail + 1, '\033')) != NULL)
404 		{
405 			size_t screen_width;
406 			const char *const char_str = strchar2str(tail, 0, &screen_width);
407 			print_char_esc(win, char_str, state);
408 		}
409 	}
410 
411 	return curr - line;
412 }
413 
414 /* Returns number of characters at the beginning of the str which form one
415  * logical symbol.  Takes UTF-8 encoding and terminal escape sequences into
416  * account. */
417 TSTATIC size_t
get_char_width_esc(const char str[])418 get_char_width_esc(const char str[])
419 {
420 	if(*str != '\033')
421 	{
422 		return utf8_chrw(str);
423 	}
424 
425 	/* Skip prefix. */
426 	const char *p = str + 1;
427 	if(*p == '[')
428 	{
429 		++p;
430 	}
431 
432 	/* And sequences of numbers separated by ";" or ",". */
433 	while(isdigit(*p))
434 	{
435 		do
436 		{
437 			++p;
438 		}
439 		while(isdigit(*p));
440 
441 		if(*p == ';' || *p == ',')
442 		{
443 			++p;
444 		}
445 	}
446 	return (*p == '\0' ? p - str : p - str + 1);
447 }
448 
449 /* Prints the leading character of the str to the win window parsing terminal
450  * escape sequences. */
451 static void
print_char_esc(WINDOW * win,const char str[],esc_state * state)452 print_char_esc(WINDOW *win, const char str[], esc_state *state)
453 {
454 	if(str[0] == '\033')
455 	{
456 		esc_state_update(state, str);
457 		col_attr_t col = { .fg = state->fg, .bg = state->bg, .attr = state->attrs };
458 		ui_set_attr(win, &col, -1);
459 	}
460 	else
461 	{
462 		wprint(win, str);
463 	}
464 }
465 
466 /* Handles escape sequence.  Applies whole escape sequence specified by the str
467  * to the state. */
468 TSTATIC void
esc_state_update(esc_state * state,const char str[])469 esc_state_update(esc_state *state, const char str[])
470 {
471 	str++;
472 	do
473 	{
474 		int n = 0;
475 		if(isdigit(str[1]))
476 		{
477 			char *end;
478 			n = strtol(str + 1, &end, 10);
479 			str = end;
480 		}
481 		else
482 		{
483 			str++;
484 		}
485 
486 		esc_state_process_attr(state, n);
487 	}
488 	while(str[0] == ';');
489 }
490 
491 /* Processes one escape sequence attribute at a time.  Handles both standard and
492  * extended (xterm256) escape sequences. */
493 static void
esc_state_process_attr(esc_state * state,int n)494 esc_state_process_attr(esc_state *state, int n)
495 {
496 	switch(state->mode)
497 	{
498 		case ESM_SHORT:
499 			switch(n)
500 			{
501 				case 38:
502 					state->mode = ESM_GOT_FG_PREFIX;
503 					break;
504 				case 48:
505 					state->mode = ESM_GOT_BG_PREFIX;
506 					break;
507 				default:
508 					esc_state_set_attr(state, n);
509 					break;
510 			}
511 			break;
512 		case ESM_GOT_FG_PREFIX:
513 			state->mode = (n == 5) ? ESM_WAIT_FG_COLOR : ESM_SHORT;
514 			break;
515 		case ESM_GOT_BG_PREFIX:
516 			state->mode = (n == 5) ? ESM_WAIT_BG_COLOR : ESM_SHORT;
517 			break;
518 		case ESM_WAIT_FG_COLOR:
519 			if(n < state->max_colors)
520 			{
521 				state->fg = n;
522 			}
523 			state->mode = ESM_SHORT;
524 			break;
525 		case ESM_WAIT_BG_COLOR:
526 			if(n < state->max_colors)
527 			{
528 				state->bg = n;
529 			}
530 			state->mode = ESM_SHORT;
531 			break;
532 	}
533 }
534 
535 /* Applies one escape sequence attribute (the n parameter) to the state at a
536  * time. */
537 static void
esc_state_set_attr(esc_state * state,int n)538 esc_state_set_attr(esc_state *state, int n)
539 {
540 #ifdef HAVE_A_ITALIC_DECL
541 	const int italic_attr = A_ITALIC;
542 #else
543 	/* If A_ITALIC is missing (it's an extension), use A_REVERSE instead. */
544 	const int italic_attr = A_REVERSE;
545 #endif
546 
547 	switch(n)
548 	{
549 		case 0:
550 			state->attrs = state->defaults.attr;
551 			state->fg = state->defaults.fg;
552 			state->bg = state->defaults.bg;
553 			break;
554 		case 1:
555 			state->attrs |= A_BOLD;
556 			break;
557 		case 2:
558 			state->attrs |= A_DIM;
559 			break;
560 		case 3:
561 			state->attrs |= italic_attr;
562 			break;
563 		case 4:
564 			state->attrs |= A_UNDERLINE;
565 			break;
566 		case 5: case 6:
567 			state->attrs |= A_BLINK;
568 			break;
569 		case 7:
570 			state->attrs |= A_REVERSE;
571 			break;
572 		case 22:
573 			state->attrs &= ~(A_BOLD | A_UNDERLINE | A_BLINK | A_REVERSE | A_DIM);
574 			break;
575 		case 23:
576 			state->attrs &= ~italic_attr;
577 			break;
578 		case 24:
579 			state->attrs &= ~A_UNDERLINE;
580 			break;
581 		case 25:
582 			state->attrs &= ~A_BLINK;
583 			break;
584 		case 27:
585 			state->attrs &= ~A_REVERSE;
586 			break;
587 		case 30: case 31: case 32: case 33: case 34: case 35: case 36: case 37:
588 			state->fg = n - 30;
589 			break;
590 		case 39:
591 			state->fg = -1;
592 			break;
593 		case 40: case 41: case 42: case 43: case 44: case 45: case 46: case 47:
594 			state->bg = n - 40;
595 			break;
596 		case 49:
597 			state->bg = -1;
598 			break;
599 	}
600 }
601 
602 void
esc_state_init(esc_state * state,const col_attr_t * defaults,int max_colors)603 esc_state_init(esc_state *state, const col_attr_t *defaults, int max_colors)
604 {
605 	state->mode = ESM_SHORT;
606 	state->attrs = defaults->attr;
607 	state->fg = defaults->fg;
608 	state->bg = defaults->bg;
609 	state->defaults = *defaults;
610 	state->max_colors = max_colors;
611 }
612 
613 /* Converts the leading character of the str string to a printable string.  Puts
614  * number of screen character positions taken by the resulting string
615  * representation of a character into *screen_width.  Returns pointer to a
616  * statically allocated buffer. */
617 TSTATIC const char *
strchar2str(const char str[],int pos,size_t * screen_width)618 strchar2str(const char str[], int pos, size_t *screen_width)
619 {
620 	static char buf[128];
621 
622 	const size_t char_width = utf8_chrw(str);
623 	if(char_width != 1 || (unsigned char)str[0] >= (unsigned char)' ')
624 	{
625 		if(char_width == 1 && (size_t)str[0] >= 0x80 && (size_t)str[0] < 0x100)
626 		{
627 			const char *name = keyname((unsigned char)str[0]);
628 			if(name != NULL)
629 			{
630 				strcpy(buf, name);
631 				*screen_width = strlen(name);
632 				return buf;
633 			}
634 		}
635 
636 		memcpy(buf, str, char_width);
637 		buf[char_width] = '\0';
638 		*screen_width = vifm_wcwidth(get_first_wchar(str));
639 	}
640 	else if(str[0] == '\n')
641 	{
642 		buf[0] = '\0';
643 		*screen_width = 0;
644 	}
645 	else if(str[0] == '\b')
646 	{
647 		strcpy(buf, "");
648 		*screen_width = 0;
649 	}
650 	else if(str[0] == '\r')
651 	{
652 		strcpy(buf, "<cr>");
653 		*screen_width = 4;
654 	}
655 	else if(str[0] == '\t')
656 	{
657 		const size_t space_count = cfg.tab_stop - pos%cfg.tab_stop;
658 		memset(buf, ' ', space_count);
659 		buf[space_count] = '\0';
660 		*screen_width = space_count;
661 	}
662 	else if(str[0] == '\033')
663 	{
664 		size_t len = get_char_width_esc(str);
665 		if(len >= sizeof(buf) || (len > 0 && str[len - 1] != 'm'))
666 		{
667 			buf[0] = '\0';
668 		}
669 		else
670 		{
671 			copy_str(buf, len + 1, str);
672 		}
673 		*screen_width = 0;
674 	}
675 	else if(iscntrl((unsigned char)str[0]))
676 	{
677 		buf[0] = '^';
678 		buf[1] = str[0] ^ 64;
679 		buf[2] = '\0';
680 		*screen_width = 2;
681 	}
682 	else
683 	{
684 		/* XXX: is this code completely unreachable? */
685 		buf[0] = str[0];
686 		buf[1] = '\0';
687 		*screen_width = 1;
688 	}
689 	return buf;
690 }
691 
692 /* vim: set tabstop=2 softtabstop=2 shiftwidth=2 noexpandtab cinoptions-=(0 : */
693 /* vim: set cinoptions+=t0 filetype=c : */
694