xref: /minix/external/bsd/less/dist/cvt.c (revision dda632a2)
1 /*	$NetBSD: cvt.c,v 1.2 2011/07/03 19:51:26 tron Exp $	*/
2 
3 /*
4  * Copyright (C) 1984-2011  Mark Nudelman
5  *
6  * You may distribute under the terms of either the GNU General Public
7  * License or the Less License, as specified in the README file.
8  *
9  * For more information about less, or for information on how to
10  * contact the author, see the README file.
11  */
12 
13 /*
14  * Routines to convert text in various ways.  Used by search.
15  */
16 
17 #include "less.h"
18 #include "charset.h"
19 
20 extern int utf_mode;
21 
22 /*
23  * Get the length of a buffer needed to convert a string.
24  */
25 	public int
26 cvt_length(len, ops)
27 	int len;
28 	int ops;
29 {
30 	if (utf_mode)
31 		/*
32 		 * Just copying a string in UTF-8 mode can cause it to grow
33 		 * in length.
34 		 * Four output bytes for one input byte is the worst case.
35 		 */
36 		len *= 4;
37 	return (len + 1);
38 }
39 
40 /*
41  * Allocate a chpos array for use by cvt_text.
42  */
43 	public int *
44 cvt_alloc_chpos(len)
45 	int len;
46 {
47 	int i;
48 	int *chpos = (int *) ecalloc(sizeof(int), len);
49 	/* Initialize all entries to an invalid position. */
50 	for (i = 0;  i < len;  i++)
51 		chpos[i] = -1;
52 	return (chpos);
53 }
54 
55 /*
56  * Convert text.  Perform the transformations specified by ops.
57  * Returns converted text in odst.  The original offset of each
58  * odst character (when it was in osrc) is returned in the chpos array.
59  */
60 	public void
61 cvt_text(odst, osrc, chpos, lenp, ops)
62 	char *odst;
63 	char *osrc;
64 	int *chpos;
65 	int *lenp;
66 	int ops;
67 {
68 	char *dst;
69 	char *src;
70 	register char *src_end;
71 	LWCHAR ch;
72 
73 	if (lenp != NULL)
74 		src_end = osrc + *lenp;
75 	else
76 		src_end = osrc + strlen(osrc);
77 
78 	for (src = osrc, dst = odst;  src < src_end;  )
79 	{
80 		int src_pos = src - osrc;
81 		int dst_pos = dst - odst;
82 		ch = step_char(&src, +1, src_end);
83 		if ((ops & CVT_BS) && ch == '\b' && dst > odst)
84 		{
85 			/* Delete backspace and preceding char. */
86 			do {
87 				dst--;
88 			} while (dst > odst &&
89 				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
90 		} else if ((ops & CVT_ANSI) && IS_CSI_START(ch))
91 		{
92 			/* Skip to end of ANSI escape sequence. */
93 			src++;  /* skip the CSI start char */
94 			while (src < src_end)
95 				if (!is_ansi_middle(*src++))
96 					break;
97 		} else
98 		{
99 			/* Just copy the char to the destination buffer. */
100 			if ((ops & CVT_TO_LC) && IS_UPPER(ch))
101 				ch = TO_LOWER(ch);
102 			put_wchar(&dst, ch);
103 			/*
104 			 * Record the original position of the char.
105 			 * But if we've already recorded a position
106 			 * for this char (due to a backspace), leave
107 			 * it alone; if multiple source chars map to
108 			 * one destination char, we want the position
109 			 * of the first one.
110 			 */
111 			if (chpos != NULL && chpos[dst_pos] < 0)
112 				chpos[dst_pos] = src_pos;
113 		}
114 	}
115 	if ((ops & CVT_CRLF) && dst > odst && dst[-1] == '\r')
116 		dst--;
117 	*dst = '\0';
118 	if (lenp != NULL)
119 		*lenp = dst - odst;
120 	if (chpos != NULL)
121 		chpos[dst - odst] = src - osrc;
122 }
123