xref: /freebsd/contrib/less/cvt.c (revision d713e089)
/*
 * Copyright (C) 1984-2023  Mark Nudelman
 *
 * You may distribute under the terms of either the GNU General Public
 * License or the Less License, as specified in the README file.
 *
 * For more information, see the README file.
 */
9f0be0a1fSXin LI 
/*
 * Routines to convert text in various ways.  Used by search.
 */
13f0be0a1fSXin LI 
14f0be0a1fSXin LI #include "less.h"
15f0be0a1fSXin LI #include "charset.h"
16f0be0a1fSXin LI 
17f0be0a1fSXin LI extern int utf_mode;
18f0be0a1fSXin LI 
19f0be0a1fSXin LI /*
20f0be0a1fSXin LI  * Get the length of a buffer needed to convert a string.
21f0be0a1fSXin LI  */
cvt_length(int len,int ops)22d713e089SXin LI public int cvt_length(int len, int ops)
23f0be0a1fSXin LI {
24f0be0a1fSXin LI 	if (utf_mode)
25f0be0a1fSXin LI 		/*
26f0be0a1fSXin LI 		 * Just copying a string in UTF-8 mode can cause it to grow
27f0be0a1fSXin LI 		 * in length.
28f0be0a1fSXin LI 		 * Four output bytes for one input byte is the worst case.
29f0be0a1fSXin LI 		 */
30f0be0a1fSXin LI 		len *= 4;
31f0be0a1fSXin LI 	return (len + 1);
32f0be0a1fSXin LI }
33f0be0a1fSXin LI 
34f0be0a1fSXin LI /*
35f0be0a1fSXin LI  * Allocate a chpos array for use by cvt_text.
36f0be0a1fSXin LI  */
cvt_alloc_chpos(int len)37d713e089SXin LI public int * cvt_alloc_chpos(int len)
38f0be0a1fSXin LI {
39f0be0a1fSXin LI 	int i;
40f0be0a1fSXin LI 	int *chpos = (int *) ecalloc(sizeof(int), len);
41f0be0a1fSXin LI 	/* Initialize all entries to an invalid position. */
42f0be0a1fSXin LI 	for (i = 0;  i < len;  i++)
43f0be0a1fSXin LI 		chpos[i] = -1;
44f0be0a1fSXin LI 	return (chpos);
45f0be0a1fSXin LI }
46f0be0a1fSXin LI 
47f0be0a1fSXin LI /*
48f0be0a1fSXin LI  * Convert text.  Perform the transformations specified by ops.
49f0be0a1fSXin LI  * Returns converted text in odst.  The original offset of each
50f0be0a1fSXin LI  * odst character (when it was in osrc) is returned in the chpos array.
51f0be0a1fSXin LI  */
cvt_text(char * odst,char * osrc,int * chpos,int * lenp,int ops)52d713e089SXin LI public void cvt_text(char *odst, char *osrc, int *chpos, int *lenp, int ops)
53f0be0a1fSXin LI {
54f0be0a1fSXin LI 	char *dst;
5596e55cc7SXin LI 	char *edst = odst;
56f0be0a1fSXin LI 	char *src;
571ea31627SRobert Watson 	char *src_end;
58f0be0a1fSXin LI 	LWCHAR ch;
59f0be0a1fSXin LI 
60f0be0a1fSXin LI 	if (lenp != NULL)
61f0be0a1fSXin LI 		src_end = osrc + *lenp;
62f0be0a1fSXin LI 	else
63f0be0a1fSXin LI 		src_end = osrc + strlen(osrc);
64f0be0a1fSXin LI 
65f0be0a1fSXin LI 	for (src = osrc, dst = odst;  src < src_end;  )
66f0be0a1fSXin LI 	{
67a15691bfSXin LI 		int src_pos = (int) (src - osrc);
68a15691bfSXin LI 		int dst_pos = (int) (dst - odst);
692235c7feSXin LI 		struct ansi_state *pansi;
70f6b74a7dSXin LI 		ch = step_char(&src, +1, src_end);
71f0be0a1fSXin LI 		if ((ops & CVT_BS) && ch == '\b' && dst > odst)
72f0be0a1fSXin LI 		{
73f0be0a1fSXin LI 			/* Delete backspace and preceding char. */
74f0be0a1fSXin LI 			do {
75f0be0a1fSXin LI 				dst--;
76b2ea2440SXin LI 			} while (dst > odst && utf_mode &&
77f0be0a1fSXin LI 				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
782235c7feSXin LI 		} else if ((ops & CVT_ANSI) && (pansi = ansi_start(ch)) != NULL)
79f0be0a1fSXin LI 		{
80f0be0a1fSXin LI 			/* Skip to end of ANSI escape sequence. */
81f0be0a1fSXin LI 			while (src < src_end)
822235c7feSXin LI 			{
832235c7feSXin LI 				if (ansi_step(pansi, ch) != ANSI_MID)
84f0be0a1fSXin LI 					break;
852235c7feSXin LI 				ch = *src++;
862235c7feSXin LI 			}
872235c7feSXin LI 			ansi_done(pansi);
88f0be0a1fSXin LI 		} else
89f0be0a1fSXin LI 		{
90f0be0a1fSXin LI 			/* Just copy the char to the destination buffer. */
91f0be0a1fSXin LI 			if ((ops & CVT_TO_LC) && IS_UPPER(ch))
92f0be0a1fSXin LI 				ch = TO_LOWER(ch);
93f0be0a1fSXin LI 			put_wchar(&dst, ch);
9496e55cc7SXin LI 			/* Record the original position of the char. */
9596e55cc7SXin LI 			if (chpos != NULL)
96f0be0a1fSXin LI 				chpos[dst_pos] = src_pos;
97f0be0a1fSXin LI 		}
9896e55cc7SXin LI 		if (dst > edst)
9996e55cc7SXin LI 			edst = dst;
100f0be0a1fSXin LI 	}
10196e55cc7SXin LI 	if ((ops & CVT_CRLF) && edst > odst && edst[-1] == '\r')
10296e55cc7SXin LI 		edst--;
10396e55cc7SXin LI 	*edst = '\0';
104f0be0a1fSXin LI 	if (lenp != NULL)
105a15691bfSXin LI 		*lenp = (int) (edst - odst);
10696e55cc7SXin LI 	/* FIXME: why was this here?  if (chpos != NULL) chpos[dst - odst] = src - osrc; */
107f0be0a1fSXin LI }
108