/*
 * Copyright (C) 1984-2023  Mark Nudelman
 *
 * You may distribute under the terms of either the GNU General Public
 * License or the Less License, as specified in the README file.
 *
 * For more information, see the README file.
 */
9f0be0a1fSXin LI
/*
 * Routines to convert text in various ways.  Used by search.
 */
13f0be0a1fSXin LI
14f0be0a1fSXin LI #include "less.h"
15f0be0a1fSXin LI #include "charset.h"
16f0be0a1fSXin LI
17f0be0a1fSXin LI extern int utf_mode;
18f0be0a1fSXin LI
19f0be0a1fSXin LI /*
20f0be0a1fSXin LI * Get the length of a buffer needed to convert a string.
21f0be0a1fSXin LI */
cvt_length(int len,int ops)22d713e089SXin LI public int cvt_length(int len, int ops)
23f0be0a1fSXin LI {
24f0be0a1fSXin LI if (utf_mode)
25f0be0a1fSXin LI /*
26f0be0a1fSXin LI * Just copying a string in UTF-8 mode can cause it to grow
27f0be0a1fSXin LI * in length.
28f0be0a1fSXin LI * Four output bytes for one input byte is the worst case.
29f0be0a1fSXin LI */
30f0be0a1fSXin LI len *= 4;
31f0be0a1fSXin LI return (len + 1);
32f0be0a1fSXin LI }
33f0be0a1fSXin LI
34f0be0a1fSXin LI /*
35f0be0a1fSXin LI * Allocate a chpos array for use by cvt_text.
36f0be0a1fSXin LI */
cvt_alloc_chpos(int len)37d713e089SXin LI public int * cvt_alloc_chpos(int len)
38f0be0a1fSXin LI {
39f0be0a1fSXin LI int i;
40f0be0a1fSXin LI int *chpos = (int *) ecalloc(sizeof(int), len);
41f0be0a1fSXin LI /* Initialize all entries to an invalid position. */
42f0be0a1fSXin LI for (i = 0; i < len; i++)
43f0be0a1fSXin LI chpos[i] = -1;
44f0be0a1fSXin LI return (chpos);
45f0be0a1fSXin LI }
46f0be0a1fSXin LI
47f0be0a1fSXin LI /*
48f0be0a1fSXin LI * Convert text. Perform the transformations specified by ops.
49f0be0a1fSXin LI * Returns converted text in odst. The original offset of each
50f0be0a1fSXin LI * odst character (when it was in osrc) is returned in the chpos array.
51f0be0a1fSXin LI */
cvt_text(char * odst,char * osrc,int * chpos,int * lenp,int ops)52d713e089SXin LI public void cvt_text(char *odst, char *osrc, int *chpos, int *lenp, int ops)
53f0be0a1fSXin LI {
54f0be0a1fSXin LI char *dst;
5596e55cc7SXin LI char *edst = odst;
56f0be0a1fSXin LI char *src;
571ea31627SRobert Watson char *src_end;
58f0be0a1fSXin LI LWCHAR ch;
59f0be0a1fSXin LI
60f0be0a1fSXin LI if (lenp != NULL)
61f0be0a1fSXin LI src_end = osrc + *lenp;
62f0be0a1fSXin LI else
63f0be0a1fSXin LI src_end = osrc + strlen(osrc);
64f0be0a1fSXin LI
65f0be0a1fSXin LI for (src = osrc, dst = odst; src < src_end; )
66f0be0a1fSXin LI {
67a15691bfSXin LI int src_pos = (int) (src - osrc);
68a15691bfSXin LI int dst_pos = (int) (dst - odst);
692235c7feSXin LI struct ansi_state *pansi;
70f6b74a7dSXin LI ch = step_char(&src, +1, src_end);
71f0be0a1fSXin LI if ((ops & CVT_BS) && ch == '\b' && dst > odst)
72f0be0a1fSXin LI {
73f0be0a1fSXin LI /* Delete backspace and preceding char. */
74f0be0a1fSXin LI do {
75f0be0a1fSXin LI dst--;
76b2ea2440SXin LI } while (dst > odst && utf_mode &&
77f0be0a1fSXin LI !IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
782235c7feSXin LI } else if ((ops & CVT_ANSI) && (pansi = ansi_start(ch)) != NULL)
79f0be0a1fSXin LI {
80f0be0a1fSXin LI /* Skip to end of ANSI escape sequence. */
81f0be0a1fSXin LI while (src < src_end)
822235c7feSXin LI {
832235c7feSXin LI if (ansi_step(pansi, ch) != ANSI_MID)
84f0be0a1fSXin LI break;
852235c7feSXin LI ch = *src++;
862235c7feSXin LI }
872235c7feSXin LI ansi_done(pansi);
88f0be0a1fSXin LI } else
89f0be0a1fSXin LI {
90f0be0a1fSXin LI /* Just copy the char to the destination buffer. */
91f0be0a1fSXin LI if ((ops & CVT_TO_LC) && IS_UPPER(ch))
92f0be0a1fSXin LI ch = TO_LOWER(ch);
93f0be0a1fSXin LI put_wchar(&dst, ch);
9496e55cc7SXin LI /* Record the original position of the char. */
9596e55cc7SXin LI if (chpos != NULL)
96f0be0a1fSXin LI chpos[dst_pos] = src_pos;
97f0be0a1fSXin LI }
9896e55cc7SXin LI if (dst > edst)
9996e55cc7SXin LI edst = dst;
100f0be0a1fSXin LI }
10196e55cc7SXin LI if ((ops & CVT_CRLF) && edst > odst && edst[-1] == '\r')
10296e55cc7SXin LI edst--;
10396e55cc7SXin LI *edst = '\0';
104f0be0a1fSXin LI if (lenp != NULL)
105a15691bfSXin LI *lenp = (int) (edst - odst);
10696e55cc7SXin LI /* FIXME: why was this here? if (chpos != NULL) chpos[dst - odst] = src - osrc; */
107f0be0a1fSXin LI }
108