1 /* Line breaking of UTF-8 strings.
2 Copyright (C) 2001-2003, 2006-2020 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5 This program is free software: you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #include <config.h>
19
20 /* Specification. */
21 #include "unilbrk.h"
22
23 #include "unistr.h"
24 #include "uniwidth.h"
25
26 int
u8_width_linebreaks(const uint8_t * s,size_t n,int width,int start_column,int at_end_columns,const char * o,const char * encoding,char * p)27 u8_width_linebreaks (const uint8_t *s, size_t n,
28 int width, int start_column, int at_end_columns,
29 const char *o, const char *encoding,
30 char *p)
31 {
32 const uint8_t *s_end;
33 char *last_p;
34 int last_column;
35 int piece_width;
36
37 u8_possible_linebreaks (s, n, encoding, p);
38
39 s_end = s + n;
40 last_p = NULL;
41 last_column = start_column;
42 piece_width = 0;
43 while (s < s_end)
44 {
45 ucs4_t uc;
46 int count = u8_mbtouc_unsafe (&uc, s, s_end - s);
47
48 /* Respect the override. */
49 if (o != NULL && *o != UC_BREAK_UNDEFINED)
50 *p = *o;
51
52 if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY)
53 {
54 /* An atomic piece of text ends here. */
55 if (last_p != NULL && last_column + piece_width > width)
56 {
57 /* Insert a line break. */
58 *last_p = UC_BREAK_POSSIBLE;
59 last_column = 0;
60 }
61 }
62
63 if (*p == UC_BREAK_MANDATORY)
64 {
65 /* uc is a line break character. */
66 /* Start a new piece at column 0. */
67 last_p = NULL;
68 last_column = 0;
69 piece_width = 0;
70 }
71 else
72 {
73 /* uc is not a line break character. */
74 int w;
75
76 if (*p == UC_BREAK_POSSIBLE)
77 {
78 /* Start a new piece. */
79 last_p = p;
80 last_column += piece_width;
81 piece_width = 0;
82 /* No line break for the moment, may be turned into
83 UC_BREAK_POSSIBLE later, via last_p. */
84 }
85
86 *p = UC_BREAK_PROHIBITED;
87
88 w = uc_width (uc, encoding);
89 if (w >= 0) /* ignore control characters in the string */
90 piece_width += w;
91 }
92
93 s += count;
94 p += count;
95 if (o != NULL)
96 o += count;
97 }
98
99 /* The last atomic piece of text ends here. */
100 if (last_p != NULL && last_column + piece_width + at_end_columns > width)
101 {
102 /* Insert a line break. */
103 *last_p = UC_BREAK_POSSIBLE;
104 last_column = 0;
105 }
106
107 return last_column + piece_width;
108 }
109
110
111 #ifdef TEST
112
113 #include <stdio.h>
114 #include <stdlib.h>
115 #include <string.h>
116
/* Read the remaining contents of STREAM into a freshly allocated buffer
   and return it, terminated with a NUL byte.
   The caller owns the returned buffer and releases it with free().
   If reading fails or memory runs out, a diagnostic is written to stderr
   and the process exits with status 1; NULL is never returned.  */
char *
read_file (FILE *stream)
{
  /* Number of bytes requested from fread() per iteration.  */
  const size_t chunk = 4096;
  char *buf = NULL;
  size_t alloc = 0;             /* bytes allocated for buf */
  size_t size = 0;              /* bytes of buf filled so far */
  char *result;

  while (! feof (stream))
    {
      size_t count;

      /* Ensure there is room for one more full chunk.  size <= alloc
         always holds, so the subtraction cannot wrap.  */
      if (alloc - size < chunk)
        {
          char *new_buf = NULL;

          /* Refuse sizes for which size + chunk + 1 would wrap around.  */
          if (size <= (size_t) -1 - chunk - 1)
            {
              /* Grow by 50%, but at least by one chunk.  The second test
                 also catches a wrapped-around 1.5x computation.  */
              size_t new_alloc = alloc + alloc / 2;

              if (new_alloc < size + chunk)
                new_alloc = size + chunk;
              new_buf = realloc (buf, new_alloc);
              alloc = new_alloc;
            }
          if (new_buf == NULL)
            {
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          buf = new_buf;
        }

      count = fread (buf + size, 1, chunk, stream);
      size += count;

      /* A short read means end of file or an error; report the latter
         right away instead of relying on a later fread() to fail.  */
      if (count < chunk && ferror (stream))
        {
          perror ("fread");
          exit (1);
        }
    }

  /* Trim (or create) the buffer to the exact size plus the terminator.  */
  result = realloc (buf, size + 1);
  if (result == NULL)
    {
      fprintf (stderr, "out of memory\n");
      exit (1);
    }
  result[size] = '\0';
  return result;
}
164
165 int
main(int argc,char * argv[])166 main (int argc, char * argv[])
167 {
168 if (argc == 2)
169 {
170 /* Insert line breaks for a given width. */
171 int width = atoi (argv[1]);
172 char *input = read_file (stdin);
173 int length = strlen (input);
174 char *breaks = malloc (length);
175 int i;
176
177 u8_width_linebreaks ((uint8_t *) input, length, width, 0, 0, NULL, "UTF-8", breaks);
178
179 for (i = 0; i < length; i++)
180 {
181 switch (breaks[i])
182 {
183 case UC_BREAK_POSSIBLE:
184 putc ('\n', stdout);
185 break;
186 case UC_BREAK_MANDATORY:
187 break;
188 case UC_BREAK_PROHIBITED:
189 break;
190 default:
191 abort ();
192 }
193 putc (input[i], stdout);
194 }
195
196 free (breaks);
197
198 return 0;
199 }
200 else
201 return 1;
202 }
203
204 #endif /* TEST */
205