1 /*
2    Dynamic paragraph formatting.
3 
4    Copyright (C) 2011-2021
5    Free Software Foundation, Inc.
6 
7    Copyright (C) 1996 Paul Sheer
8 
9    Written by:
10    Paul Sheer, 1996
11    Andrew Borodin <aborodin@vmail.ru>, 2013, 2014
12 
13    This file is part of the Midnight Commander.
14 
15    The Midnight Commander is free software: you can redistribute it
16    and/or modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation, either version 3 of the License,
18    or (at your option) any later version.
19 
20    The Midnight Commander is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23    GNU General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program.  If not, see <http://www.gnu.org/licenses/>.
27  */
28 
29 /** \file
30  *  \brief Source: Dynamic paragraph formatting
31  *  \author Paul Sheer
32  *  \date 1996
33  *  \author Andrew Borodin
34  *  \date 2013, 2014
35  */
36 
37 #include <config.h>
38 
39 #include <stdio.h>
40 #include <stdarg.h>
41 #include <sys/types.h>
42 #include <unistd.h>
43 #include <string.h>
44 #include <ctype.h>
45 #include <errno.h>
46 #include <sys/stat.h>
47 
48 #include <stdlib.h>
49 
50 #include "lib/global.h"
51 
52 #include "src/setup.h"          /* option_tab_spacing */
53 
54 #include "edit-impl.h"
55 #include "editwidget.h"
56 
57 /*** global variables ****************************************************************************/
58 
59 char *option_stop_format_chars = NULL;
60 
61 /*** file scope macro definitions ****************************************************************/
62 
63 #define tab_width option_tab_spacing
64 
65 #define FONT_MEAN_WIDTH 1
66 
67 /*** file scope type declarations ****************************************************************/
68 
69 /*** file scope variables ************************************************************************/
70 
71 /*** file scope functions ************************************************************************/
72 /* --------------------------------------------------------------------------------------------- */
73 
74 static off_t
line_start(const edit_buffer_t * buf,long line)75 line_start (const edit_buffer_t * buf, long line)
76 {
77     off_t p;
78     long l;
79 
80     l = buf->curs_line;
81     p = buf->curs1;
82 
83     if (line < l)
84         p = edit_buffer_get_backward_offset (buf, p, l - line);
85     else if (line > l)
86         p = edit_buffer_get_forward_offset (buf, p, line - l, 0);
87 
88     p = edit_buffer_get_bol (buf, p);
89     while (strchr ("\t ", edit_buffer_get_byte (buf, p)) != NULL)
90         p++;
91     return p;
92 }
93 
94 /* --------------------------------------------------------------------------------------------- */
95 
96 static gboolean
bad_line_start(const edit_buffer_t * buf,off_t p)97 bad_line_start (const edit_buffer_t * buf, off_t p)
98 {
99     int c;
100 
101     c = edit_buffer_get_byte (buf, p);
102     if (c == '.')
103     {
104         /* `...' is acceptable */
105         return !(edit_buffer_get_byte (buf, p + 1) == '.'
106                  && edit_buffer_get_byte (buf, p + 2) == '.');
107     }
108     if (c == '-')
109     {
110         /* `---' is acceptable */
111         return !(edit_buffer_get_byte (buf, p + 1) == '-'
112                  && edit_buffer_get_byte (buf, p + 2) == '-');
113     }
114 
115     return (option_stop_format_chars != NULL && strchr (option_stop_format_chars, c) != NULL);
116 }
117 
118 /* --------------------------------------------------------------------------------------------- */
119 /**
120  * Find the start of the current paragraph for the purpose of formatting.
121  * Return position in the file.
122  */
123 
124 static off_t
begin_paragraph(WEdit * edit,gboolean force,long * lines)125 begin_paragraph (WEdit * edit, gboolean force, long *lines)
126 {
127     long i;
128 
129     for (i = edit->buffer.curs_line - 1; i >= 0; i--)
130         if (edit_line_is_blank (edit, i) ||
131             (force && bad_line_start (&edit->buffer, line_start (&edit->buffer, i))))
132         {
133             i++;
134             break;
135         }
136 
137     *lines = edit->buffer.curs_line - i;
138 
139     return edit_buffer_get_backward_offset (&edit->buffer,
140                                             edit_buffer_get_current_bol (&edit->buffer), *lines);
141 }
142 
143 /* --------------------------------------------------------------------------------------------- */
144 /**
145  * Find the end of the current paragraph for the purpose of formatting.
146  * Return position in the file.
147  */
148 
149 static off_t
end_paragraph(WEdit * edit,gboolean force)150 end_paragraph (WEdit * edit, gboolean force)
151 {
152     long i;
153 
154     for (i = edit->buffer.curs_line + 1; i <= edit->buffer.lines; i++)
155         if (edit_line_is_blank (edit, i) ||
156             (force && bad_line_start (&edit->buffer, line_start (&edit->buffer, i))))
157         {
158             i--;
159             break;
160         }
161 
162     return edit_buffer_get_eol (&edit->buffer,
163                                 edit_buffer_get_forward_offset (&edit->buffer,
164                                                                 edit_buffer_get_current_bol
165                                                                 (&edit->buffer),
166                                                                 i - edit->buffer.curs_line, 0));
167 }
168 
169 /* --------------------------------------------------------------------------------------------- */
170 
171 static GString *
get_paragraph(const edit_buffer_t * buf,off_t p,off_t q,gboolean indent)172 get_paragraph (const edit_buffer_t * buf, off_t p, off_t q, gboolean indent)
173 {
174     GString *t;
175 
176     t = g_string_sized_new (128);
177 
178     for (; p < q; p++)
179     {
180         if (indent && edit_buffer_get_byte (buf, p - 1) == '\n')
181             while (strchr ("\t ", edit_buffer_get_byte (buf, p)) != NULL)
182                 p++;
183 
184         g_string_append_c (t, edit_buffer_get_byte (buf, p));
185     }
186 
187     g_string_append_c (t, '\n');
188 
189     return t;
190 }
191 
192 /* --------------------------------------------------------------------------------------------- */
193 
/**
 * Replace every '\n' found in the first @size bytes of @t with a space.
 *
 * @param t text to modify in place
 * @param size number of bytes to process
 */

static inline void
strip_newlines (unsigned char *t, off_t size)
{
    unsigned char *cur = t;
    off_t left = size;

    while (left != 0)
    {
        if (*cur == '\n')
            *cur = ' ';

        cur++;
        left--;
    }
}
203 
204 /* --------------------------------------------------------------------------------------------- */
205 /**
206    This function calculates the number of chars in a line specified to length l in pixels
207  */
208 
209 static inline off_t
next_tab_pos(off_t x)210 next_tab_pos (off_t x)
211 {
212     x += tab_width - x % tab_width;
213     return x;
214 }
215 
216 /* --------------------------------------------------------------------------------------------- */
217 
218 static inline off_t
line_pixel_length(unsigned char * t,off_t b,off_t l,gboolean utf8)219 line_pixel_length (unsigned char *t, off_t b, off_t l, gboolean utf8)
220 {
221     off_t xn, x;                /* position conters */
222     off_t char_length;          /* character length in bytes */
223 
224 #ifndef HAVE_CHARSET
225     (void) utf8;
226 #endif
227 
228     for (xn = 0, x = 0; xn <= l; x = xn, b += char_length)
229     {
230         char *tb;
231 
232         tb = (char *) t + b;
233         char_length = 1;
234 
235         switch (tb[0])
236         {
237         case '\n':
238             return b;
239         case '\t':
240             xn = next_tab_pos (x);
241             break;
242         default:
243 #ifdef HAVE_CHARSET
244             if (utf8)
245             {
246                 gunichar ch;
247 
248                 ch = g_utf8_get_char_validated (tb, -1);
249                 if (ch != (gunichar) (-2) && ch != (gunichar) (-1))
250                 {
251                     char *next_ch;
252 
253                     /* Calculate UTF-8 char length */
254                     next_ch = g_utf8_next_char (tb);
255                     char_length = next_ch - tb;
256 
257                     if (g_unichar_iswide (ch))
258                         x++;
259                 }
260             }
261 #endif
262 
263             xn = x + 1;
264             break;
265         }
266     }
267 
268     return b;
269 }
270 
271 /* --------------------------------------------------------------------------------------------- */
272 
/**
 * Find the beginning of the word that follows position @q.
 *
 * The word must be separated from @q by at least one space or tab and
 * must be located on the same line, before offset @size.
 *
 * @param t text to search in
 * @param q offset to start from
 * @param size offset where the search stops
 *
 * @return offset of the first byte of the next word, or -1 if a '\n' or
 *         offset @size is reached first
 */

static off_t
next_word_start (unsigned char *t, off_t q, off_t size)
{
    off_t pos = q;

    /* step over the rest of the current word */
    while (pos < size && t[pos] != ' ' && t[pos] != '\t')
    {
        if (t[pos] == '\n')
            return -1;
        pos++;
    }

    /* step over the whitespace that follows it */
    while (pos < size && (t[pos] == ' ' || t[pos] == '\t'))
        pos++;

    if (pos >= size || t[pos] == '\n')
        return (-1);

    return pos;
}
297 
298 /* --------------------------------------------------------------------------------------------- */
/**
 * Find the start of a word.
 *
 * If @t[@q] is whitespace, the result is the start of the next word as found by
 * next_word_start(). Otherwise the text is scanned backwards from @q for the whitespace
 * that precedes the word.
 *
 * The result is an off_t, like the offsets it is computed from, so that
 * large offsets are not truncated.
 *
 * @param t text to search in
 * @param q offset to start from
 * @param size size of the text, used for the forward search only
 *
 * @return offset of the first byte of the word, or -1 if the word starts at the beginning
 *         of the text or right after a '\n', or if next_word_start() finds no word
 */

static inline off_t
word_start (unsigned char *t, off_t q, off_t size)
{
    off_t i;

    if (whitespace (t[q]))
        return next_word_start (t, q, size);

    for (i = q; i != 0; i--)
    {
        const unsigned char c = t[i - 1];

        /* the word is the first one on its line */
        if (c == '\n')
            return (-1);
        if (whitespace (c))
            return i;
    }

    /* the word is the first one in the text */
    return (-1);
}
322 
323 /* --------------------------------------------------------------------------------------------- */
324 /** replaces ' ' with '\n' to properly format a paragraph */
325 
326 static inline void
format_this(unsigned char * t,off_t size,long indent,gboolean utf8)327 format_this (unsigned char *t, off_t size, long indent, gboolean utf8)
328 {
329     off_t q = 0, ww;
330 
331     strip_newlines (t, size);
332     ww = option_word_wrap_line_length * FONT_MEAN_WIDTH - indent;
333     if (ww < FONT_MEAN_WIDTH * 2)
334         ww = FONT_MEAN_WIDTH * 2;
335 
336     while (TRUE)
337     {
338         off_t p;
339 
340         q = line_pixel_length (t, q, ww, utf8);
341         if (q > size)
342             break;
343         if (t[q] == '\n')
344             break;
345         p = word_start (t, q, size);
346         if (p == -1)
347             q = next_word_start (t, q, size);   /* Return the end of the word if the beginning
348                                                    of the word is at the beginning of a line
349                                                    (i.e. a very long word) */
350         else
351             q = p;
352         if (q == -1)            /* end of paragraph */
353             break;
354         if (q != 0)
355             t[q - 1] = '\n';
356     }
357 }
358 
359 /* --------------------------------------------------------------------------------------------- */
360 
361 static inline void
replace_at(WEdit * edit,off_t q,int c)362 replace_at (WEdit * edit, off_t q, int c)
363 {
364     edit_cursor_move (edit, q - edit->buffer.curs1);
365     edit_delete (edit, TRUE);
366     edit_insert_ahead (edit, c);
367 }
368 
369 /* --------------------------------------------------------------------------------------------- */
370 
371 static long
edit_indent_width(const WEdit * edit,off_t p)372 edit_indent_width (const WEdit * edit, off_t p)
373 {
374     off_t q = p;
375 
376     /* move to the end of the leading whitespace of the line */
377     while (strchr ("\t ", edit_buffer_get_byte (&edit->buffer, q)) != NULL
378            && q < edit->buffer.size - 1)
379         q++;
380     /* count the number of columns of indentation */
381     return (long) edit_move_forward3 (edit, p, 0, q);
382 }
383 
384 /* --------------------------------------------------------------------------------------------- */
385 
386 static void
edit_insert_indent(WEdit * edit,long indent)387 edit_insert_indent (WEdit * edit, long indent)
388 {
389     if (!option_fill_tabs_with_spaces)
390         while (indent >= TAB_SIZE)
391         {
392             edit_insert (edit, '\t');
393             indent -= TAB_SIZE;
394         }
395 
396     while (indent-- > 0)
397         edit_insert (edit, ' ');
398 }
399 
400 /* --------------------------------------------------------------------------------------------- */
401 /** replaces a block of text */
402 
403 static inline void
put_paragraph(WEdit * edit,unsigned char * t,off_t p,long indent,off_t size)404 put_paragraph (WEdit * edit, unsigned char *t, off_t p, long indent, off_t size)
405 {
406     off_t cursor;
407     off_t i;
408     int c = '\0';
409 
410     cursor = edit->buffer.curs1;
411     if (indent != 0)
412         while (strchr ("\t ", edit_buffer_get_byte (&edit->buffer, p)) != NULL)
413             p++;
414     for (i = 0; i < size; i++, p++)
415     {
416         if (i != 0 && indent != 0)
417         {
418             if (t[i - 1] == '\n' && c == '\n')
419             {
420                 while (strchr ("\t ", edit_buffer_get_byte (&edit->buffer, p)) != NULL)
421                     p++;
422             }
423             else if (t[i - 1] == '\n')
424             {
425                 off_t curs;
426 
427                 edit_cursor_move (edit, p - edit->buffer.curs1);
428                 curs = edit->buffer.curs1;
429                 edit_insert_indent (edit, indent);
430                 if (cursor >= curs)
431                     cursor += edit->buffer.curs1 - p;
432                 p = edit->buffer.curs1;
433             }
434             else if (c == '\n')
435             {
436                 edit_cursor_move (edit, p - edit->buffer.curs1);
437                 while (strchr ("\t ", edit_buffer_get_byte (&edit->buffer, p)) != NULL)
438                 {
439                     edit_delete (edit, TRUE);
440                     if (cursor > edit->buffer.curs1)
441                         cursor--;
442                 }
443                 p = edit->buffer.curs1;
444             }
445         }
446 
447         c = edit_buffer_get_byte (&edit->buffer, p);
448         if (c != t[i])
449             replace_at (edit, p, t[i]);
450     }
451     edit_cursor_move (edit, cursor - edit->buffer.curs1);       /* restore cursor position */
452 }
453 
454 /* --------------------------------------------------------------------------------------------- */
455 
456 static inline long
test_indent(const WEdit * edit,off_t p,off_t q)457 test_indent (const WEdit * edit, off_t p, off_t q)
458 {
459     long indent;
460 
461     indent = edit_indent_width (edit, p++);
462     if (indent == 0)
463         return 0;
464 
465     for (; p < q; p++)
466         if (edit_buffer_get_byte (&edit->buffer, p - 1) == '\n'
467             && indent != edit_indent_width (edit, p))
468             return 0;
469     return indent;
470 }
471 
472 /* --------------------------------------------------------------------------------------------- */
473 /*** public functions ****************************************************************************/
474 /* --------------------------------------------------------------------------------------------- */
475 
476 void
format_paragraph(WEdit * edit,gboolean force)477 format_paragraph (WEdit * edit, gboolean force)
478 {
479     off_t p, q;
480     long lines;
481     off_t size;
482     GString *t;
483     long indent;
484     unsigned char *t2;
485     gboolean utf8 = FALSE;
486 
487     if (option_word_wrap_line_length < 2)
488         return;
489     if (edit_line_is_blank (edit, edit->buffer.curs_line))
490         return;
491 
492     p = begin_paragraph (edit, force, &lines);
493     q = end_paragraph (edit, force);
494     indent = test_indent (edit, p, q);
495 
496     t = get_paragraph (&edit->buffer, p, q, indent != 0);
497     size = t->len - 1;
498 
499     if (!force)
500     {
501         off_t i;
502         char *stop_format_chars;
503 
504         if (option_stop_format_chars != NULL
505             && strchr (option_stop_format_chars, t->str[0]) != NULL)
506         {
507             g_string_free (t, TRUE);
508             return;
509         }
510 
511         if (option_stop_format_chars == NULL || *option_stop_format_chars == '\0')
512             stop_format_chars = g_strdup ("\t");
513         else
514             stop_format_chars = g_strconcat (option_stop_format_chars, "\t", (char *) NULL);
515 
516         for (i = 0; i < size - 1; i++)
517             if (t->str[i] == '\n' && strchr (stop_format_chars, t->str[i + 1]) != NULL)
518             {
519                 g_free (stop_format_chars);
520                 g_string_free (t, TRUE);
521                 return;
522             }
523 
524         g_free (stop_format_chars);
525     }
526 
527     t2 = (unsigned char *) g_string_free (t, FALSE);
528 #ifdef HAVE_CHARSET
529     utf8 = edit->utf8;
530 #endif
531     format_this (t2, q - p, indent, utf8);
532     put_paragraph (edit, t2, p, indent, size);
533     g_free ((char *) t2);
534 
535     /* Scroll left as much as possible to show the formatted paragraph */
536     edit_scroll_left (edit, -edit->start_col);
537 }
538 
539 /* --------------------------------------------------------------------------------------------- */
540