1 /*
2     This file is part of the KDE libraries
3 
4     SPDX-FileCopyrightText: 2000 Stephan Kulow <coolo@kde.org>
5     SPDX-FileCopyrightText: 2005 Nicolas GOUTTE <goutte@kde.org>
6     SPDX-FileCopyrightText: 2011 Martin Koller <kollix@aon.at>
7 
8     ... and others (see SVN history)
9 */
10 
11 // Start of verbatim comment
12 
13 /*
14 ** This program was written by Richard Verhoeven (NL:5482ZX35)
15 ** at the Eindhoven University of Technology. Email: rcb5@win.tue.nl
16 **
17 ** Permission is granted to distribute, modify and use this program as long
18 ** as this comment is not removed or changed.
19 */
20 
21 // End of verbatim comment
22 
23 /*
24  * man2html-linux-1.0/1.1
25  * This version modified for Redhat/Caldera linux - March 1996.
26  * Michael Hamilton <michael@actrix.gen.nz>.
27  *
28  * man2html-linux-1.2
29  * Added support for BSD mandoc pages - I didn't have any documentation
30  * on the mandoc macros, so I may have missed some.
31  * Michael Hamilton <michael@actrix.gen.nz>.
32  *
33  * vh-man2html-1.3
34  * Renamed to avoid confusion (V for Verhoeven, H for Hamilton).
35  *
36  * vh-man2html-1.4
37  * Now uses /etc/man.config
38  * Added support for compressed pages.
39  * Added "length-safe" string operations for client input parameters.
40  * More secure, -M secured, and client input string lengths checked.
41  *
42  */
43 
44 /*
45 ** If you want to use this program for your WWW server, adjust the line
46 ** which defines the CGIBASE or compile it with the -DCGIBASE='"..."' option.
47 **
48 ** You have to adjust the built-in manpath to your local system. Note that
49 ** every directory should start and end with the '/' and that the first
50 ** directory should be "/" to allow a full path as an argument.
51 **
52 ** The program first check if PATH_INFO contains some information.
53 ** If it does (t.i. man2html/some/thing is used), the program will look
54 ** for a manpage called PATH_INFO in the manpath.
55 **
56 ** Otherwise the manpath is searched for the specified command line argument,
57 ** where the following options can be used:
58 **
59 ** name      name of manpage (csh, printf, xv, troff)
60 ** section   the section (1 2 3 4 5 6 7 8 9 n l 1v ...)
61 ** -M path   an extra directory to look for manpages (replaces "/")
62 **
63 ** If man2html finds multiple manpages that satisfy the options, an index
64 ** is displayed and the user can make a choice. If only one page is
65 ** found, that page will be displayed.
66 **
67 ** man2html will add links to the converted manpages. The function add_links
68 ** is used for that. At the moment it will add links as follows, where
69 **     indicates what should match to start with:
70 ** ^^^
71 ** Recognition           Item            Link
72 ** ----------------------------------------------------------
73 ** name(*)               Manpage         ../man?/name.*
74 **     ^
75 ** name@hostname         Email address   mailto:name@hostname
76 **     ^
77 ** method://string       URL             method://string
78 **       ^^^
79 ** www.host.name         WWW server      http://www.host.name
80 ** ^^^^
81 ** ftp.host.name         FTP server      ftp://ftp.host.name
82 ** ^^^^
83 ** <file.h>              Include file    file:/usr/include/file.h
84 **      ^^^
85 **
86 ** Since man2html does not check if manpages, hosts or email addresses exist,
87 ** some links might not work. For manpages, some extra checks are performed
88 ** to make sure not every () pair creates a link. Also out of date pages
89 ** might point to incorrect places.
90 **
91 ** The program will not allow users to get system specific files, such as
92 ** /etc/passwd. It will check that "man" is part of the specified file and
93 ** that  "/../" isn't. Even if someone manages to get such file, man2html will
94 ** handle it like a manpage and will usually not produce any output (or crash).
95 **
96 ** If you find any bugs when normal manpages are converted, please report
97 ** them to me (rcb5@win.tue.nl) after you have checked that man(1) can handle
98 ** the manpage correct.
99 **
100 ** Known bugs and missing features:
101 **
102 **  * Equations are not converted at all.
103 **  * Tables are converted but some features are not possible in html.
104 **  * The tabbing environment is converted by counting characters and adding
105 **    spaces. This might go wrong (outside <PRE>)
106 **  * Some manpages rely on the fact that troff/nroff is used to convert
107 **    them and use features which are not described in the man manpages.
108 **    (definitions, calculations, conditionals, requests). I can't guarantee
109 **    that all these features work on all manpages. (I didn't have the
110 **    time to look through all the available manpages.)
111 */
112 
113 #include "man2html.h"
114 #include "kio_man_debug.h"
115 #include "request_hash.h"
116 
117 #include <config-runtime.h>
118 
119 #include <ctype.h>
120 
121 #include <unistd.h>
122 #include <string.h>
123 
124 #include <stdio.h>
125 
126 #include <QByteArray>
127 #include <QDateTime>
128 #include <QMap>
129 #include <QStack>
130 #include <QString>
131 #include <QTextCodec>
132 #include <QDebug>
133 #include <QRegularExpression>
134 
135 #ifdef SIMPLE_MAN2HTML
136 # include <stdlib.h>
137 # include <iostream>
138 # include <dirent.h>
139 # include <sys/stat.h>
140 # include <QFile>
141 # include <QFileInfo>
142 # include <QDir>
143 # include <karchive_version.h>
144 # if KARCHIVE_VERSION >= QT_VERSION_CHECK(5, 85, 0)
145 # include <KCompressionDevice>
146 # else
147 # include <KFilterDev>
148 # endif
149 # define BYTEARRAY(x) x.constData()
150 #else
151 # include <KLocalizedString>
152 # define BYTEARRAY(x) x
153 #endif
154 
/// Array size needed to hold @p n characters plus the terminating NUL.
#define NULL_TERMINATED(n) ((n) + 1)

// Character capacities for fixed-size string buffers. HUGE_STR_MAX sizes the
// static output buffer (wrapped in NULL_TERMINATED so the terminator fits);
// the two smaller tiers are presumably used the same way elsewhere in the
// file -- TODO confirm at their use sites.
#define HUGE_STR_MAX  10000
#define LARGE_STR_MAX 2000
#define MED_STR_MAX   500

/// HTML 4.01 Transitional document type declaration, including its trailing newline.
#define DOCTYPE "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"

/* mdoc(7) Bl/El lists to HTML list types */
// NOTE(review): the values are distinct powers of two, so they look like
// bit flags -- confirm at the use sites before relying on that.
#define BL_DESC_LIST   1
#define BL_BULLET_LIST 2
#define BL_ENUM_LIST   4

/* mdoc(7) Bd/Ed example(?) blocks */
// NOTE(review): presumably the kinds of Bd display block that are handled -- confirm.
#define BD_LITERAL  1
#define BD_INDENT   2
171 
/// NROFF/TROFF mode switch; initialised to 1 because NROFF mode is the default.
static int s_nroff = 1; // NROFF mode by default

/// Name remembered for the mdoc Nm macro (empty until one has been stored).
static QByteArray mandoc_name;  // Nm can store the first used name

/// Counter tied to the Nm macro; starts at 0 so that the first Nm does not
/// cause a break. NOTE(review): the code that updates it is outside this chunk.
static int mandoc_name_count = 0; /* Don't break on the first Nm */
177 
178 /* below this you should not change anything unless you know a lot
179 ** about this program or about troff.
180 */
181 
182 
/// One row of the built-in special character table (see standardchars).
struct CSTRDEF
{
    int nr;         ///< Two-character name packed with V(): first * 256 + second
    int slen;       ///< Length recorded for the replacement (becomes StringDefinition::m_length)
    const char *st; ///< Replacement text
};
189 
190 
191 
/// A single line feed as a NUL-terminated C string.
const char NEWLINE[2] = { '\n', '\0' };
193 
194 /**
195  * Class for defining strings and macros
196  */
197 class StringDefinition
198 {
199 public:
StringDefinition(void)200     StringDefinition(void) : m_length(0) {}
StringDefinition(int len,const char * cstr)201     StringDefinition(int len, const char* cstr) : m_length(len), m_output(cstr) {}
202 public:
203     int m_length; ///< Length of output text
204     QByteArray m_output; ///< Defined string
205 };
206 
/**
 * A number register: its current value and its increment.
 * \note Not for internal read-only registers
 */
class NumberDefinition
{
public:
    /**
     * Creates a register holding @p value with increment @p incr; both
     * default to 0. Intentionally not explicit, so that a plain int can be
     * stored directly in the register map.
     */
    NumberDefinition(int value = 0, int incr = 0)
        : m_value(value)
        , m_increment(incr)
    {
    }

    int m_value;     ///< value of number register
    int m_increment; ///< Increment of number register
    // ### TODO: display form (.af)
};
222 
/**
 * Map of character definitions, keyed by character name.
 * Filled by InitCharacterDefinitions() and fill_old_character_definitions().
 */
static QMap<QByteArray, StringDefinition> s_characterDefinitionMap;

/**
 * Map of string variable and macro definitions, keyed by name.
 * Pre-filled by InitStringDefinitions().
 * \note String variables and macros are the same thing!
 */
static QMap<QByteArray, StringDefinition> s_stringDefinitionMap;

/**
 * Map of number registers, keyed by register name.
 * Pre-filled by InitNumberDefinitions().
 * \note Internal number registers (starting with a dot) are not handled here
 */
static QMap<QByteArray, NumberDefinition> s_numberDefinitionMap;

/// Copies the standardchars table into s_characterDefinitionMap (defined further down).
static void fill_old_character_definitions(void);
241 
242 /**
243  * Initialize character variables
244  */
InitCharacterDefinitions(void)245 static void InitCharacterDefinitions(void)
246 {
247     fill_old_character_definitions();
248     // ### HACK: as we are converting to HTML too early, define characters with HTML references
249     s_characterDefinitionMap.insert("&lt;-", StringDefinition(1, "&larr;"));     // <-
250     s_characterDefinitionMap.insert("-&gt;", StringDefinition(1, "&rarr;"));     // ->
251     s_characterDefinitionMap.insert("&lt;&gt;", StringDefinition(1, "&harr;"));     // <>
252     s_characterDefinitionMap.insert("&lt;=", StringDefinition(1, "&le;"));     // <=
253     s_characterDefinitionMap.insert("&gt;=", StringDefinition(1, "&ge;"));     // >=
254     // End HACK
255 }
256 
257 /**
258  * Initialize string variables
259  */
InitStringDefinitions(void)260 static void InitStringDefinitions(void)
261 {
262     // mdoc-only, see mdoc.samples(7)
263     s_stringDefinitionMap.insert("<=", StringDefinition(1, "&le;"));
264     s_stringDefinitionMap.insert(">=", StringDefinition(1, "&ge;"));
265     s_stringDefinitionMap.insert("Rq", StringDefinition(1, "&rdquo;"));
266     s_stringDefinitionMap.insert("Lq", StringDefinition(1, "&ldquo;"));
267     s_stringDefinitionMap.insert("ua", StringDefinition(1, "&circ"));     // Note this is different from \(ua
268     s_stringDefinitionMap.insert("aa", StringDefinition(1, "&acute;"));
269     s_stringDefinitionMap.insert("ga", StringDefinition(1, "`"));
270     s_stringDefinitionMap.insert("q", StringDefinition(1, "&quot;"));
271     s_stringDefinitionMap.insert("Pi", StringDefinition(1, "&pi;"));
272     s_stringDefinitionMap.insert("Ne", StringDefinition(1, "&ne;"));
273     s_stringDefinitionMap.insert("Le", StringDefinition(1, "&le;"));
274     s_stringDefinitionMap.insert("Ge", StringDefinition(1, "&ge;"));
275     s_stringDefinitionMap.insert("Lt", StringDefinition(1, "&lt;"));
276     s_stringDefinitionMap.insert("Gt", StringDefinition(1, "&gt;"));
277     s_stringDefinitionMap.insert("Pm", StringDefinition(1, "&plusmn;"));
278     s_stringDefinitionMap.insert("If", StringDefinition(1, "&infin;"));
279     s_stringDefinitionMap.insert("Na", StringDefinition(3, "NaN"));
280     s_stringDefinitionMap.insert("Ba", StringDefinition(1, "|"));
281     // end mdoc-only
282     // man(7)
283     s_stringDefinitionMap.insert("Tm", StringDefinition(1, "&trade;"));     // \*(TM
284     s_stringDefinitionMap.insert("R", StringDefinition(1, "&reg;"));     // \*R
285     s_stringDefinitionMap.insert("lq", StringDefinition(1, "&ldquo;"));     // Left angled double quote
286     s_stringDefinitionMap.insert("rq", StringDefinition(1, "&rdquo;"));     // Right angled double quote
287     // end man(7)
288     // Missing characters from man(7):
289     // \*S "Change to default font size"
290 #ifndef SIMPLE_MAN2HTML
291     // Special KDE KIO man:
292     const QByteArray kdeversion(KDE_VERSION_STRING);
293     s_stringDefinitionMap.insert(".KDE_VERSION_STRING", StringDefinition(kdeversion.length(), kdeversion));
294 #endif
295 }
296 
297 /**
298  * Initialize number registers
299  * \note Internal read-only registers are not handled here
300  */
InitNumberDefinitions(void)301 static void InitNumberDefinitions(void)
302 {
303     // As the date number registers are more for end-users, better choose local time.
304     // Groff seems to support Gregorian dates only
305     QDate today(QDate::currentDate());
306     s_numberDefinitionMap.insert("year", today.year());   // Y2K-correct year
307     s_numberDefinitionMap.insert("yr", today.year() - 1900);   // Y2K-incorrect year
308     s_numberDefinitionMap.insert("mo", today.month());
309     s_numberDefinitionMap.insert("dy", today.day());
310     s_numberDefinitionMap.insert("dw", today.dayOfWeek());
311 }
312 
313 
314 #define V(A,B) ((A)*256+(B))
315 
316 //used in expand_char, e.g. for "\(bu"
317 // see groff_char(7) for list
318 static const CSTRDEF standardchars[] =
319 {
320     { V('*', '*'), 1, "*" },
321     { V('*', 'A'), 1, "&Alpha;" },
322     { V('*', 'B'), 1, "&Beta;" },
323     { V('*', 'C'), 1, "&Xi;" },
324     { V('*', 'D'), 1, "&Delta;" },
325     { V('*', 'E'), 1, "&Epsilon;" },
326     { V('*', 'F'), 1, "&Phi;" },
327     { V('*', 'G'), 1, "&Gamma;" },
328     { V('*', 'H'), 1, "&Theta;" },
329     { V('*', 'I'), 1, "&Iota;" },
330     { V('*', 'K'), 1, "&Kappa;" },
331     { V('*', 'L'), 1, "&Lambda;" },
332     { V('*', 'M'), 1, "&Mu:" },
333     { V('*', 'N'), 1, "&Nu;" },
334     { V('*', 'O'), 1, "&Omicron;" },
335     { V('*', 'P'), 1, "&Pi;" },
336     { V('*', 'Q'), 1, "&Psi;" },
337     { V('*', 'R'), 1, "&Rho;" },
338     { V('*', 'S'), 1, "&Sigma;" },
339     { V('*', 'T'), 1, "&Tau;" },
340     { V('*', 'U'), 1, "&Upsilon;" },
341     { V('*', 'W'), 1, "&Omega;" },
342     { V('*', 'X'), 1, "&Chi;" },
343     { V('*', 'Y'), 1, "&Eta;" },
344     { V('*', 'Z'), 1, "&Zeta;" },
345     { V('*', 'a'), 1, "&alpha;"},
346     { V('*', 'b'), 1, "&beta;"},
347     { V('*', 'c'), 1, "&xi;"},
348     { V('*', 'd'), 1, "&delta;"},
349     { V('*', 'e'), 1, "&epsilon;"},
350     { V('*', 'f'), 1, "&phi;"},
351     { V('*', 'g'), 1, "&gamma;"},
352     { V('*', 'h'), 1, "&theta;"},
353     { V('*', 'i'), 1, "&iota;"},
354     { V('*', 'k'), 1, "&kappa;"},
355     { V('*', 'l'), 1, "&lambda;"},
356     { V('*', 'm'), 1, "&mu;" },
357     { V('*', 'n'), 1, "&nu;"},
358     { V('*', 'o'), 1, "&omicron;"},
359     { V('*', 'p'), 1, "&pi;"},
360     { V('*', 'q'), 1, "&psi;"},
361     { V('*', 'r'), 1, "&rho;"},
362     { V('*', 's'), 1, "&sigma;"},
363     { V('*', 't'), 1, "&tau;"},
364     { V('*', 'u'), 1, "&upsilon;"},
365     { V('*', 'w'), 1, "&omega;"},
366     { V('*', 'x'), 1, "&chi;"},
367     { V('*', 'y'), 1, "&eta;"},
368     { V('*', 'z'), 1, "&zeta;"},
369     { V('+', '-'), 1, "&plusmn;" }, // not in groff_char(7)
370     { V('+', 'f'), 1, "&phi;"}, // phi1, we use the standard phi
371     { V('+', 'h'), 1, "&theta;"}, // theta1, we use the standard theta
372     { V('+', 'p'), 1, "&omega;"}, // omega1, we use the standard omega
373     { V('1', '2'), 1, "&frac12;" },
374     { V('1', '4'), 1, "&frac14;" },
375     { V('3', '4'), 1, "&frac34;" },
376     { V('F', 'i'), 1, "&#xFB03;" }, // ffi ligature
377     { V('F', 'l'), 1, "&#xFB04;" }, // ffl ligature
378     { V('a', 'p'), 1, "~" },
379     { V('b', 'r'), 1, "|" },
380     { V('b', 'u'), 1, "&bull;" },
381     { V('b', 'v'), 1, "|" },
382     { V('c', 'i'), 1, "&#x25CB;" }, // circle
383     { V('c', 'o'), 1, "&copy;" },
384     { V('c', 't'), 1, "&cent;" },
385     { V('d', 'e'), 1, "&deg;" },
386     { V('d', 'g'), 1, "&dagger;" },
387     { V('d', 'i'), 1, "&divide;" },
388     { V('e', 'm'), 1, "&mdash;" },
389     { V('e', 'n'), 1, "&ndash;"},
390     { V('e', 'q'), 1, "=" },
391     { V('e', 's'), 1, "&empty;" },
392     { V('f', 'f'), 1, "&#0xFB00;" }, // ff ligature
393     { V('f', 'i'), 1, "&#0xFB01;" }, // fi ligature
394     { V('f', 'l'), 1, "&#0xFB02;" }, // fl ligature
395     { V('f', 'm'), 1, "&prime;" },
396     { V('g', 'a'), 1, "`" },
397     { V('h', 'y'), 1, "-" },
398     { V('l', 'c'), 2, "|&#175;" }, // ### TODO: not in groff_char(7)
399     { V('l', 'f'), 2, "|_" }, // ### TODO: not in groff_char(7)
400     { V('l', 'k'), 1, "<FONT SIZE=+2>{</FONT>" }, // ### TODO: not in groff_char(7)
401     { V('m', 'i'), 1, "-" }, // ### TODO: not in groff_char(7)
402     { V('m', 'u'), 1, "&times;" },
403     { V('n', 'o'), 1, "&not;" },
404     { V('o', 'r'), 1, "|" },
405     { V('p', 'l'), 1, "+" },
406     { V('r', 'c'), 2, "&#175;|" }, // ### TODO: not in groff_char(7)
407     { V('r', 'f'), 2, "_|" }, // ### TODO: not in groff_char(7)
408     { V('r', 'g'), 1, "&reg;" },
409     { V('r', 'k'), 1, "<FONT SIZE=+2>}</FONT>" }, // ### TODO: not in groff_char(7)
410     { V('r', 'n'), 1, "&oline;" },
411     { V('r', 'u'), 1, "_" },
412     { V('s', 'c'), 1, "&sect;" },
413     { V('s', 'l'), 1, "/" },
414     { V('s', 'q'), 2, "&#x25A1" }, // WHITE SQUARE
415     { V('t', 's'), 1, "&#x03C2;" }, // FINAL SIGMA
416     { V('u', 'l'), 1, "_" },
417     { V('-', 'D'), 1, "&ETH;" },
418     { V('S', 'd'), 1, "&eth;" },
419     { V('T', 'P'), 1, "&THORN;" },
420     { V('T', 'p'), 1, "&thorn;" },
421     { V('A', 'E'), 1, "&AElig;" },
422     { V('a', 'e'), 1, "&aelig;" },
423     { V('O', 'E'), 1, "&OElig;" },
424     { V('o', 'e'), 1, "&oelig;" },
425     { V('s', 's'), 1, "&szlig;" },
426     { V('\'', 'A'), 1, "&Aacute;" },
427     { V('\'', 'E'), 1, "&Eacute;" },
428     { V('\'', 'I'), 1, "&Iacute;" },
429     { V('\'', 'O'), 1, "&Oacute;" },
430     { V('\'', 'U'), 1, "&Uacute;" },
431     { V('\'', 'Y'), 1, "&Yacute;" },
432     { V('\'', 'a'), 1, "&aacute;" },
433     { V('\'', 'e'), 1, "&eacute;" },
434     { V('\'', 'i'), 1, "&iacute;" },
435     { V('\'', 'o'), 1, "&oacute;" },
436     { V('\'', 'u'), 1, "&uacute;" },
437     { V('\'', 'y'), 1, "&yacute;" },
438     { V(':', 'A'), 1, "&Auml;" },
439     { V(':', 'E'), 1, "&Euml;" },
440     { V(':', 'I'), 1, "&Iuml;" },
441     { V(':', 'O'), 1, "&Ouml;" },
442     { V(':', 'U'), 1, "&Uuml;" },
443     { V(':', 'a'), 1, "&auml;" },
444     { V(':', 'e'), 1, "&euml;" },
445     { V(':', 'i'), 1, "&iuml;" },
446     { V(':', 'o'), 1, "&ouml;" },
447     { V(':', 'u'), 1, "&uuml;" },
448     { V(':', 'y'), 1, "&yuml;" },
449     { V('^', 'A'), 1, "&Acirc;" },
450     { V('^', 'E'), 1, "&Ecirc;" },
451     { V('^', 'I'), 1, "&Icirc;" },
452     { V('^', 'O'), 1, "&Ocirc;" },
453     { V('^', 'U'), 1, "&Ucirc;" },
454     { V('^', 'a'), 1, "&acirc;" },
455     { V('^', 'e'), 1, "&ecirc;" },
456     { V('^', 'i'), 1, "&icirc;" },
457     { V('^', 'o'), 1, "&ocirc;" },
458     { V('^', 'u'), 1, "&ucirc;" },
459     { V('`', 'A'), 1, "&Agrave;" },
460     { V('`', 'E'), 1, "&Egrave;" },
461     { V('`', 'I'), 1, "&Igrave;" },
462     { V('`', 'O'), 1, "&Ograve;" },
463     { V('`', 'U'), 1, "&Ugrave;" },
464     { V('`', 'a'), 1, "&agrave;" },
465     { V('`', 'e'), 1, "&egrave;" },
466     { V('`', 'i'), 1, "&igrave;" },
467     { V('`', 'o'), 1, "&ograve;" },
468     { V('`', 'u'), 1, "&ugrave;" },
469     { V('~', 'A'), 1, "&Atilde;" },
470     { V('~', 'N'), 1, "&Ntilde;" },
471     { V('~', 'O'), 1, "&Otilde;" },
472     { V('~', 'a'), 1, "&atilde" },
473     { V('~', 'n'), 1, "&ntilde;" },
474     { V('~', 'o'), 1, "&otilde;" },
475     { V(',', 'C'), 1, "&Ccedil;" },
476     { V(',', 'c'), 1, "&ccedil;" },
477     { V('/', 'L'), 1, "&#x0141;" },
478     { V('/', 'l'), 1, "&#x0142;" },
479     { V('/', 'O'), 1, "&Oslash;" },
480     { V('/', 'o'), 1, "&oslash;" },
481     { V('o', 'A'), 1, "&Aring;" },
482     { V('o', 'a'), 1, "&aring;" },
483     { V('a', '"'), 1, "\"" },
484     { V('a', '-'), 1, "&macr;" },
485     { V('a', '.'), 1, "." },
486     { V('a', '^'), 1, "&circ;" },
487     { V('a', 'a'), 1, "&acute;" },
488     { V('a', 'b'), 1, "`" },
489     { V('a', 'c'), 1, "&cedil;" },
490     { V('a', 'd'), 1, "&uml;" },
491     { V('a', 'h'), 1, "&#x02C2;" }, // caron
492     { V('a', 'o'), 1, "&#x02DA;" }, // ring
493     { V('a', '~'), 1, "&tilde;" },
494     { V('h', 'o'), 1, "&#x02DB;" }, // ogonek
495     { V('.', 'i'), 1, "&#x0131;" }, // dot less i
496     { V('C', 's'), 1, "&curren;" }, //krazy:exclude=spelling
497     { V('D', 'o'), 1, "$" },
498     { V('P', 'o'), 1, "&pound;" },
499     { V('Y', 'e'), 1, "&yen;" },
500     { V('F', 'n'), 1, "&fnof;" },
501     { V('F', 'o'), 1, "&laquo;" },
502     { V('F', 'c'), 1, "&raquo;" },
503     { V('f', 'o'), 1, "&#x2039;" }, // single left guillemet
504     { V('f', 'c'), 1, "&#x203A;" }, // single right guillemet
505     { V('r', '!'), 1, "&iecl;" },
506     { V('r', '?'), 1, "&iquest;" },
507     { V('O', 'f'), 1, "&ordf" },
508     { V('O', 'm'), 1, "&ordm;" },
509     { V('p', 'c'), 1, "&middot;" },
510     { V('S', '1'), 1, "&sup1;" },
511     { V('S', '2'), 1, "&sup2;" },
512     { V('S', '3'), 1, "&sup3;" },
513     { V('<', '-'), 1, "&larr;" },
514     { V('-', '>'), 1, "&rarr;" },
515     { V('<', '>'), 1, "&harr;" },
516     { V('d', 'a'), 1, "&darr;" },
517     { V('u', 'a'), 1, "&uarr;" },
518     { V('l', 'A'), 1, "&lArr;" },
519     { V('r', 'A'), 1, "&rArr;" },
520     { V('h', 'A'), 1, "&hArr;" },
521     { V('d', 'A'), 1, "&dArr;" },
522     { V('u', 'A'), 1, "&uArr;" },
523     { V('b', 'a'), 1, "|" },
524     { V('b', 'b'), 1, "&brvbar;" },
525     { V('t', 'm'), 1, "&trade;" },
526     { V('d', 'd'), 1, "&Dagger;" },
527     { V('p', 's'), 1, "&para;" },
528     { V('%', '0'), 1, "&permil;" },
529     { V('f', '/'), 1, "&frasl;" }, // Fraction slash
530     { V('s', 'd'), 1, "&Prime;" },
531     { V('h', 'a'), 1, "^" },
532     { V('t', 'i'), 1, "&tilde;" },
533     { V('l', 'B'), 1, "[" },
534     { V('r', 'B'), 1, "]" },
535     { V('l', 'C'), 1, "{" },
536     { V('r', 'C'), 1, "}" },
537     { V('l', 'a'), 1, "&lt;" },
538     { V('r', 'a'), 1, "&gt;" },
539     { V('l', 'h'), 1, "&le;" },
540     { V('r', 'h'), 1, "&ge;" },
541     { V('B', 'q'), 1, "&bdquo;" },
542     { V('b', 'q'), 1, "&sbquo;" },
543     { V('l', 'q'), 1, "&ldquo;" },
544     { V('r', 'q'), 1, "&rdquo;" },
545     { V('o', 'q'), 1, "&lsquo;" },
546     { V('c', 'q'), 1, "&rsquo;" },
547     { V('a', 'q'), 1, "'" },
548     { V('d', 'q'), 1, "\"" },
549     { V('a', 't'), 1, "@" },
550     { V('s', 'h'), 1, "#" },
551     { V('r', 's'), 1, "\\" },
552     { V('t', 'f'), 1, "&there4;" },
553     { V('~', '~'), 1, "&cong;" },
554     { V('~', '='), 1, "&asymp;" },
555     { V('!', '='), 1, "&ne;" },
556     { V('<', '='), 1, "&le;" },
557     { V('=', '='), 1, "&equiv;" },
558     { V('=', '~'), 1, "&cong;" }, // ### TODO: verify
559     { V('>', '='), 1, "&ge;" },
560     { V('A', 'N'), 1, "&and;" },
561     { V('O', 'R'), 1, "&or;" },
562     { V('t', 'e'), 1, "&exist;" },
563     { V('f', 'a'), 1, "&forall;" },
564     { V('A', 'h'), 1, "&alefsym;" },
565     { V('I', 'm'), 1, "&image;" },
566     { V('R', 'e'), 1, "&real;" },
567     { V('i', 'f'), 1, "&infin;" },
568     { V('m', 'd'), 1, "&sdot;" },
569     { V('m', 'o'), 1, "&#x2206;" }, // element ### TODO verify
570     { V('n', 'm'), 1, "&notin;" },
571     { V('p', 't'), 1, "&prop;" },
572     { V('p', 'p'), 1, "&perp;" },
573     { V('s', 'b'), 1, "&sub;" },
574     { V('s', 'p'), 1, "&sup;" },
575     { V('i', 'b'), 1, "&sube;" },
576     { V('i', 'p'), 1, "&supe;" },
577     { V('i', 's'), 1, "&int;" },
578     { V('s', 'r'), 1, "&radic;" },
579     { V('p', 'd'), 1, "&part;" },
580     { V('c', '*'), 1, "&otimes;" },
581     { V('c', '+'), 1, "&oplus;" },
582     { V('c', 'a'), 1, "&cap;" },
583     { V('c', 'u'), 1, "&cup;" },
584     { V('g', 'r'), 1, "V" }, // gradient ### TODO Where in Unicode?
585     { V('C', 'R'), 1, "&crarr;" },
586     { V('s', 't'), 2, "-)" }, // "such that" ### TODO Where in Unicode?
587     { V('/', '_'), 1, "&ang;" },
588     { V('w', 'p'), 1, "&weierp;" },
589     { V('l', 'z'), 1, "&loz;" },
590     { V('a', 'n'), 1, "-" }, // "horizontal arrow extension"  ### TODO Where in Unicode?
591 };
592 
/// Pairs an abbreviated standard name (argument of the .St macro) with its long form.
struct StandardName
{
    const char *abbrev;     ///< Abbreviation, written with its leading dash
    const char *formalName; ///< Long form printed in its place
};

/// Long forms of all standard abbreviations known to the .St macro.
static const StandardName STANDARD_NAMES[] =
{
    // C language
    { "-ansiC",         "ANSI X3.159-1989 ('ANSI C89')" },
    { "-ansiC-89",      "ANSI X3.159-1989 ('ANSI C89')" },
    { "-isoC",          "ISO/IEC 9899:1990 ('ISO C90')" },
    { "-isoC-90",       "ISO/IEC 9899:1990 ('ISO C90')" },
    { "-isoC-99",       "ISO/IEC 9899:1999 ('ISO C99')" },
    { "-isoC-2011",     "ISO/IEC 9899:2011 ('ISO C11')" },

    // POSIX.1
    { "-iso9945-1-90",  "ISO/IEC 9945-1:1990 ('POSIX.1')" },
    { "-iso9945-1-96",  "ISO/IEC 9945-1:1996 ('POSIX.1')" },
    { "-p1003.1",       "IEEE Std 1003.1 ('POSIX.1')" },
    { "-p1003.1-88",    "IEEE Std 1003.1-1988 ('POSIX.1')" },
    { "-p1003.1-90",    "ISO/IEC 9945-1:1990 ('POSIX.1')" },
    { "-p1003.1-96",    "ISO/IEC 9945-1:1996 ('POSIX.1')" },
    { "-p1003.1b-93",   "IEEE Std 1003.1b-1993 ('POSIX.1')" },
    { "-p1003.1c-95",   "IEEE Std 1003.1c-1995 ('POSIX.1')" },
    { "-p1003.1g-2000", "IEEE Std 1003.1g-2000 ('POSIX.1')" },
    { "-p1003.1i-95",   "IEEE Std 1003.1i-1995 ('POSIX.1')" },
    { "-p1003.1-2001",  "IEEE Std 1003.1-2001 ('POSIX.1')" },
    { "-p1003.1-2004",  "IEEE Std 1003.1-2004 ('POSIX.1')" },
    { "-p1003.1-2008",  "IEEE Std 1003.1-2008 ('POSIX.1')" },

    // POSIX.2
    { "-iso9945-2-93",  "ISO/IEC 9945-2:1993 ('POSIX.2')" },
    { "-p1003.2",       "IEEE Std 1003.2 ('POSIX.2')" },
    { "-p1003.2-92",    "IEEE Std 1003.2-1992 ('POSIX.2')" },
    { "-p1003.2a-92",   "IEEE Std 1003.2a-1992 ('POSIX.2')" },

    // Single UNIX Specification, SVID and X/Open
    { "-susv2",         "Version 2 of the Single UNIX Specification ('SUSv2')" },
    { "-susv3",         "Version 3 of the Single UNIX Specification ('SUSv3')" },
    { "-svid4",         "System V Interface Definition, Fourth Edition ('SVID4')" },
    { "-xbd5",          "X/Open Base Definitions Issue 5 ('XBD5')" },
    { "-xcu5",          "X/Open Commands and Utilities Issue 5 ('XCU5')" },
    { "-xcurses4.2",    "X/Open Curses Issue 4, Version 2 ('XCURSES4.2')" },
    { "-xns5",          "X/Open Networking Services Issue 5 ('XNS5')" },
    { "-xns5.2",        "X/Open Networking Services Issue 5.2 ('XNS5.2')" },
    { "-xpg3",          "X/Open Portability Guide Issue 3 ('XPG3')" },
    { "-xpg4",          "X/Open Portability Guide Issue 4 ('XPG4')" },
    { "-xpg4.2",        "X/Open Portability Guide Issue 4, Version 2 ('XPG4.2')" },
    { "-xsh5",          "X/Open System Interfaces and Headers Issue 5 ('XSH5')" },

    // Miscellaneous
    { "-ieee754",       "IEEE Std 754-1985" },
    { "-iso8802-3",     "ISO/IEC 8802-3:1989" }
};
640 
641 
642 /* default: print code */
643 
644 
/* static char eqndelimopen=0, eqndelimclose=0; */

// Special characters of the input language, with their initial values.
static char escapesym = '\\';
static char nobreaksym = '\'';
static char controlsym = '.';
static char fieldsym = 0; // no field character set initially
static char padsym = 0;   // no padding character set initially

// Scan buffer: storage pointer, current position and capacity, plus the
// flag telling whether scanning goes into that buffer.
static char *buffer = nullptr;
static int buffpos = 0;
static int buffmax = 0;
static bool scaninbuff = false;

// Nesting bookkeeping; every counter and every dl_set slot starts at zero.
static int itemdepth = 0;
static int in_div = 0;
static int dl_set[20] = {};
654 static QStack<QByteArray> listItemStack;
655 static bool still_dd = 0;
656 static int tabstops[20] = { 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96 };
657 static int maxtstop = 12;
658 static int curpos = 0;
659 static bool break_the_while_loop = false;
660 
// Forward declarations of functions that are called before their definition.
// NOTE(review): their definitions are outside this chunk; see them for the
// meaning of the parameters.
static char *scan_troff(char *c, bool san, char **result);
static char *scan_troff_mandoc(char *c, bool san, char **result);
static int getNumberRegisterValue(const QByteArray &name, int sign = 0);

/// List of arguments; presumably those of the macro currently being expanded -- TODO confirm.
static QList<QByteArray> s_argumentList;

/// CSS file name or location stored by setCssFile().
static QByteArray cssFile;

static QByteArray s_dollarZero; // Value of $0
670 
setCssFile(const QByteArray & _cssFile)671 void setCssFile(const QByteArray& _cssFile)
672 {
673     cssFile = _cssFile;
674 }
675 
fill_old_character_definitions(void)676 static void fill_old_character_definitions(void)
677 {
678     for (const CSTRDEF &standardchar : standardchars)
679     {
680         const int nr = standardchar.nr;
681         const char temp[3] = { char(nr / 256), char(nr % 256), 0 };
682         QByteArray name(temp);
683         s_characterDefinitionMap.insert(name, StringDefinition(standardchar.slen, standardchar.st));
684     }
685 }
686 
/// Static output buffer: room for HUGE_STR_MAX characters plus the terminating NUL.
static char outbuffer[NULL_TERMINATED(HUGE_STR_MAX)];
// NOTE(review): the three variables below are presumably consulted by the
// output routines, which are outside this chunk -- confirm there.
static int no_newline_output = 0;
static int newline_for_fun = 0;
static bool output_possible = false;
691 
/**
 * Directories that are probed, in this order, when a header name found in
 * the text is turned into a file link. The list ends with a null pointer.
 */
static const char * const includedirs[] =
{
    "/usr/include",
    "/usr/include/sys",
    "/usr/local/include",
    "/opt/local/include",
    "/usr/ccs",
    "/usr/X11R6/include",
    "/usr/openwin/include",
    "/usr/include/g++",
    nullptr // end marker
};
704 
/// While true, add_links() hands its text straight to output_real() without creating any links.
static bool ignore_links = false;
706 
add_links(char * c)707 static void add_links(char *c)
708 {
709     /*
710     ** Add the links to the output.
711     ** At the moment the following are recognized:
712     **
713     ** name(*)                 -> ../man?/name.*
714     ** method://string         -> method://string
715     ** www.host.name           -> http://www.host.name
716     ** ftp.host.name           -> ftp://ftp.host.name
717     ** name@host               -> mailto:name@host
718     ** <name.h>                -> file:/usr/include/name.h   (guess)
719     **
720     ** Other possible links to add in the future:
721     **
722     ** /dir/dir/file  -> file:/dir/dir/file
723     */
724 
725     if (ignore_links)
726     {
727         output_real(c);
728         return;
729     }
730 
731     int i, j, nr;
732     char *f, *g, *h;
733     const int numtests = 6; // Nmber of tests
734     char *idtest[numtests]; // url, mailto, www, ftp, manpage, C header file
735     bool ok;
736     /* search for (section) */
737     nr = 0;
738     idtest[0] = strstr(c + 1, "://");
739     idtest[1] = strchr(c + 1, '@');
740     idtest[2] = strstr(c, "www.");
741     idtest[3] = strstr(c, "ftp.");
742     idtest[4] = strchr(c + 1, '(');
743     idtest[5] = strstr(c + 1, ".h&gt;");
744     for (i = 0; i < numtests; ++i) nr += (idtest[i] != nullptr);
745     while (nr)
746     {
747         j = -1;
748         for (i = 0; i < numtests; i++)
749             if (idtest[i] && (j < 0 || idtest[i] < idtest[j])) j = i;
750         switch (j)
751         {
752         case 5:   /* <name.h> */
753         {
754             f = idtest[5];
755             h = f + 2;
756             g = f;
757             while (g > c && g[-1] != ';') g--;
758             bool wrote_include = false;
759 
760             if (g != c)
761             {
762                 QByteArray dir;
763                 QByteArray file(g, h - g);
764                 file = file.trimmed();
765                 for (int index = 0; includedirs[index]; index++)
766                 {
767                     QByteArray str(includedirs[index]);
768                     str.append('/');
769                     str.append(file);
770                     if (!access(str.data(), R_OK))
771                     {
772                         dir = includedirs[index];
773                         break;
774                     }
775                 }
776                 if (!dir.isEmpty())
777                 {
778 
779                     char t;
780                     t = *g;
781                     *g = 0;
782                     output_real(c);
783                     *g = t;
784                     *h = 0;
785 
786                     QByteArray str;
787                     str.append("<A HREF=\"file:");
788                     str.append(dir.data());
789                     str.append("/");
790                     str.append(file.data());
791                     str.append("\">");
792                     str.append(file.data());
793                     str.append("</A>&gt;");
794 
795                     output_real(str.data());
796                     c = f + 6;
797                     wrote_include = true;
798                 }
799 
800             }
801 
802             if (!wrote_include)
803             {
804                 f[5] = 0;
805                 output_real(c);
806                 f[5] = ';';
807                 c = f + 5;
808             }
809         }
810         break;
811         case 4: /* manpage */
812             f = idtest[j];
813             /* check section */
814             g = strchr(f, ')');
815             // The character before f must be alphanumeric, the end of a HTML tag or the end of a &nbsp;
816             if (g != nullptr && f > c && (g - f) < 12 && (isalnum(f[-1]) || f[-1] == '>' || (f[-1] == ';')) &&
817                     (isdigit(f[1]) || (f[1] == 'n')) && f[1] != '0' && ((g - f) <= 2 || isalpha(f[2])))
818             {
819                 ok = true;
820                 h = f + 2;
821                 while (h < g)
822                 {
823                     if (!isalnum(*h++))
824                     {
825                         ok = false;
826                         break;
827                     }
828                 }
829             }
830             else
831                 ok = false;
832 
833             h = f - 1;
834             if (ok)
835             {
836                 // Skip &nbsp;
837                 qCDebug(KIO_MAN_LOG) << "BEFORE SECTION:" <<  *h;
838                 if ((h > c + 5) && (! memcmp(h - 5, "&nbsp;", 6)))
839                 {
840                     h -= 6;
841                     qCDebug(KIO_MAN_LOG) << "Skip &nbsp;";
842                 }
843                 else if ( (h > (c + 6)) && (!memcmp(h - 6, "&#8239;", 7)) ) // &#8239;  narrow space
844                 {
845                     h -= 7;
846                 }
847                 else if (*h == ';')
848                 {
849                     // Not a non-breaking space, so probably not ok
850                     ok = false;
851                 }
852             }
853 
854             if (ok)
855             {
856                 /* this might be a link */
857                 /* skip html makeup */
858                 while (h > c && *h == '>')
859                 {
860                     while (h != c && *h != '<') h--;
861                     if (h != c) h--;
862                 }
863                 if (isalnum(*h))
864                 {
865                     char t, sec, *e;
866                     QByteArray fstr(f);
867                     e = h + 1;
868                     sec = f[1];
869                     const int index = fstr.indexOf(')', 2);
870                     QByteArray subsec;
871                     if (index != -1)
872                         subsec = fstr.mid(2, index - 2);
873                     else // No closing ')' found, take first character as subsection.
874                         subsec = fstr.mid(2, 1);
875                     while (h > c && (isalnum(h[-1]) || h[-1] == '_'
876                                      || h[-1] == ':' || h[-1] == '-' || h[-1] == '.'))
877                         h--;
878                     t = *h;
879                     *h = '\0';
880                     output_real(c);
881                     *h = t;
882                     t = *e;
883                     *e = '\0';
884                     QByteArray str("<a href=\"man:/");
885                     str += h;
886                     str += '(';
887                     str += char(sec);
888                     if (!subsec.isEmpty())
889                         str += subsec.toLower();
890                     str += ")\">";
891                     str += h;
892                     str += "</a>";
893                     output_real(str.data());
894                     *e = t;
895                     c = e;
896                 }
897             }
898             *f = '\0';
899             output_real(c);
900             *f = '(';
901             idtest[4] = f - 1;
902             c = f;
903             break; /* manpage */
904         case 3: /* ftp */
905         case 2: /* www */
906             g = f = idtest[j];
907             while (*g && (isalnum(*g) || *g == '_' || *g == '-' || *g == '+' ||
908                           *g == '.' || *g == '/')) g++;
909             if (g[-1] == '.') g--;
910             if (g - f > 4)
911             {
912                 char t;
913                 t = *f;
914                 *f = '\0';
915                 output_real(c);
916                 *f = t;
917                 t = *g;
918                 *g = '\0';
919                 QByteArray str;
920                 str.append("<A HREF=\"");
921                 str.append(j == 3 ? "ftp" : "http");
922                 str.append("://");
923                 str.append(f);
924                 str.append("\">");
925                 str.append(f);
926                 str.append("</A>");
927                 output_real(str.data());
928                 *g = t;
929                 c = g;
930             }
931             else
932             {
933                 f[3] = '\0';
934                 output_real(c);
935                 c = f + 3;
936                 f[3] = '.';
937             }
938             break;
939         case 1: /* mailto */
940             g = f = idtest[1];
941             while (g > c && (isalnum(g[-1]) || g[-1] == '_' || g[-1] == '-' ||
942                              g[-1] == '+' || g[-1] == '.' || g[-1] == '%')) g--;
943             if (g - 7 >= c && g[-1] == ':')
944             {
945                 // We have perhaps an email address starting with mailto:
946                 if (!qstrncmp("mailto:", g - 7, 7))
947                     g -= 7;
948             }
949             h = f + 1;
950             while (*h && (isalnum(*h) || *h == '_' || *h == '-' || *h == '+' ||
951                           *h == '.')) h++;
952             if (*h == '.') h--;
953             if (h - f > 4 && f - g > 1)
954             {
955                 char t;
956                 t = *g;
957                 *g = '\0';
958                 output_real(c);
959                 *g = t;
960                 t = *h;
961                 *h = '\0';
962                 QByteArray str;
963                 str.append("<A HREF=\"mailto:");
964                 str.append(g);
965                 str.append("\">");
966                 str.append(g);
967                 str.append("</A>");
968                 output_real(str.data());
969                 *h = t;
970                 c = h;
971             }
972             else
973             {
974                 *f = '\0';
975                 output_real(c);
976                 *f = '@';
977                 idtest[1] = c;
978                 c = f;
979             }
980             break;
981         case 0: /* url */
982             g = f = idtest[0]; // ://foo...
983 
984             // backup before :// to get protocol
985             while (g > c && isalpha(g[-1]) && islower(g[-1])) g--;
986             h = f + 3; // start past ://
987             // determine length of path and part of query it looks like...
988             while (*h && !isspace(*h) && *h != '<' && *h != '>' && *h != '"' &&
989                     *h != '&') h++;
990             // if protocol length 3-6 characters and path has any length at all...
991             // more tests added because this code breaks stylesheet links that use
992             // the correct file:/// stuff.
993             if (f - g > 2 && f - g < 7 && h - f > 3 && (strstr(c, "http://") != nullptr || strstr(c, "ftp://") != nullptr))
994             {
995                 char t;
996                 t = *g;
997                 *g = '\0';
998                 output_real(c);
999                 *g = t;
1000                 t = *h;
1001                 *h = '\0';
1002                 QByteArray str;
1003                 str.append("<A HREF=\"");
1004                 str.append(g);
1005                 str.append("\">");
1006                 str.append(g);
1007                 str.append("</A>");
1008                 output_real(str.data());
1009                 *h = t;
1010                 c = h;
1011             }
1012             else
1013             {
1014                 f[1] = '\0';
1015                 output_real(c);
1016                 f[1] = '/';
1017                 c = f + 1;
1018             }
1019             break;
1020         default:
1021             break;
1022         }
1023         nr = 0;
1024         if (idtest[0] && idtest[0] <= c) idtest[0] = strstr(c + 1, "://");
1025         if (idtest[1] && idtest[1] <= c) idtest[1] = strchr(c + 1, '@');
1026         if (idtest[2] && idtest[2] < c) idtest[2] = strstr(c, "www.");
1027         if (idtest[3] && idtest[3] < c) idtest[3] = strstr(c, "ftp.");
1028         if (idtest[4] && idtest[4] <= c) idtest[4] = strchr(c + 1, '(');
1029         if (idtest[5] && idtest[5] <= c) idtest[5] = strstr(c + 1, ".h&gt;");
1030         for (i = 0; i < numtests; i++) nr += (idtest[i] != nullptr);
1031     }
1032     output_real(c);
1033 }
1034 
1035 //---------------------------------------------------------------------
1036 
// Name of the font currently in effect. set_font() reads it to decide whether
// a </span> has to be emitted and overwrites it with the newly selected font.
static QByteArray current_font;
// Current font size step. change_to_size() keeps it within -9..9 and
// read_only_number_register() reports it as register ".s".
static int current_size = 0;

/*
 "fillout" is the mode of text output:
 1 = fill mode (line breaks happen when the browser wants them; normal HTML text).
 0 = no-fill mode (preformatted text, <pre>..</pre>).
     Input lines are output as-is, retaining line breaks and ignoring the current line length.
*/
static int fillout = 1;
1047 
1048 //---------------------------------------------------------------------
1049 
out_html(const char * c)1050 static void out_html(const char *c)
1051 {
1052     if ( !c || !*c ) return;
1053 
1054     // Added, probably due to the const?
1055     char *c2 = qstrdup(c);
1056     char *c3 = c2;
1057 
1058     static int obp = 0;
1059 
1060     if (no_newline_output)
1061     {
1062         int i = 0;
1063         no_newline_output = 1;
1064         while (c2[i])
1065         {
1066             if (!no_newline_output) c2[i-1] = c2[i];
1067             if (c2[i] == '\n') no_newline_output = 0;
1068             i++;
1069         }
1070         if (!no_newline_output) c2[i-1] = 0;
1071     }
1072     if (scaninbuff)
1073     {
1074         while (*c2)
1075         {
1076             if (buffpos >= buffmax)
1077             {
1078                 char *h = new char[buffmax*2];
1079 
1080                 memcpy(h, buffer, buffmax);
1081                 delete [] buffer;
1082                 buffer = h;
1083                 buffmax = buffmax * 2;
1084             }
1085             buffer[buffpos++] = *c2++;
1086         }
1087     }
1088     else if (output_possible)
1089     {
1090         while (*c2)
1091         {
1092             outbuffer[obp++] = *c2;
1093             if (*c2 == '\n' || obp >= HUGE_STR_MAX)
1094             {
1095                 outbuffer[obp] = '\0';
1096                 add_links(outbuffer);
1097                 obp = 0;
1098             }
1099             c2++;
1100         }
1101     }
1102     delete [] c3;
1103 }
1104 
1105 //---------------------------------------------------------------------
1106 
checkListStack()1107 void checkListStack()  // see if we need to end a previously begun list item
1108 {
1109     if ( !listItemStack.isEmpty() && (listItemStack.size() == itemdepth) )
1110     {
1111         out_html("</");
1112         out_html(listItemStack.pop());
1113         out_html(">");
1114     }
1115 }
1116 
1117 //---------------------------------------------------------------------
1118 
set_font(const QByteArray & name)1119 static QByteArray set_font(const QByteArray& name)
1120 {
1121     // Every font but R (Regular) creates <span> elements
1122     QByteArray markup;
1123     if ( (current_font != "R") && (current_font != "P") && !current_font.isEmpty() )
1124         markup += "</span>";
1125     const uint len = name.length();
1126     bool fontok = true;
1127     if (len == 1)
1128     {
1129         const char lead = name[0];
1130         switch (lead)
1131         {
1132         case 'P': // ### TODO: this seems to mean "precedent font"
1133         case 'R':
1134             break; // regular, do nothing
1135         case 'I':
1136             markup += "<span style=\"font-style:italic\">";
1137             break;
1138         case 'B':
1139             markup += "<span style=\"font-weight:bold\">";
1140             break;
1141         case 'L':
1142             markup += "<span style=\"font-family:monospace\">";
1143             break; // ### What's L?
1144         default:
1145             fontok = false;
1146         }
1147     }
1148     else if (len == 2)
1149     {
1150         if (name == "BI")
1151             markup += "<span style=\"font-style:italic;font-weight:bold\">";
1152         // Courier
1153         else if (name == "CR")
1154             markup += "<span style=\"font-family:monospace\">";
1155         else if (name == "CW")   // CW is used by pod2man(1) (part of perldoc(1))
1156             markup += "<span style=\"font-family:monospace\">";
1157         else if (name == "CI")
1158             markup += "<span style=\"font-family:monospace;font-style:italic\">";
1159         else if (name == "CB")
1160             markup += "<span style=\"font-family:monospace;font-weight:bold\">";
1161         // Times
1162         else if (name == "TR")
1163             markup += "<span style=\"font-family:serif\">";
1164         else if (name == "TI")
1165             markup += "<span style=\"font-family:serif;font-style:italic\">";
1166         else if (name == "TB")
1167             markup += "<span style=\"font-family:serif;font-weight:bold\">";
1168         // Helvetica
1169         else if (name == "HR")
1170             markup += "<span style=\"font-family:sans-serif\">";
1171         else if (name == "HI")
1172             markup += "<span style=\"font-family:sans-serif;font-style:italic\">";
1173         else if (name == "HB")
1174             markup += "<span style=\"font-family:sans-serif;font-weight:bold\">";
1175         else
1176             fontok = false;
1177     }
1178     else if (len == 3)
1179     {
1180         if (name == "CBI")
1181             markup += "<span style=\"font-family:monospace;font-style:italic;font-weight:bold\">";
1182         else if (name == "TBI")
1183             markup += "<span style=\"font-family:serif;font-style:italic;font-weight:bold\">";
1184         else if (name == "HBI")
1185             markup += "<span style=\"font-family:sans-serif;font-style:italic;font-weight:bold\">";
1186         else
1187             fontok = false;
1188     }
1189     else
1190         fontok = false;
1191 
1192     if (fontok)
1193         current_font = name;
1194     else
1195         current_font = "R"; // Still nothing, then it is 'R' (Regular) // krazy:exclude=doublequote_chars
1196     return markup;
1197 }
1198 
1199 //---------------------------------------------------------------------
1200 
change_to_size(int nr)1201 static QByteArray change_to_size(int nr)
1202 {
1203     switch (nr)
1204     {
1205     case '0':
1206     case '1':
1207     case '2':
1208     case '3':
1209     case '4':
1210     case '5':
1211     case '6':
1212     case '7':
1213     case '8':
1214     case '9':
1215         nr = nr - '0';
1216         break;
1217     case '\0':
1218         break;
1219     default:
1220         nr = current_size + nr;
1221         if (nr > 9) nr = 9;
1222         if (nr < -9) nr = -9;
1223         break;
1224     }
1225     if (nr == current_size)
1226         return "";
1227     const QByteArray font(current_font);
1228     QByteArray markup;
1229     markup = set_font("R");
1230     if (current_size)
1231         markup += "</span>";
1232     current_size = nr;
1233     if (nr)
1234     {
1235         int percent = 100 + nr*1;
1236         markup += "<span style=\"font-size:";
1237         markup += QByteArray::number(percent);
1238         markup += "%\">";
1239     }
1240     markup += set_font(font);
1241     return markup;
1242 }
1243 
1244 //---------------------------------------------------------------------
1245 
/* static int asint=0; */
// NOTE(review): not referenced in this part of the file; presumably holds the
// numeric result of the most recently scanned escape - confirm against
// scan_escape_direct().
static int intresult = 0;

// While set, scan_named_font() still parses the font name but returns no
// markup and does not call set_font().
static bool skip_escape = false;
// NOTE(review): not referenced in this part of the file - meaning to be confirmed.
static bool single_escape = false;

// Forward declaration, needed by the scan_named_*() helpers below: they call it
// on the text following an escape symbol, continue scanning at the returned
// position and use the text stored in cstr as (part of) the name.
static char *scan_escape_direct(char *c, QByteArray& cstr);
1253 
1254 /**
1255  * scan a named character
1256  * param c position
1257  */
scan_named_character(char * & c)1258 static QByteArray scan_named_character(char*& c)
1259 {
1260     QByteArray name;
1261     if (*c == '(')
1262     {
1263         // \*(ab  Name of two characters
1264         if (c[1] == escapesym)
1265         {
1266             QByteArray cstr;
1267             c = scan_escape_direct(c + 2, cstr);
1268             // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the variable are to be used.
1269             name = cstr;
1270         }
1271         else
1272         {
1273             name += c[1];
1274             name += c[2];
1275             c += 3;
1276         }
1277     }
1278     else if (*c == '[')
1279     {
1280         // \*[long_name]  Long name
1281         // Named character groff(7)
1282         // We must find the ] to get a name
1283         c++;
1284         while (*c && *c != ']' && *c != '\n')
1285         {
1286             if (*c == escapesym)
1287             {
1288                 QByteArray cstr;
1289                 c = scan_escape_direct(c + 1, cstr);
1290                 const int result = cstr.indexOf(']');
1291                 if (result == -1)
1292                     name += cstr;
1293                 else
1294                 {
1295                     // Note: we drop the characters after the ]
1296                     name += cstr.left(result);
1297                 }
1298             }
1299             else
1300             {
1301                 name += *c;
1302                 c++;
1303             }
1304         }
1305         if (!*c || *c == '\n')
1306         {
1307             qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse character name: " << BYTEARRAY(name);
1308             return "";
1309         }
1310         c++;
1311     }
1312     else if (*c == 'C' || c[1] == '\'')
1313     {
1314         // \C'name'
1315         c += 2;
1316         while (*c && *c != '\'' && *c != '\n')
1317         {
1318             if (*c == escapesym)
1319             {
1320                 QByteArray cstr;
1321                 c = scan_escape_direct(c + 1, cstr);
1322                 const int result = cstr.indexOf('\'');
1323                 if (result == -1)
1324                     name += cstr;
1325                 else
1326                 {
1327                     // Note: we drop the characters after the ]
1328                     name += cstr.left(result);
1329                 }
1330             }
1331             else
1332             {
1333                 name += *c;
1334                 c++;
1335             }
1336         }
1337         if (!*c || *c == '\n')
1338         {
1339             qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse (\\C mode) character name: " << BYTEARRAY(name);
1340             return "";
1341         }
1342         c++;
1343     }
1344     // Note: characters with a one character length name do not exist, as they would collide with other escapes
1345 
1346     // Now we have the name, let us find it between the string names
1347     QMap<QByteArray, StringDefinition>::const_iterator it = s_characterDefinitionMap.constFind(name);
1348     if (it == s_characterDefinitionMap.constEnd())
1349     {
1350         qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find character with name: " << BYTEARRAY(name);
1351         // No output, as an undefined string is empty by default
1352         return "";
1353     }
1354     else
1355     {
1356         qCDebug(KIO_MAN_LOG) << "Character with name: \"" << BYTEARRAY(name) << "\" => " << BYTEARRAY((*it).m_output);
1357         return (*it).m_output;
1358     }
1359 }
1360 
1361 //---------------------------------------------------------------------
1362 
scan_named_string(char * & c)1363 static QByteArray scan_named_string(char*& c)
1364 {
1365     QByteArray name;
1366     if (*c == '(')
1367     {
1368         // \*(ab  Name of two characters
1369         if (c[1] == escapesym)
1370         {
1371             QByteArray cstr;
1372             c = scan_escape_direct(c + 2, cstr);
1373             qCDebug(KIO_MAN_LOG) << "\\(" << BYTEARRAY(cstr);
1374             // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the variable are to be used.
1375             name = cstr;
1376         }
1377         else
1378         {
1379             name += c[1];
1380             name += c[2];
1381             c += 3;
1382         }
1383     }
1384     else if (*c == '[')
1385     {
1386         // \*[long_name]  Long name
1387         // Named character groff(7)
1388         // We must find the ] to get a name
1389         c++;
1390         while (*c && *c != ']' && *c != '\n')
1391         {
1392             if (*c == escapesym)
1393             {
1394                 QByteArray cstr;
1395                 c = scan_escape_direct(c + 1, cstr);
1396                 const int result = cstr.indexOf(']');
1397                 if (result == -1)
1398                     name += cstr;
1399                 else
1400                 {
1401                     // Note: we drop the characters after the ]
1402                     name += cstr.left(result);
1403                 }
1404             }
1405             else
1406             {
1407                 name += *c;
1408                 c++;
1409             }
1410         }
1411         if (!*c || *c == '\n')
1412         {
1413             qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse string name: " << BYTEARRAY(name);
1414             return "";
1415         }
1416         c++;
1417     }
1418     else
1419     {
1420         // \*a Name of one character
1421         name += *c;
1422         c++;
1423     }
1424     // Now we have the name, let us find it between the string names
1425     QMap<QByteArray, StringDefinition>::const_iterator it = s_stringDefinitionMap.constFind(name);
1426     if (it == s_stringDefinitionMap.constEnd())
1427     {
1428         // try a number register:
1429         return QByteArray::number(getNumberRegisterValue(name));
1430 
1431         //qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string with name: " << BYTEARRAY(name);
1432         // No output, as an undefined string is empty by default
1433         //return "";
1434     }
1435     else
1436     {
1437         qCDebug(KIO_MAN_LOG) << "String with name: '" << BYTEARRAY(name) << "' => >>>" << BYTEARRAY((*it).m_output) << "<<<";
1438         return (*it).m_output;
1439     }
1440 }
1441 
1442 //---------------------------------------------------------------------
1443 
scan_dollar_parameter(char * & c)1444 static QByteArray scan_dollar_parameter(char*& c)
1445 {
1446     int argno = 0; // No dollar argument number yet!
1447     if (*c == '0')
1448     {
1449         //qCDebug(KIO_MAN_LOG) << "$0";
1450         c++;
1451         return s_dollarZero;
1452     }
1453     else if (*c >= '1' && *c <= '9')
1454     {
1455         //qCDebug(KIO_MAN_LOG) << "$ direct";
1456         argno = (*c - '0');
1457         c++;
1458     }
1459     else if (*c == '(')
1460     {
1461         //qCDebug(KIO_MAN_LOG) << "$(";
1462         if (c[1] && c[2] && c[1] >= '0' && c[1] <= '9' && c[2] >= '0' && c[2] <= '9')
1463         {
1464             argno = (c[1] - '0') * 10 + (c[2] - '0');
1465             c += 3;
1466         }
1467         else
1468         {
1469             if (!c[1])
1470                 c++;
1471             else if (!c[2])
1472                 c += 2;
1473             else
1474                 c += 3;
1475             return "";
1476         }
1477     }
1478     else if (*c == '[')
1479     {
1480         //qCDebug(KIO_MAN_LOG) << "$[";
1481         argno = 0;
1482         c++;
1483         while (*c && *c >= '0' && *c <= '9' && *c != ']')
1484         {
1485             argno *= 10;
1486             argno += (*c - '0');
1487             c++;
1488         }
1489         if (*c != ']')
1490         {
1491             return "";
1492         }
1493         c++;
1494     }
1495     else if ((*c == '*') || (*c == '@'))
1496     {
1497         const bool quote = (*c == '@');
1498         QList<QByteArray>::const_iterator it = s_argumentList.constBegin();
1499         QByteArray param;
1500         bool space = false;
1501         for (; it != s_argumentList.constEnd(); ++it)
1502         {
1503             if (space)
1504                 param += ' ';
1505             if (quote)
1506                 param += '\"'; // Not as HTML, as it could be used by macros !
1507             param += (*it);
1508             if (quote)
1509                 param += '\"'; // Not as HTML, as it could be used by macros!
1510             space = true;
1511         }
1512         c++;
1513         return param;
1514     }
1515     else
1516     {
1517         qCDebug(KIO_MAN_LOG) << "EXCEPTION: unknown parameter $" << *c;
1518         return "";
1519     }
1520     //qCDebug(KIO_MAN_LOG) << "ARG $" << argno;
1521     if (!s_argumentList.isEmpty() && argno > 0)
1522     {
1523         //qCDebug(KIO_MAN_LOG) << "ARG $" << argno << " OK!";
1524         argno--;
1525         if (argno >= s_argumentList.size())
1526         {
1527             qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find parameter $" << (argno + 1);
1528             return "";
1529         }
1530 
1531         return s_argumentList[argno];
1532     }
1533     return "";
1534 }
1535 
1536 //---------------------------------------------------------------------
1537 /// return the value of read-only number registers
1538 
read_only_number_register(const QByteArray & name)1539 static int read_only_number_register(const QByteArray& name)
1540 {
1541     // Internal read-only variables
1542     if (name == ".$")
1543     {
1544         qCDebug(KIO_MAN_LOG) << "\\n[.$] == " << s_argumentList.size();
1545         return s_argumentList.size();
1546     }
1547     else if (name == ".g")
1548         return 0; // We are not groff(1)
1549     else if (name == ".s")
1550         return current_size;
1551 #if 0
1552     // ### TODO: map the fonts to a number
1553     else if (name == ".f")
1554         return current_font;
1555 #endif
1556     else if (name == ".P")
1557         return 0; // We are not printing
1558     else if (name == ".A")
1559         return s_nroff;
1560 #ifndef SIMPLE_MAN2HTML
1561     // Special KDE KIO man:
1562     const QString version_string(KDE_VERSION_STRING);
1563     const int version_major = version_string.section('.', 0, 0).toInt();
1564     const int version_minor = version_string.section('.', 1, 1).toInt();
1565     const int version_patch = version_string.section('.', 2, 2).toInt();
1566     if (name == ".KDE_VERSION_MAJOR")
1567         return version_major;
1568     else if (name == ".KDE_VERSION_MINOR")
1569         return version_minor;
1570     else if (name == ".KDE_VERSION_RELEASE")
1571         return version_patch;
1572     else if (name == ".KDE_VERSION")
1573         return (version_major << 16) | (version_minor << 8) | version_patch;
1574 #endif
1575     else if ( name == ".T" )
1576         return 0;  // Set to 1 in nroff, if -T option used; always 0 in troff.
1577 
1578     // ### TODO: groff defines many more read-only number registers
1579     qCDebug(KIO_MAN_LOG) << "EXCEPTION: unknown read-only number register: " << BYTEARRAY(name);
1580 
1581     return 0; // Undefined variable
1582 
1583 }
1584 
1585 //---------------------------------------------------------------------
1586 
getNumberRegisterValue(const QByteArray & name,int sign)1587 static int getNumberRegisterValue(const QByteArray &name, int sign)
1588 {
1589     if (name[0] == '.')
1590     {
1591         return read_only_number_register(name);
1592     }
1593     else
1594     {
1595         QMap< QByteArray, NumberDefinition >::iterator it = s_numberDefinitionMap.find(name);
1596         if (it == s_numberDefinitionMap.end())
1597         {
1598             return 0; // Undefined variable
1599         }
1600         else
1601         {
1602             (*it).m_value += sign * (*it).m_increment;
1603             return (*it).m_value;
1604         }
1605     }
1606 }
1607 
1608 //---------------------------------------------------------------------
1609 /// get the value of a number register and auto-increment if asked
1610 
scan_number_register(char * & c)1611 static int scan_number_register(char*& c)
1612 {
1613     int sign = 0; // Sign for auto-increment (if any)
1614     switch (*c)
1615     {
1616     case '+':
1617         sign = 1;
1618         c++;
1619         break;
1620     case '-':
1621         sign = -1;
1622         c++;
1623         break;
1624     default:
1625         break;
1626     }
1627     QByteArray name;
1628     if (*c == '[')
1629     {
1630         c++;
1631         if (*c == '+')
1632         {
1633             sign = 1;
1634             c++;
1635         }
1636         else if (*c == '-')
1637         {
1638             sign = -1;
1639             c++;
1640         }
1641         while (*c && *c != ']' && *c != '\n')
1642         {
1643             // ### TODO: a \*[string] could be inside and should be processed
1644             name += *c;
1645             c++;
1646         }
1647         if (!*c || *c == '\n')
1648         {
1649             qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse number register name: " << BYTEARRAY(name);
1650             return 0;
1651         }
1652         c++;
1653     }
1654     else if (*c == '(')
1655     {
1656         c++;
1657         if (*c == '+')
1658         {
1659             sign = 1;
1660             c++;
1661         }
1662         else if (*c == '-')
1663         {
1664             sign = -1;
1665             c++;
1666         }
1667         name += c[0];
1668         name += c[1];
1669         c += 2;
1670     }
1671     else
1672     {
1673         name += *c;
1674         c++;
1675     }
1676 
1677     return getNumberRegisterValue(name, sign);
1678 }
1679 
1680 //---------------------------------------------------------------------
1681 // scan a name from the following
1682 // x     ... return x    (one char)
1683 // (xx   ... return xx   (two chars)
1684 // [xxx] ... return xxx  (any chars)
1685 // after scanning, c points to the terminating char (0, \n or ])
1686 
scan_name(char * & c)1687 static QByteArray scan_name(char *&c)
1688 {
1689     QByteArray name;
1690     if ( *c == '(' )
1691     {
1692         int i = 0;
1693         for (c++; *c && (*c != '\n') && (i < 2); c++, i++)
1694             name += *c;
1695     }
1696     else if ( *c == '[' )
1697     {
1698         for (c++; *c && (*c != ']') && (*c != '\n'); c++)
1699             name += *c;
1700     }
1701     else
1702         name += *c;
1703 
1704     return name;
1705 }
1706 
1707 //---------------------------------------------------------------------
1708 /// get and set font
1709 
scan_named_font(char * & c)1710 static QByteArray scan_named_font(char*& c)
1711 {
1712     QByteArray name;
1713     if (*c == '(')
1714     {
1715         // \f(ab  Name of two characters
1716         if (c[1] == escapesym)
1717         {
1718             QByteArray cstr;
1719             c = scan_escape_direct(c + 2, cstr);
1720             qCDebug(KIO_MAN_LOG) << "\\(" << BYTEARRAY(cstr);
1721             // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the variable are to be used.
1722             name = cstr;
1723         }
1724         else
1725         {
1726             name += c[1];
1727             name += c[2];
1728             c += 3;
1729         }
1730     }
1731     else if (*c == '[')
1732     {
1733         // \f[long_name]  Long name
1734         // We must find the ] to get a name
1735         c++;
1736         while (*c && *c != ']' && *c != '\n')
1737         {
1738             if (*c == escapesym)
1739             {
1740                 QByteArray cstr;
1741                 c = scan_escape_direct(c + 1, cstr);
1742                 const int result = cstr.indexOf(']');
1743                 if (result == -1)
1744                     name += cstr;
1745                 else
1746                 {
1747                     // Note: we drop the characters after the ]
1748                     name += cstr.left(result);
1749                 }
1750             }
1751             else
1752             {
1753                 name += *c;
1754                 c++;
1755             }
1756         }
1757         if (!*c || *c == '\n')
1758         {
1759             qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse font name: " << BYTEARRAY(name);
1760             return "";
1761         }
1762         c++;
1763     }
1764     else if ( *c )  // \f alone makes c point at 0-byte
1765     {
1766         // \fa Font name with one character or one digit
1767         // ### HACK do *not* use:  name = *c;  or name would be empty
1768         name += *c;
1769         c++;
1770     }
1771     //qCDebug(KIO_MAN_LOG) << "FONT NAME: " << BYTEARRAY( name );
1772     // Now we have the name, let us find the font
1773     bool ok = false;
1774     const unsigned int number = name.toUInt(&ok);
1775     if (ok)
1776     {
1777         if (number < 5)
1778         {
1779             const char* const fonts[] = { "R", "I", "B", "BI", "CR" }; // Regular, Italic, Bold, Bold Italic, Courier regular
1780             name = fonts[ number ];
1781         }
1782         else
1783         {
1784             qCDebug(KIO_MAN_LOG) << "EXCEPTION: font has too big number: " << BYTEARRAY(name) << " => " << number;
1785             name = "R"; // Let assume Regular // krazy:exclude=doublequote_chars
1786         }
1787     }
1788     else if (name.isEmpty())
1789     {
1790         qCDebug(KIO_MAN_LOG) << "EXCEPTION: font has no name => using R";
1791         name = "R"; // Let assume Regular // krazy:exclude=doublequote_chars
1792     }
1793     if (!skip_escape)
1794         return set_font(name);
1795     else
1796         return "";
1797 }
1798 
1799 //---------------------------------------------------------------------
1800 
scan_number_code(char * & c)1801 static QByteArray scan_number_code(char*& c)
1802 {
1803     QByteArray number;
1804     if (*c != '\'')
1805         return "";
1806     c++; // Go past the opening single quote
1807     while (*c && (*c != '\n') && (*c != '\''))
1808     {
1809         number += *c;
1810         c++;
1811     }
1812     bool ok = false;
1813     unsigned int result = number.toUInt(&ok);
1814     if ((result < ' ') || (result > 65535))
1815         return "";
1816     else if (result == '\t')
1817     {
1818         curpos += 8;
1819         curpos &= 0xfff8;
1820         return "\t";
1821     }
1822     number.setNum(result);
1823     number.prepend("&#");
1824     number.append(";");
1825     curpos ++;
1826     c++; // Go past the closing single quote
1827     return number;
1828 }
1829 
1830 //---------------------------------------------------------------------
1831 // ### TODO known missing escapes from groff(7):
1832 // ### TODO \R
1833 
scan_escape_direct(char * c,QByteArray & cstr)1834 static char *scan_escape_direct(char *c, QByteArray& cstr)
1835 {
1836     bool exoutputp;
1837     bool exskipescape;
1838     int i, j;
1839     bool cplusplus = true; // Should the c++ call be executed at the end of the function
1840 
1841     cstr.clear();
1842     intresult = 0;
1843     switch (*c)
1844     {
1845     case 'e':
1846         cstr += escapesym;
1847         curpos++;
1848         break;
1849     case '0': // space of digit width
1850         cstr = "&#8199;";  // Unicode FIGURE SPACE
1851         curpos++;
1852         break;
1853     case '~': // non-breakable-space (resizeable!)
1854     case ' ':
1855         cstr = "&nbsp;";
1856         curpos++;
1857         break;
1858     case '|': // half-non-breakable-space
1859     case '^': // quarter-non-breakable-space
1860         cstr = "&#8239;";  // Unicode NARROW NO-BREAK SPACE
1861         curpos++;
1862         break;
1863     case ':':
1864         break;  // ignore optional line break
1865     case ',':
1866         break;  //  left italic correction, always a zero motion
1867     case '/':
1868         cstr = "&#8201;";  // Unicode THIN SPACE
1869         curpos++;
1870         break;  // italic correction, i.e. a small piece of horizontal motion
1871     case '"': // comment. skip rest of line
1872         for (c++; *c && (*c != '\n'); c++) ;
1873         cplusplus = false;
1874         break;
1875     // ### TODO \# like \" but does not ignore the end of line (groff(7))
1876     case '$':
1877     {
1878         c++;
1879         cstr = scan_dollar_parameter(c);
1880         cplusplus = false;
1881         break;
1882     }
1883     case 'z':
1884     {
1885         c++;
1886         if (*c == '\\')
1887         {
1888             c = scan_escape_direct(c + 1, cstr);
1889             c--;
1890         }
1891         else
1892             cstr = QByteArray(c, 1);
1893         break;
1894     }
1895     case 'k':
1896     {
1897         // Store the current horizontal position in the _input_ line in
1898         // number register with name POSITION
1899         c++;
1900         cstr = scan_name(c);
1901         cstr.clear(); // TODO not implemented; discard it
1902         break;
1903     }
1904     case '!':
1905     case '%':
1906     case 'a':
1907     case 'd':
1908     case 'r':
1909     case 'u':
1910     case '\n':
1911     case '&': // Non-printing, zero width character
1912     case ')': // Transparent non-printing zero width character
1913         break;
1914     case '(':
1915     case '[':
1916     case 'C':
1917     {
1918         // Do not go forward as scan_named_character needs the leading symbol
1919         cstr = scan_named_character(c);
1920         cplusplus = false;
1921         break;
1922     }
1923     case '*':
1924     {
1925         c++;
1926         cstr = scan_named_string(c);
1927         cplusplus = false;
1928         break;
1929     }
1930     case 'f':
1931     {
1932         c++;
1933         cstr = scan_named_font(c);
1934         cplusplus = false;
1935         break;
1936     }
1937     case 'F':  // font family
1938     {
1939         c++;
1940         cstr = scan_name(c);
1941 
1942         if ( cstr == "C" )
1943             cstr = set_font("CR");
1944         else if ( cstr == "T" )
1945             cstr = set_font("TR");
1946         else if ( cstr == "H" )
1947             cstr = set_font("HR");
1948         else
1949             cstr = set_font(cstr);
1950 
1951         break;
1952     }
1953     case 'm': // color
1954     {
1955         c++;
1956         cstr = scan_name(c);
1957 
1958         if ( cstr.isEmpty() )
1959             cstr = "</span>";
1960         else
1961             cstr = "<span style='color:" + cstr + "'>";
1962 
1963         break;
1964     }
1965     case 's': // ### FIXME: many forms are missing
1966         c++;
1967         j = 0;
1968         i = 0;
1969         if (*c == '-')
1970         {
1971             j = -1;
1972             c++;
1973         }
1974         else if (*c == '+')
1975         {
1976             j = 1;
1977             c++;
1978         }
1979         if (*c == '0') c++;
1980         else if (*c == '\\')
1981         {
1982             c++;
1983             c = scan_escape_direct(c, cstr);
1984             i = intresult;
1985             if (!j) j = 1;
1986         }
1987         else
1988             while (isdigit(*c) && (!i || (!j && i < 4))) i = i * 10 + (*c++) - '0';
1989         if (!j)
1990         {
1991             j = 1;
1992             if (i) i = i - 10;
1993         }
1994         if (!skip_escape) cstr = change_to_size(i * j);
1995         c--;
1996         break;
1997     case 'n':
1998     {
1999         c++;
2000         intresult = scan_number_register(c);
2001         cplusplus = false;
2002         break;
2003     }
2004     case 'w':
2005         c++;
2006         i = *c;
2007         c++;
2008         exoutputp = output_possible;
2009         exskipescape = skip_escape;
2010         output_possible = false;
2011         skip_escape = true;
2012         j = 0;
2013         while (*c != i)
2014         {
2015             j++;
2016             if (*c == escapesym)
2017                 c = scan_escape_direct(c + 1, cstr);
2018             else
2019                 c++;
2020         }
2021         output_possible = exoutputp;
2022         skip_escape = exskipescape;
2023         intresult = j;
2024         break;
2025     case 'l':
2026         cstr = "<HR>";
2027         curpos = 0;
2028     case 'b':
2029     case 'v':
2030     case 'x':
2031     case 'o':
2032     case 'L':
2033     case 'h':
2034         c++;
2035         i = *c;
2036         c++;
2037         exoutputp = output_possible;
2038         exskipescape = skip_escape;
2039         output_possible = 0;
2040         skip_escape = true;
2041         while (*c != i)
2042             if (*c == escapesym) c = scan_escape_direct(c + 1, cstr);
2043             else c++;
2044         output_possible = exoutputp;
2045         skip_escape = exskipescape;
2046         break;
2047     case 'c':
2048         no_newline_output = 1;
2049         break;
2050     case '{':
2051         newline_for_fun++;
2052         break; // Start conditional block
2053     case '}':
2054         if (newline_for_fun) newline_for_fun--;
2055         break; // End conditional block
2056     case 'p':
2057         cstr = "<BR>\n";
2058         curpos = 0;
2059         break;
2060     case 't':
2061         cstr = "\t";
2062         curpos = (curpos + 8) & 0xfff8;
2063         break;
2064     case '<':
2065         cstr = "&lt;";
2066         curpos++;
2067         break;
2068     case '>':
2069         cstr = "&gt;";
2070         curpos++;
2071         break;
2072     case '\\':
2073     {
2074         if (single_escape)
2075             c--;
2076         else
2077             cstr = "\\";
2078         break;
2079     }
2080     case 'N':
2081     {
2082         c++;
2083         cstr = scan_number_code(c);
2084         cplusplus = false;
2085         break;
2086     }
2087     case '\'':
2088         cstr = "&acute;";
2089         curpos++;
2090         break; // groff(7) ### TODO verify
2091     case '`':
2092         cstr = "`"; // krazy:exclude=doublequote_chars
2093         curpos++;
2094         break; // groff(7)
2095     case '-':
2096         cstr = "-"; // krazy:exclude=doublequote_chars
2097         curpos++;
2098         break; // groff(7)
2099     case '.':
2100         cstr = "."; // krazy:exclude=doublequote_chars
2101         curpos++;
2102         break; // groff(7)
2103     default:
2104         cstr = QByteArray(c, 1);
2105         curpos++;
2106         break;
2107     }
2108     if (cplusplus && *c)
2109         c++;
2110     return c;
2111 }
2112 
2113 //---------------------------------------------------------------------
2114 
scan_escape(char * c)2115 static char *scan_escape(char *c)
2116 {
2117     QByteArray cstr;
2118     char* result = scan_escape_direct(c, cstr);
2119     if (!skip_escape)
2120         out_html(cstr);
2121     return result;
2122 }
2123 
2124 //---------------------------------------------------------------------
2125 
2126 class TABLEROW;
2127 
2128 class TABLEITEM
2129 {
2130 public:
2131     TABLEITEM(TABLEROW *row);
~TABLEITEM()2132     ~TABLEITEM()
2133     {
2134         delete [] contents;
2135     }
setContents(const char * _contents)2136     void setContents(const char *_contents)
2137     {
2138         delete [] contents;
2139         contents = qstrdup(_contents);
2140     }
getContents() const2141     const char *getContents() const
2142     {
2143         return contents;
2144     }
2145 
init()2146     void init()
2147     {
2148         delete [] contents;
2149         contents = nullptr;
2150         size = 0;
2151         align = 0;
2152         valign = 0;
2153         colspan = 1;
2154         rowspan = 1;
2155         font = 0;
2156         vleft = 0;
2157         vright = 0;
2158         space = 0;
2159         width = 0;
2160     }
2161 
copyLayout(const TABLEITEM * orig)2162     void copyLayout(const TABLEITEM *orig)
2163     {
2164         size = orig->size;
2165         align = orig->align;
2166         valign = orig->valign;
2167         colspan = orig->colspan;
2168         rowspan = orig->rowspan;
2169         font = orig->font;
2170         vleft = orig->vleft;
2171         vright = orig->vright;
2172         space = orig->space;
2173         width = orig->width;
2174     }
2175 
2176 public:
2177     int size, align, valign, colspan, rowspan, font, vleft, vright, space, width;
2178 
2179 private:
2180     char *contents;
2181     TABLEROW *_parent;
2182 };
2183 
2184 class TABLEROW
2185 {
2186     char *test;
2187 public:
TABLEROW()2188     TABLEROW()
2189     {
2190         test = new char;
2191         prev = nullptr;
2192         next = nullptr;
2193     }
~TABLEROW()2194     ~TABLEROW()
2195     {
2196         qDeleteAll(items);
2197         items.clear();
2198         delete test;
2199 
2200     }
length() const2201     int length() const
2202     {
2203         return items.count();
2204     }
has(int index)2205     bool has(int index)
2206     {
2207         return (index >= 0) && (index < (int)items.count());
2208     }
at(int index)2209     TABLEITEM &at(int index)
2210     {
2211         return *items.at(index);
2212     }
2213 
2214     TABLEROW *copyLayout() const;
2215 
addItem(TABLEITEM * item)2216     void addItem(TABLEITEM *item)
2217     {
2218         items.append(item);
2219     }
2220     TABLEROW *prev, *next;
2221 
2222 private:
2223     QList<TABLEITEM*> items;
2224 };
2225 
TABLEITEM(TABLEROW * row)2226 TABLEITEM::TABLEITEM(TABLEROW *row) : contents(nullptr), _parent(row)
2227 {
2228     init();
2229     _parent->addItem(this);
2230 }
2231 
copyLayout() const2232 TABLEROW *TABLEROW::copyLayout() const
2233 {
2234     TABLEROW *newrow = new TABLEROW();
2235 
2236     QListIterator<TABLEITEM *> it(items);
2237     while (it.hasNext())
2238     {
2239         TABLEITEM *newitem = new TABLEITEM(newrow);
2240         newitem->copyLayout(it.next());
2241     }
2242     return newrow;
2243 }
2244 
// Keywords recognised in the options line of a table. The position of a
// keyword in this list is the value scan_table() switches on; the list ends
// with a nullptr entry.
static const char * const tableopt[] = {
    "center",     // 0
    "expand",     // 1
    "box",        // 2
    "allbox",     // 3
    "doublebox",  // 4
    "tab",        // 5
    "linesize",   // 6
    "delim",      // 7
    nullptr
};
// Length of the keyword at the same position in tableopt (0 for the end marker)
static const int tableoptl[] = {
    6,  // center
    6,  // expand
    3,  // box
    6,  // allbox
    9,  // doublebox
    3,  // tab
    8,  // linesize
    5,  // delim
    0
};
2250 
2251 
clear_table(TABLEROW * table)2252 static void clear_table(TABLEROW *table)
2253 {
2254     TABLEROW *tr1, *tr2;
2255 
2256     tr1 = table;
2257     while (tr1->prev) tr1 = tr1->prev;
2258     while (tr1)
2259     {
2260         tr2 = tr1;
2261         tr1 = tr1->next;
2262         delete tr2;
2263     }
2264 }
2265 
2266 //---------------------------------------------------------------------
2267 
2268 static char *scan_expression(char *c, int *result);
2269 
2270 //---------------------------------------------------------------------
2271 
scan_format(char * c,TABLEROW ** result,int * maxcol)2272 static char *scan_format(char *c, TABLEROW **result, int *maxcol)
2273 {
2274     TABLEROW *layout, *currow;
2275     TABLEITEM *curfield;
2276     int i, j;
2277     if (*result)
2278     {
2279         clear_table(*result);
2280     }
2281     layout = currow = new TABLEROW();
2282     curfield = new TABLEITEM(currow);
2283     while (*c && *c != '.')
2284     {
2285         switch (*c)
2286         {
2287         case 'C':
2288         case 'c':
2289         case 'N':
2290         case 'n':
2291         case 'R':
2292         case 'r':
2293         case 'A':
2294         case 'a':
2295         case 'L':
2296         case 'l':
2297         case 'S':
2298         case 's':
2299         case '^':
2300         case '_':
2301             if (curfield->align)
2302                 curfield = new TABLEITEM(currow);
2303             curfield->align = toupper(*c);
2304             c++;
2305             break;
2306         case 'i':
2307         case 'I':
2308         case 'B':
2309         case 'b':
2310             curfield->font = toupper(*c);
2311             c++;
2312             break;
2313         case 'f':
2314         case 'F':
2315             c++;
2316             curfield->font = toupper(*c);
2317             c++;
2318             if (!isspace(*c) && *c != '.') c++;
2319             break;
2320         case 't':
2321         case 'T':
2322             curfield->valign = 't';
2323             c++;
2324             break;
2325         case 'p':
2326         case 'P':
2327             c++;
2328             i = j = 0;
2329             if (*c == '+')
2330             {
2331                 j = 1;
2332                 c++;
2333             }
2334             if (*c == '-')
2335             {
2336                 j = -1;
2337                 c++;
2338             }
2339             while (isdigit(*c)) i = i * 10 + (*c++) - '0';
2340             if (j) curfield->size = i * j;
2341             else curfield->size = j - 10;
2342             break;
2343         case 'v':
2344         case 'V':
2345         case 'w':
2346         case 'W':
2347             c = scan_expression(c + 2, &curfield->width);
2348             break;
2349         case '|':
2350             if (curfield->align) curfield->vleft++;
2351             else curfield->vright++;
2352             c++;
2353             break;
2354         case 'e':
2355         case 'E':
2356             c++;
2357             break;
2358         case '0':
2359         case '1':
2360         case '2':
2361         case '3':
2362         case '4':
2363         case '5':
2364         case '6':
2365         case '7':
2366         case '8':
2367         case '9':
2368             i = 0;
2369             while (isdigit(*c)) i = i * 10 + (*c++) - '0';
2370             curfield->space = i;
2371             break;
2372         case ',':
2373         case '\n':
2374             currow->next = new TABLEROW();
2375             currow->next->prev = currow;
2376             currow = currow->next;
2377             currow->next = nullptr;
2378             curfield = new TABLEITEM(currow);
2379             c++;
2380             break;
2381         default:
2382             c++;
2383             break;
2384         }
2385     }
2386     if (*c == '.') while (*c++ != '\n');
2387     *maxcol = 0;
2388     currow = layout;
2389     while (currow)
2390     {
2391         i = currow->length();
2392         if (i > *maxcol) *maxcol = i;
2393         currow = currow->next;
2394     }
2395     *result = layout;
2396     return c;
2397 }
2398 
2399 //---------------------------------------------------------------------
2400 
next_row(TABLEROW * tr)2401 static TABLEROW *next_row(TABLEROW *tr)
2402 {
2403     if (tr->next)
2404     {
2405         tr = tr->next;
2406         if (!tr->next)
2407             return next_row(tr);
2408         return tr;
2409     }
2410     else
2411     {
2412         tr->next = tr->copyLayout();
2413         tr->next->prev = tr;
2414         return tr->next;
2415     }
2416 }
2417 
2418 //---------------------------------------------------------------------
2419 
// The troff sequence \fR\s0 (font R, size 0). scan_table() scans it after the
// text of each cell. It lives in a writable array because it is passed to
// scan_troff() as a plain char pointer.
static char itemreset[20] = "\\fR\\s0";

// Moves curfield to the next column of currow, stepping over columns whose
// alignment is 'S'. Expects the locals curfield (int) and currow (TABLEROW*).
// The macro carries no trailing semicolon of its own, so that "FORWARDCUR;"
// is exactly one statement.
#define FORWARDCUR  do { curfield++; } while (currow->has(curfield) &&  currow->at(curfield).align=='S')
2423 
scan_table(char * c)2424 static char *scan_table(char *c)
2425 {
2426     char *h;
2427     char *g;
2428     int center = 0, expand = 0, box = 0, border = 0, linesize = 1;
2429     int i, j, maxcol = 0, finished = 0;
2430     QByteArray oldfont;
2431     int oldsize, oldfillout;
2432     char itemsep = '\t';
2433     TABLEROW *layout = nullptr, *currow;
2434     int curfield = -1;
2435     while (*c++ != '\n');
2436     h = c;
2437     if (*h == '.') return c -1;
2438     oldfont = current_font;
2439     oldsize = current_size;
2440     oldfillout = fillout;
2441     out_html(set_font("R"));
2442     out_html(change_to_size(0));
2443     if (!fillout)
2444     {
2445         fillout = 1;
2446         out_html("</PRE>");
2447     }
2448     while (*h && *h != '\n') h++;
2449     if (h[-1] == ';')
2450     {
2451         /* scan table options */
2452         while (c < h)
2453         {
2454             while (isspace(*c)) c++;
2455             for (i = 0; tableopt[i] && qstrncmp(tableopt[i], c, tableoptl[i]); i++);
2456             c = c + tableoptl[i];
2457             switch (i)
2458             {
2459             case 0:
2460                 center = 1;
2461                 break;
2462             case 1:
2463                 expand = 1;
2464                 break;
2465             case 2:
2466                 box = 1;
2467                 break;
2468             case 3:
2469                 border = 1;
2470                 break;
2471             case 4:
2472                 box = 2;
2473                 break;
2474             case 5:
2475                 while (*c++ != '(');
2476                 itemsep = *c++;
2477                 break;
2478             case 6:
2479                 while (*c++ != '(');
2480                 linesize = 0;
2481                 while (isdigit(*c)) linesize = linesize * 10 + (*c++) - '0';
2482                 break;
2483             case 7:
2484                 while (*c != ')') c++;
2485             default:
2486                 break;
2487             }
2488             c++;
2489         }
2490         c = h + 1;
2491     }
2492     /* scan layout */
2493     c = scan_format(c, &layout, &maxcol);
2494 //    currow=layout;
2495     currow = next_row(layout);
2496     curfield = 0;
2497     i = 0;
2498     while (!finished && *c)
2499     {
2500         /* search item */
2501         h = c;
2502         if ((*c == '_' || *c == '=') && (c[1] == itemsep || c[1] == '\n'))
2503         {
2504             if (c[-1] == '\n' && c[1] == '\n')
2505             {
2506                 if (currow->prev)
2507                 {
2508                     currow->prev->next = new TABLEROW();
2509                     currow->prev->next->next = currow;
2510                     currow->prev->next->prev = currow->prev;
2511                     currow->prev = currow->prev->next;
2512                 }
2513                 else
2514                 {
2515                     currow->prev = layout = new TABLEROW();
2516                     currow->prev->prev = nullptr;
2517                     currow->prev->next = currow;
2518                 }
2519                 TABLEITEM *newitem = new TABLEITEM(currow->prev);
2520                 newitem->align = *c;
2521                 newitem->colspan = maxcol;
2522                 curfield = 0;
2523                 c = c + 2;
2524             }
2525             else
2526             {
2527                 if (currow->has(curfield))
2528                 {
2529                     currow->at(curfield).align = *c;
2530                     FORWARDCUR;
2531                 }
2532                 if (c[1] == '\n')
2533                 {
2534                     currow = next_row(currow);
2535                     curfield = 0;
2536                 }
2537                 c = c + 2;
2538             }
2539         }
2540         else if (*c == 'T' && c[1] == '{')
2541         {
2542             h = c + 2;
2543             c = strstr(h, "\nT}");
2544             c++;
2545             *c = '\0';
2546             g = nullptr;
2547             scan_troff(h, 0, &g);
2548             scan_troff(itemreset, 0, &g);
2549             *c = 'T';
2550             c += 3;
2551             if (currow->has(curfield))
2552             {
2553                 currow->at(curfield).setContents(g);
2554                 FORWARDCUR;
2555             }
2556             delete [] g;
2557 
2558             if (c[-1] == '\n')
2559             {
2560                 currow = next_row(currow);
2561                 curfield = 0;
2562             }
2563         }
2564         else if (*c == '.' && c[1] == 'T' && c[2] == '&' && c[-1] == '\n')
2565         {
2566             TABLEROW *hr;
2567             while (*c++ != '\n');
2568             hr = currow;
2569             currow = currow->prev;
2570             hr->prev = nullptr;
2571             c = scan_format(c, &hr, &i);
2572             hr->prev = currow;
2573             currow->next = hr;
2574             currow = hr;
2575             next_row(currow);
2576             curfield = 0;
2577         }
2578         else if (*c == '.' && c[1] == 'T' && c[2] == 'E' && c[-1] == '\n')
2579         {
2580             finished = 1;
2581             while (*c++ != '\n');
2582             if (currow->prev)
2583                 currow->prev->next = nullptr;
2584             currow->prev = nullptr;
2585             clear_table(currow);
2586             currow = nullptr;
2587         }
2588         else if (*c == '.' && c[-1] == '\n' && !isdigit(c[1]))
2589         {
2590             /* skip troff request inside table (usually only .sp ) */
2591             while (*c++ != '\n');
2592         }
2593         else
2594         {
2595             h = c;
2596             while (*c && (*c != itemsep || c[-1] == '\\') &&
2597                     (*c != '\n' || c[-1] == '\\')) c++;
2598             i = 0;
2599             if (*c == itemsep)
2600             {
2601                 i = 1;
2602                 *c = '\n';
2603             }
2604             if (h[0] == '\\' && h[2] == '\n' &&
2605                     (h[1] == '_' || h[1] == '^'))
2606             {
2607                 if (currow->has(curfield))
2608                 {
2609                     currow->at(curfield).align = h[1];
2610                     FORWARDCUR;
2611                 }
2612                 h = h + 3;
2613             }
2614             else
2615             {
2616                 g = nullptr;
2617                 h = scan_troff(h, 1, &g);
2618                 scan_troff(itemreset, 0, &g);
2619                 if (currow->has(curfield))
2620                 {
2621                     currow->at(curfield).setContents(g);
2622                     FORWARDCUR;
2623                 }
2624                 delete [] g;
2625             }
2626             if (i) *c = itemsep;
2627             c = h;
2628             if (c[-1] == '\n')
2629             {
2630                 currow = next_row(currow);
2631                 curfield = 0;
2632             }
2633         }
2634     }
2635     /* calculate colspan and rowspan */
2636     currow = layout;
2637     while (currow->next) currow = currow->next;
2638     while (currow)
2639     {
2640         int ti = 0, ti1 = 0, ti2 = -1;
2641         TABLEROW *prev = currow->prev;
2642         if (!prev)
2643             break;
2644 
2645         while (prev->has(ti1))
2646         {
2647             if (currow->has(ti))
2648                 switch (currow->at(ti).align)
2649                 {
2650                 case 'S':
2651                     if (currow->has(ti2))
2652                     {
2653                         currow->at(ti2).colspan++;
2654                         if (currow->at(ti2).rowspan < prev->at(ti1).rowspan)
2655                             currow->at(ti2).rowspan = prev->at(ti1).rowspan;
2656                     }
2657                     break;
2658                 case '^':
2659                     if (prev->has(ti1)) prev->at(ti1).rowspan++;
2660                 default:
2661                     if (ti2 < 0) ti2 = ti;
2662                     else
2663                     {
2664                         do
2665                         {
2666                             ti2++;
2667                         }
2668                         while (currow->has(ti2) && currow->at(ti2).align == 'S');
2669                     }
2670                     break;
2671                 }
2672             ti++;
2673             if (ti1 >= 0) ti1++;
2674         }
2675         currow = currow->prev;
2676     }
2677     /* produce html output */
2678     if (center) out_html("<CENTER>");
2679     if (box == 2) out_html("<TABLE BORDER><TR><TD>");
2680     out_html("<TABLE");
2681     if (box || border)
2682     {
2683         out_html(" BORDER");
2684         if (!border) out_html("><TR><TD><TABLE");
2685         if (expand) out_html(" WIDTH=\"100%\"");
2686     }
2687     out_html(">\n");
2688     currow = layout;
2689     while (currow)
2690     {
2691         j = 0;
2692         out_html("<TR VALIGN=top>");
2693         curfield = 0;
2694         while (currow->has(curfield))
2695         {
2696             if (currow->at(curfield).align != 'S' && currow->at(curfield).align != '^')
2697             {
2698                 out_html("<TD style='padding-right:10px; padding-left:10px;'");
2699                 switch (currow->at(curfield).align)
2700                 {
2701                 case 'N':
2702                     currow->at(curfield).space += 4;
2703                 case 'R':
2704                     out_html(" ALIGN=right");
2705                     break;
2706                 case 'C':
2707                     out_html(" ALIGN=center");
2708                 default:
2709                     break;
2710                 }
2711                 if (!currow->at(curfield).valign && currow->at(curfield).rowspan > 1)
2712                     out_html(" VALIGN=center");
2713                 if (currow->at(curfield).colspan > 1)
2714                 {
2715                     out_html(" COLSPAN=");
2716                     out_html(QByteArray::number(currow->at(curfield).colspan));
2717                 }
2718                 if (currow->at(curfield).rowspan > 1)
2719                 {
2720                     out_html(" ROWSPAN=");
2721                     out_html(QByteArray::number(currow->at(curfield).rowspan));
2722                 }
2723                 j = j + currow->at(curfield).colspan;
2724                 out_html(">");
2725                 if (currow->at(curfield).size) out_html(change_to_size(currow->at(curfield).size));
2726                 if (currow->at(curfield).font)
2727                     out_html(set_font(QByteArray::number(currow->at(curfield).font)));
2728                 switch (currow->at(curfield).align)
2729                 {
2730                 case '=':
2731                     out_html("<HR><HR>");
2732                     break;
2733                 case '_':
2734                     out_html("<HR>");
2735                     break;
2736                 default:
2737                     out_html(currow->at(curfield).getContents());
2738                     break;
2739                 }
2740                 if (currow->at(curfield).space)
2741                     for (i = 0; i < currow->at(curfield).space; i++) out_html("&nbsp;");
2742                 if (currow->at(curfield).font) out_html(set_font("R"));
2743                 if (currow->at(curfield).size) out_html(change_to_size(0));
2744                 if (j >= maxcol && currow->at(curfield).align > '@' && currow->at(curfield).align != '_')
2745                     out_html("<BR>");
2746                 out_html("</TD>");
2747             }
2748             curfield++;
2749         }
2750         out_html("</TR>\n");
2751         currow = currow->next;
2752     }
2753 
2754     clear_table(layout);
2755 
2756     if (box && !border) out_html("</TABLE>");
2757     out_html("</TABLE>");
2758     if (box == 2) out_html("</TABLE>");
2759     if (center)
2760         out_html("</CENTER>\n");
2761     else
2762         out_html("\n");
2763     if (!oldfillout) out_html("<PRE>");
2764     fillout = oldfillout;
2765     out_html(change_to_size(oldsize));
2766     out_html(set_font(oldfont));
2767     return c;
2768 }
2769 
2770 //---------------------------------------------------------------------
2771 
/**
 * Evaluate the troff expression starting at \p c and store its value in \p result.
 *
 * The following forms are recognised (tested in this order):
 *  - "!expr"  : logical negation of the expression that follows
 *  - "n"      : the value of s_nroff
 *  - "t"      : 1 - s_nroff
 *  - a string comparison <sep>string1<sep>string2<sep>, where <sep> is ', ", a
 *    character below ' ' or a 4-character "\(xx" escape sequence
 *  - anything else: a numeric expression, evaluated strictly from left to right
 *    (there is no operator precedence; parentheses recurse into this function)
 *
 * skip_escape is set to true for the duration of the evaluation and restored on return.
 *
 * \param c       start of the expression; the buffer is temporarily modified
 *                while a string comparison is evaluated
 * \param result  receives the value of the expression
 * \param numLoop parenthesis nesting depth; when greater than 0, white space
 *                does not end the expression
 * \return pointer to the first character after the evaluated expression
 */
static char *scan_expression(char *c, int *result, const unsigned int numLoop)
{
    // value: accumulated result, value2: operand just read, sign: pending unary sign
    // opex: >0 an operand was read in this iteration, <0 stop scanning
    int value = 0, value2, sign = 1, opex = 0;
    // Pending operator; 'c' is a pseudo operator meaning "copy the first operand"
    char oper = 'c';
    bool oldSkipEscape = skip_escape;
    skip_escape = true;  // evaluating an expression shall not print it

    if (*c == '!')
    {
        // NOTE(review): this calls the two-argument overload (presumably declared
        // earlier in the file), so the negated expression is scanned with numLoop == 0
        c = scan_expression(c + 1, &value);
        value = (!value);
    }
    else if (*c == 'n')
    {
        c++;
        value = s_nroff;
    }
    else if (*c == 't')
    {
        c++;
        value = 1 - s_nroff;
    }
    else if (*c == '\'' || *c == '"' || *c < ' ' || (*c == '\\' && c[1] == '('))
    {
        /* ?string1?string2?
        ** test if string1 equals string2.
        */
        char *st1 = nullptr, *st2 = nullptr, *h;
        char *tcmp = nullptr; // start of the 4-character "\(xx" separator, if that form is used
        char sep;
        sep = *c;
        if (sep == '\\')
        {
            tcmp = c;
            c = c + 3; // together with the c++ below: skip the 4 characters of the separator
        }
        c++;
        h = c;
        // Search the separator ending string1
        // NOTE(review): there is no check for the end of the buffer here; a missing
        // separator makes this loop run past the terminating NUL - TODO confirm/guard
        while (*c != sep && (!tcmp || qstrncmp(c, tcmp, 4))) c++;
        // Temporarily end string1 with a newline so that scan_troff stops there
        *c = '\n';
        scan_troff(h, 1, &st1);
        *c = sep;
        if (tcmp) c = c + 3;
        c++;
        h = c;
        // Same procedure for string2 (NOTE(review): same missing end-of-buffer check)
        while (*c != sep && (!tcmp || qstrncmp(c, tcmp, 4))) c++;
        *c = '\n';
        scan_troff(h, 1, &st2);
        *c = sep;
        // Two missing results compare equal, a single missing one compares unequal
        if (!st1 && !st2) value = 1;
        else if (!st1 || !st2) value = 0;
        else value = (!qstrcmp(st1, st2));
        delete [] st1;
        delete [] st2;
        if (tcmp) c = c + 3;
        c++;
    }
    else
    {
        // Numeric expression: stop at the end of the string, at ')', at white space
        // (only outside of parentheses) or when opex was set to a negative value
        while (*c && (!isspace(*c) || (numLoop > 0)) && *c != ')' && opex >= 0)
        {
            opex = 0;
            switch (*c)
            {
            case '(':
                // Sub-expression: evaluate recursively with an increased nesting depth
                c = scan_expression(c + 1, &value2, numLoop + 1);
                value2 = sign * value2;
                opex = 1;
                break;
            case '.':
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            {
                // Number with an optional fractional part num/denum and an optional scale indicator
                int num = 0, denum = 1;
                value2 = 0;
                while (isdigit(*c)) value2 = value2 * 10 + ((*c++) - '0');
                if (*c == '.' && isdigit(c[1]))
                {
                    c++;
                    while (isdigit(*c))
                    {
                        num = num * 10 + ((*c++) - '0');
                        denum = denum * 10;
                    }
                }
                if (isalpha(*c))
                {
                    /* scale indicator */
                    switch (*c)
                    {
                    case 'i': /* inch -> 10pt */
                        // Scale integer and (rounded) fractional part by 10
                        value2 = value2 * 10 + (num * 10 + denum / 2) / denum;
                        num = 0;
                        break;
                    default:
                        // Any other scale indicator is skipped without scaling the value
                        break;
                    }
                    c++;
                }
                // Add the fractional part, rounded to the nearest integer
                value2 = value2 + (num + denum / 2) / denum;
                value2 = sign * value2;
                opex = 1;
                // A '.' directly after the number ends the scan; as opex becomes
                // negative, the number just read is not applied to value below
                if (*c == '.')
                    opex = -1;

            }
            break;
            case '\\':
                // Escape sequence: its numeric value is taken from intresult
                c = scan_escape(c + 1);
                value2 = intresult * sign;
                if (isalpha(*c)) c++; /* scale indicator */
                opex = 1;
                break;
            case '-':
                // With an operator pending (oper != 0, which includes the start of the
                // expression) the '-' is a sign; otherwise it is the subtraction operator
                if (oper)
                {
                    sign = -1;
                    c++;
                    break;
                }
                Q_FALLTHROUGH();
            case '>':
            case '<':
            case '+':
            case '/':
            case '*':
            case '%':
            case '&':
            case '=':
            case ':':
                // Two-character operators (">=", "<=", "==", ...) are encoded as first character + 16
                if (c[1] == '=') oper = (*c++) + 16;
                else oper = *c;
                c++;
                break;
            default:
                // Anything else (including white space inside parentheses) is skipped
                c++;
                break;
            }
            if (opex > 0)
            {
                // An operand was read: apply the pending operator to it
                sign = 1;
                switch (oper)
                {
                case 'c':
                    value = value2;
                    break;
                case '-':
                    value = value - value2;
                    break;
                case '+':
                    value = value + value2;
                    break;
                case '*':
                    value = value * value2;
                    break;
                case '/':
                    // Division by zero leaves value unchanged
                    if (value2) value = value / value2;
                    break;
                case '%':
                    // Modulo by zero leaves value unchanged
                    if (value2) value = value % value2;
                    break;
                case '<':
                    value = (value < value2);
                    break;
                case '>':
                    value = (value > value2);
                    break;
                case '>'+16:
                    value = (value >= value2);
                    break;
                case '<'+16:
                    value = (value <= value2);
                    break;
                case '=':
                case '='+16:
                    value = (value == value2);
                    break;
                case '&':
                    value = (value && value2);
                    break;
                case ':':
                    value = (value || value2);
                    break;
                default:
                {
                    qCDebug(KIO_MAN_LOG) << "Unknown operator " << char(oper);
                }
                }
                // No operator pending anymore
                oper = 0;
            }
        }
        // Consume the parenthesis closing this (sub-)expression
        if (*c == ')') c++;
    }
    *result = value;

    skip_escape = oldSkipEscape;

    return c;
}
2979 
2980 //---------------------------------------------------------------------
2981 
scan_expression(char * c,int * result)2982 static char *scan_expression(char *c, int *result)
2983 {
2984     return scan_expression(c, result, 0);
2985 }
2986 
2987 //---------------------------------------------------------------------
2988 
trans_char(char * c,char s,char t)2989 static void trans_char(char *c, char s, char t)
2990 {
2991     char *sl = c;
2992     int slash = 0;
2993     while (*sl != '\n' || slash)
2994     {
2995         if (!slash)
2996         {
2997             if (*sl == escapesym)
2998                 slash = 1;
2999             else if (*sl == s)
3000                 *sl = t;
3001         }
3002         else slash = 0;
3003         sl++;
3004     }
3005 }
3006 
3007 //---------------------------------------------------------------------
3008 // parse 1 line (or a line which stretches multiple lines by \(enter) )
3009 // return all arguments starting at \p c in \p args
3010 // returns the pointer to the next char where scanning should continue
3011 // (which is the char after the ending \n)
3012 // argPointers .. a list of pointers to the startchars of each arg pointing into the string given with c
3013 
getArguments(char * & c,QList<QByteArray> & args,QList<char * > * argPointers=nullptr)3014 void getArguments(/* const */ char *&c, QList<QByteArray> &args, QList<char*> *argPointers = nullptr)
3015 {
3016     args.clear();
3017     if ( argPointers )
3018         argPointers->clear();
3019 
3020     QByteArray arg;
3021     arg.reserve(30);  // reduce num of reallocs
3022     bool inString = false;
3023     bool inArgument = false;
3024 
3025     for (; *c && (*c != '\n'); c++)
3026     {
3027         if ( *c == '"' )
3028         {
3029             if ( !inString )
3030             {
3031                 inString = true;  // start of quoted argument
3032             }
3033             else
3034             {
3035                 // according to http://heirloom.sourceforge.net/doctools/troff.pdf chapter 7.3
3036                 // two consecutive quotes inside a string is one quote char
3037                 if ( *(c+1) == '"' )
3038                 {
3039                     arg += '"';
3040                     c++;
3041                 }
3042                 else  // end of quoted argument
3043                 {
3044                     args.append(arg);
3045                     arg.clear();
3046                     inString = false;
3047                     inArgument = false;
3048                 }
3049             }
3050         }
3051         else if ( *c == ' ' )
3052         {
3053             if ( inString )
3054             {
3055                 arg += *c;
3056                 if ( !inArgument )  // argument not yet found (leading spaces)
3057                 {
3058                     inArgument = true;
3059 
3060                     if ( argPointers )
3061                         argPointers->append(c);
3062                 }
3063             }
3064             else if ( inArgument )
3065             {
3066                 // end of previous argument
3067                 args.append(arg);
3068                 arg.clear();
3069                 inArgument = false;
3070             }
3071         }
3072         else if ( (*c == escapesym) && (*(c+1) == ' ') )
3073         {
3074             // special handling \<SP> shall be kept as is
3075             arg += *c++;
3076             arg += *c;
3077 
3078             if ( !inArgument )  // argument not yet found (leading spaces)
3079             {
3080                 inArgument = true;
3081 
3082                 if ( argPointers )
3083                     argPointers->append(c);
3084             }
3085         }
3086         else if ( (*c == escapesym) && (*(c+1) == '\n') )
3087         {
3088             c++;
3089         }
3090         else if ( (*c == escapesym) && (*(c+1) == '"') )  // start of comment; skip rest of line
3091         {
3092             if ( inArgument )
3093             {
3094                 // end of previous argument
3095                 args.append(arg);
3096                 arg.clear();
3097                 inArgument = false;
3098             }
3099 
3100             // skip rest of line
3101             while ( *c && (*c != '\n') ) c++;
3102             break;
3103         }
3104         else if ( *c != ' ' )
3105         {
3106             arg += *c;
3107             if ( !inArgument )  // argument not yet found (leading spaces)
3108             {
3109                 inArgument = true;
3110 
3111                 if ( argPointers )
3112                     argPointers->append(c);
3113             }
3114         }
3115     }
3116 
3117     if ( inArgument )
3118     {
3119         // end of previous argument
3120         args.append(arg);
3121     }
3122 
3123     if ( *c ) c++;
3124 }
3125 
3126 //---------------------------------------------------------------------
3127 
// Flat table of string pairs: an abbreviation followed by the full title it
// stands for. The table is searched by lookup_abbrev() and ends with a pair of
// null pointers. Some of the titles contain HTML markup or entities.
static const char * const abbrev_list[] =
{
    "GSBG", "Getting Started ",
    "SUBG", "Customizing SunOS",
    "SHBG", "Basic Troubleshooting",
    "SVBG", "SunView User's Guide",
    "MMBG", "Mail and Messages",
    "DMBG", "Doing More with SunOS",
    "UNBG", "Using the Network",
    "GDBG", "Games, Demos &amp; Other Pursuits",
    "CHANGE", "SunOS 4.1 Release Manual",
    "INSTALL", "Installing SunOS 4.1",
    "ADMIN", "System and Network Administration",
    "SECUR", "Security Features Guide",
    "PROM", "PROM User's Manual",
    "DIAG", "Sun System Diagnostics",
    "SUNDIAG", "Sundiag User's Guide",
    "MANPAGES", "SunOS Reference Manual",
    "REFMAN", "SunOS Reference Manual",
    "SSI", "Sun System Introduction",
    "SSO", "System Services Overview",
    "TEXT", "Editing Text Files",
    "DOCS", "Formatting Documents",
    "TROFF", "Using <B>nroff</B> and <B>troff</B>",
    "INDEX", "Global Index",
    "CPG", "C Programmer's Guide",
    "CREF", "C Reference Manual",
    "ASSY", "Assembly Language Reference",
    "PUL", "Programming Utilities and Libraries",
    "DEBUG", "Debugging Tools",
    "NETP", "Network Programming",
    "DRIVER", "Writing Device Drivers",
    "STREAMS", "STREAMS Programming",
    "SBDK", "SBus Developer's Kit",
    "WDDS", "Writing Device Drivers for the SBus",
    "FPOINT", "Floating-Point Programmer's Guide",
    "SVPG", "SunView 1 Programmer's Guide",
    "SVSPG", "SunView 1 System Programmer's Guide",
    "PIXRCT", "Pixrect Reference Manual",
    "CGI", "SunCGI Reference Manual",
    "CORE", "SunCore Reference Manual",
    "4ASSY", "Sun-4 Assembly Language Reference",
    "SARCH", "<FONT SIZE=\"-1\">SPARC</FONT> Architecture Manual",
    "KR", "The C Programming Language",
    nullptr, nullptr
};
3174 
lookup_abbrev(const char * c)3175 static const char *lookup_abbrev(const char *c)
3176 {
3177     int i = 0;
3178 
3179     if (!c) return "";
3180     while (abbrev_list[i] && qstrcmp(c, abbrev_list[i])) i = i + 2;
3181     if (abbrev_list[i])
3182         return abbrev_list[i+1];
3183     else
3184         return c;
3185 }
3186 
3187 //---------------------------------------------------------------------
3188 
// Flat table of string pairs: a manual section followed by the title of that
// section. Which sections are listed depends on the operating system the code
// is compiled for. The table is searched by section_name(); the first pair
// with a null section ends the search and carries the default title, the
// final pair of null pointers ends the table.
static const char * const section_list[] =
{
#ifdef Q_OS_SOLARIS
    // for Solaris
    "1", "User Commands",
    "1B", "SunOS/BSD Compatibility Package Commands",
    "1b", "SunOS/BSD Compatibility Package Commands",
    "1C", "Communication Commands ",
    "1c", "Communication Commands",
    "1F", "FMLI Commands ",
    "1f", "FMLI Commands",
    "1G", "Graphics and CAD Commands ",
    "1g", "Graphics and CAD Commands ",
    "1M", "Maintenance Commands",
    "1m", "Maintenance Commands",
    "1S", "SunOS Specific Commands",
    "1s", "SunOS Specific Commands",
    "2", "System Calls",
    "3", "C Library Functions",
    "3B", "SunOS/BSD Compatibility Library Functions",
    "3b", "SunOS/BSD Compatibility Library Functions",
    "3C", "C Library Functions",
    "3c", "C Library Functions",
    "3E", "C Library Functions",
    "3e", "C Library Functions",
    "3F", "Fortran Library Routines",
    "3f", "Fortran Library Routines",
    "3G", "C Library Functions",
    "3g", "C Library Functions",
    "3I", "Wide Character Functions",
    "3i", "Wide Character Functions",
    "3K", "Kernel VM Library Functions",
    "3k", "Kernel VM Library Functions",
    "3L", "Lightweight Processes Library",
    "3l", "Lightweight Processes Library",
    "3M", "Mathematical Library",
    "3m", "Mathematical Library",
    "3N", "Network Functions",
    "3n", "Network Functions",
    "3R", "Realtime Library",
    "3r", "Realtime Library",
    "3S", "Standard I/O Functions",
    "3s", "Standard I/O Functions",
    "3T", "Threads Library",
    "3t", "Threads Library",
    "3W", "C Library Functions",
    "3w", "C Library Functions",
    "3X", "Miscellaneous Library Functions",
    "3x", "Miscellaneous Library Functions",
    "4", "File Formats",
    "4B", "SunOS/BSD Compatibility Package File Formats",
    "4b", "SunOS/BSD Compatibility Package File Formats",
    "5", "Headers, Tables, and Macros",
    "6", "Games and Demos",
    "7", "Special Files",
    "7B", "SunOS/BSD Compatibility Special Files",
    "7b", "SunOS/BSD Compatibility Special Files",
    "8", "Maintenance Procedures",
    "8C", "Maintenance Procedures",
    "8c", "Maintenance Procedures",
    "8S", "Maintenance Procedures",
    "8s", "Maintenance Procedures",
    "9", "DDI and DKI",
    "9E", "DDI and DKI Driver Entry Points",
    "9e", "DDI and DKI Driver Entry Points",
    "9F", "DDI and DKI Kernel Functions",
    "9f", "DDI and DKI Kernel Functions",
    "9S", "DDI and DKI Data Structures",
    "9s", "DDI and DKI Data Structures",
    "L", "Local Commands",
#elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
    "1", "General Commands",
    "2", "System Calls",
    "3", "Library Functions",
    "4", "Kernel Interfaces",
    "5", "File Formats",
    "6", "Games",
    "7", "Miscellaneous Information",
    "8", "System Manager's Manuals",
    "9", "Kernel Developer's Manuals",
#else
    // Other OS
    "1", "User Commands ",
    "1C", "User Commands",
    "1G", "User Commands",
    "1S", "User Commands",
    "1V", "User Commands ",
    "2", "System Calls",
    "2V", "System Calls",
    "3", "C Library Functions",
    "3C", "Compatibility Functions",
    "3F", "Fortran Library Routines",
    "3K", "Kernel VM Library Functions",
    "3L", "Lightweight Processes Library",
    "3M", "Mathematical Library",
    "3N", "Network Functions",
    "3R", "RPC Services Library",
    "3S", "Standard I/O Functions",
    "3V", "C Library Functions",
    "3X", "Miscellaneous Library Functions",
    "4", "Devices and Network Interfaces",
    "4F", "Protocol Families",
    "4I", "Devices and Network Interfaces",
    "4M", "Devices and Network Interfaces",
    "4N", "Devices and Network Interfaces",
    "4P", "Protocols",
    "4S", "Devices and Network Interfaces",
    "4V", "Devices and Network Interfaces",
    "5", "File Formats",
    "5V", "File Formats",
    "6", "Games and Demos",
    "7", "Environments, Tables, and Troff Macros",
    "7V", "Environments, Tables, and Troff Macros",
    "8", "Maintenance Commands",
    "8C", "Maintenance Commands",
    "8S", "Maintenance Commands",
    "8V", "Maintenance Commands",
    "L", "Local Commands",
#endif
    // The defaults
    nullptr, "Misc. Reference Manual Pages", // title used when the section is not listed above
    nullptr, nullptr
};
3312 
section_name(char * c)3313 static const char *section_name(char *c)
3314 {
3315     int i = 0;
3316 
3317     if (!c) return "";
3318     while (section_list[i] && qstrcmp(c, section_list[i])) i = i + 2;
3319     if (section_list[i+1]) return section_list[i+1];
3320     else return c;
3321 }
3322 
skip_till_newline(char * c)3323 static char *skip_till_newline(char *c)
3324 {
3325     int lvl = 0;
3326 
3327     while (*c && (*c != '\n' || lvl > 0))
3328     {
3329         if (*c == '\\')
3330         {
3331             c++;
3332             if (*c == '}')
3333                 lvl--;
3334             else if (*c == '{')
3335                 lvl++;
3336             else if (*c == '\0')
3337                 break;
3338         }
3339         c++;
3340     }
3341     if (*c) c++;
3342     if (lvl < 0 && newline_for_fun)
3343     {
3344         newline_for_fun = newline_for_fun + lvl;
3345         if (newline_for_fun < 0) newline_for_fun = 0;
3346     }
3347     return c;
3348 }
3349 
3350 //---------------------------------------------------------------------
3351 
/// Set to true by request_while() for as long as it processes a .while loop
/// (the previous value is restored when the loop has ended)
static bool s_whileloop = false;
3353 
3354 /// Processing the .while request
request_while(char * & c,int j,bool mdoc)3355 static void request_while(char*& c, int j, bool mdoc)
3356 {
3357     // ### TODO: .continue
3358     qCDebug(KIO_MAN_LOG) << "Entering .while";
3359     c += j;
3360     char* newline = skip_till_newline(c);
3361     const char oldchar = *newline;
3362     *newline = 0;
3363     // We store the full .while stuff into a QByteArray as if it would be a macro
3364     const QByteArray macro = c ;
3365     qCDebug(KIO_MAN_LOG) << "'Macro' of .while" << BYTEARRAY(macro);
3366     // Prepare for continuing after .while loop end
3367     *newline = oldchar;
3368     c = newline;
3369     // Process -while loop
3370     const bool oldwhileloop = s_whileloop;
3371     s_whileloop = true;
3372     int result = true; // It must be an int due to the call to scan_expression
3373     break_the_while_loop = false;
3374     while (result && !break_the_while_loop)
3375     {
3376         // Unlike for a normal macro, we have the condition at start, so we do not need to prepend extra bytes
3377         char* liveloop = qstrdup(macro.data());
3378         qCDebug(KIO_MAN_LOG) << "Scanning .while condition";
3379         qCDebug(KIO_MAN_LOG) << "Loop macro " << liveloop;
3380         char* end_expression = scan_expression(liveloop, &result);
3381         qCDebug(KIO_MAN_LOG) << "After " << end_expression;
3382         if (result)
3383         {
3384             qCDebug(KIO_MAN_LOG) << "New .while iteration";
3385             // The condition is true, so call the .while's content
3386             char* help = end_expression + 1;
3387             while (*help && (*help == ' '  || *help == '\t'))
3388                 ++help;
3389             if (! *help)
3390             {
3391                 // We have a problem, so stop .while
3392                 result = false;
3393                 break;
3394             }
3395             if (mdoc)
3396                 scan_troff_mandoc(help, false, nullptr);
3397             else
3398                 scan_troff(help, false, nullptr);
3399         }
3400         delete[] liveloop;
3401     }
3402     break_the_while_loop = false;
3403 
3404     //
3405     s_whileloop = oldwhileloop;
3406     qCDebug(KIO_MAN_LOG) << "Ending .while";
3407 }
3408 
3409 //---------------------------------------------------------------------
3410 // Processing mixed fonts requests like .BI
3411 
request_mixed_fonts(char * & c,int j,const char * font1,const char * font2,const bool mode,const bool inFMode)3412 static void request_mixed_fonts(char*& c, int j, const char* font1, const char* font2, const bool mode, const bool inFMode)
3413 {
3414     c += j;
3415     if (*c == '\n') c++;
3416 
3417     QList<QByteArray> args;
3418     getArguments(c, args);
3419 
3420     for (int i = 0; i < args.count(); i++)
3421     {
3422         if (mode || inFMode)
3423         {
3424             out_html(" ");
3425             curpos++;
3426         }
3427         out_html(set_font((i&1) ? font2 : font1));
3428         scan_troff(args[i].data(), 1, nullptr);
3429     }
3430     out_html(set_font("R"));
3431     if (mode)
3432     {
3433         out_html(" ]");
3434         curpos++;
3435     }
3436     out_html(NEWLINE);
3437     if (!fillout)
3438         curpos = 0;
3439     else
3440         curpos++;
3441 }
3442 
3443 //---------------------------------------------------------------------
3444 
// &%(#@ c programs !!!
// A single value as it was used before is not sufficient:
//static int ifelseval=0;
// If/else can be nested! Therefore a stack of values is used instead.
static QStack<int> s_ifelseval;
3449 
3450 //---------------------------------------------------------------------
3451 
3452 // Process a (mdoc) request involving quotes
process_quote(char * c,int j,const char * open,const char * close)3453 static char* process_quote(char* c, int j, const char* open, const char* close)
3454 {
3455     trans_char(c, '"', '\a');
3456     c += j;
3457     if (*c == '\n') c++; // ### TODO: why? Quote requests cannot be empty!
3458     out_html(open);
3459     c = scan_troff_mandoc(c, 1, nullptr);
3460     out_html(close);
3461     out_html(NEWLINE);
3462     if (fillout)
3463         curpos++;
3464     else
3465         curpos = 0;
3466     return c;
3467 }
3468 
3469 //---------------------------------------------------------------------
/**
 * Is the char \p ch a punctuation in sense of mdoc(7)
 *
 * Only the characters . , ; : ( ) [ ] are treated as punctuation;
 * any other character (including all digits and letters) is not.
 */

static bool is_mdoc_punctuation(const char ch)
{
    switch (ch)
    {
    case '.':
    case ',':
    case ';':
    case ':':
    case '(':
    case ')':
    case '[':
    case ']':
        return true;
    default:
        return false;
    }
}
3484 
3485 //---------------------------------------------------------------------
/**
 * Can the char \p c be part of an identifier
 * \note For groff, an identifier can consist of nearly all ASCII printable non-white-space characters
 * See info:/groff/Identifiers
 *
 * Accepted are all characters from '!' up to '~', except the backslash.
 */

static bool is_identifier_char(const char c)
{
    if (c == '\\')
        return false; // ### TODO: it should be treated as escape instead!

    // digits, upper and lower case letters and the other printable non-white-space characters
    return c >= '!' && c <= '~';
}
3502 
3503 //---------------------------------------------------------------------
3504 
scan_identifier(char * & c)3505 static QByteArray scan_identifier(char*& c)
3506 {
3507     char* h = c; // help pointer
3508     // ### TODO Groff seems to eat nearly everything as identifier name (info:/groff/Identifiers)
3509     while (*h && *h != '\a' && *h != '\n' && is_identifier_char(*h))
3510         ++h;
3511     const char tempchar = *h;
3512     *h = 0;
3513     const QByteArray name = c;
3514     *h = tempchar;
3515     if (name.isEmpty())
3516     {
3517         qCDebug(KIO_MAN_LOG) << "EXCEPTION: identifier empty!";
3518     }
3519     c = h;
3520     return name;
3521 }
3522 
3523 //---------------------------------------------------------------------
3524 
scan_request(char * c)3525 static char *scan_request(char *c)
3526 {
3527     // mdoc(7) stuff
3528     static bool mandoc_synopsis = false; /* True if we are in the synopsis section */
3529     static bool mandoc_command = false;  /* True if this is mdoc(7) page */
3530     static int mandoc_bd_options; /* Only copes with non-nested Bd's */
3531     static int function_argument = 0; // Number of function argument (.Fo, .Fa, .Fc)
3532 
3533     int i = 0;
3534     bool mode = false;
3535     char *h = nullptr;
3536     char *sl;
3537     QList<QByteArray> args;
3538 
3539     while (*c == ' ' || *c == '\t') c++; // Spaces or tabs allowed between control character and request
3540     if (c[0] == '\n') return c + 1;
3541     if (c[0] == escapesym)
3542     {
3543         /* some pages use .\" .\$1 .\} */
3544         /* .\$1 is too difficult/stuppid */
3545         if (c[1] == '$')
3546         {
3547             qCDebug(KIO_MAN_LOG) << "Found .\\$";
3548             c = skip_till_newline(c); // ### TODO
3549         }
3550         else
3551         {
3552             // the result of the escape expansion must be parsed again
3553             c++;
3554             QByteArray cstr;
3555             c = scan_escape_direct(c, cstr);
3556             for (; *c && (*c != '\n'); c++) cstr += *c;
3557             if ( cstr.length() )
3558                 scan_request(cstr.data());
3559         }
3560     }
3561     else
3562     {
3563         int nlen = 0;
3564         QByteArray macroName;
3565         while (c[nlen] && (c[nlen] != ' ') && (c[nlen] != '\t') && (c[nlen] != '\n') && (c[nlen] != escapesym))
3566         {
3567             macroName += c[nlen];
3568             nlen++;
3569         }
3570         int j = nlen;
3571         while (c[j] == ' ' || c[j] == '\t') j++;
3572         /* search macro database of self-defined macros */
3573         QMap<QByteArray, StringDefinition>::const_iterator it = s_stringDefinitionMap.constFind(macroName);
3574 
3575         // ### HACK: e.g. nmap, smb.conf redefine SH, SS to increase the font, etc. for non-TTY output
3576         // Ignore those to make the HTML result look better
3577         if ( (macroName != "SH") && (macroName != "SS") &&
3578                 it != s_stringDefinitionMap.constEnd() )
3579         {
3580             qCDebug(KIO_MAN_LOG) << "CALLING MACRO: " << BYTEARRAY(macroName);
3581             const QByteArray oldDollarZero = s_dollarZero; // Previous value of $0
3582             s_dollarZero = macroName;
3583 
3584             c += j;
3585             getArguments(c, args);
3586             for (i = 0; i < args.count(); i++)
3587             {
3588                 char *h = nullptr;
3589 
3590                 if (mandoc_command)
3591                     scan_troff_mandoc(args[i].data(), 1, &h);
3592                 else
3593                     scan_troff(args[i].data(), 1, &h);
3594 
3595                 args[i] = h;
3596                 delete [] h;
3597             }
3598 
3599             if (!(*it).m_output.isEmpty())
3600             {
3601                 //qCDebug(KIO_MAN_LOG) << "Macro content is: "<< BYTEARRAY( (*it).m_output );
3602                 const unsigned int length = (*it).m_output.length();
3603                 char* work = new char [length+2];
3604                 work[0] = '\n'; // The macro must start after an end of line to allow a request on first line
3605                 qstrncpy(work + 1, (*it).m_output.data(), length + 1);
3606                 const QList<QByteArray> oldArgumentList(s_argumentList);
3607                 s_argumentList.clear();
3608                 for (i = 0; i < args.count(); i++)
3609                     s_argumentList.push_back(args[i]);
3610 
3611                 const int onff = newline_for_fun;
3612                 if (mandoc_command)
3613                     scan_troff_mandoc(work + 1, 0, nullptr);
3614                 else
3615                     scan_troff(work + 1, 0, nullptr);
3616                 delete[] work;
3617                 newline_for_fun = onff;
3618                 s_argumentList = oldArgumentList;
3619             }
3620             s_dollarZero = oldDollarZero;
3621             qCDebug(KIO_MAN_LOG) << "ENDING MACRO: " << BYTEARRAY(macroName);
3622         }
3623         else
3624         {
3625             qCDebug(KIO_MAN_LOG) << "REQUEST: " << BYTEARRAY(macroName);
3626             switch (RequestNum request = RequestHash::getRequest(macroName, macroName.length()))
3627             {
3628             case REQ_ab: // groff(7) "ABort"
3629             {
3630                 h = c + j;
3631                 while (*h && *h != '\n') h++;
3632                 *h = '\0';
3633                 if (scaninbuff && buffpos)
3634                 {
3635                     buffer[buffpos] = '\0';
3636                     qCDebug(KIO_MAN_LOG) << "ABORT: " << buffer;
3637                 }
3638                 // ### TODO find a way to display it to the user
3639                 qCDebug(KIO_MAN_LOG) << "Aborting: .ab " << (c + j);
3640                 return nullptr;
3641                 break;
3642             }
3643             case REQ_An: // mdoc(7) "Author Name"
3644             {
3645                 c += j;
3646                 c = scan_troff_mandoc(c, 1, nullptr);
3647                 break;
3648             }
3649             case REQ_di: // groff(7) "end current DIversion"
3650             {
3651                 qCDebug(KIO_MAN_LOG) << "Start .di";
3652                 c += j;
3653                 if (*c == '\n')
3654                 {
3655                     ++c;
3656                     break;
3657                 }
3658                 const QByteArray name(scan_identifier(c));
3659                 while (*c && *c != '\n') c++;
3660                 c++;
3661                 h = c;
3662                 while (*c && qstrncmp(c, ".di", 3)) while (*c && *c++ != '\n');
3663                 *c = '\0';
3664                 char* result = nullptr;
3665                 scan_troff(h, 0, &result);
3666                 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name);
3667                 if (it == s_stringDefinitionMap.end())
3668                 {
3669                     StringDefinition def;
3670                     def.m_length = 0;
3671                     def.m_output = result;
3672                     s_stringDefinitionMap.insert(name, def);
3673                 }
3674                 else
3675                 {
3676                     (*it).m_length = 0;
3677                     (*it).m_output = result;
3678                 }
3679                 delete[] result;
3680                 if (*c) *c = '.';
3681                 c = skip_till_newline(c);
3682                 qCDebug(KIO_MAN_LOG) << "end .di";
3683                 break;
3684             }
3685             case REQ_ds: // groff(7) "Define String variable"
3686                 mode = true;
3687                 Q_FALLTHROUGH();
3688             case REQ_as: // groff (7) "Append String variable"
3689             {
3690                 qCDebug(KIO_MAN_LOG) << "start .ds/.as";
3691                 int oldcurpos = curpos;
3692                 c += j;
3693                 const QByteArray name(scan_identifier(c));
3694                 if (name.isEmpty())
3695                     break;
3696                 // an initial " is removed to allow leading space
3697                 while (*c && isspace(*c)) c++;
3698                 if (*c == '"') c++;
3699 
3700                 single_escape = true;
3701                 curpos = 0;
3702                 char* result = nullptr;
3703                 c = scan_troff(c, 1, &result);
3704                 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name);
3705                 if (it == s_stringDefinitionMap.end())
3706                 {
3707                     StringDefinition def;
3708                     def.m_length = curpos;
3709                     def.m_output = result;
3710                     s_stringDefinitionMap.insert(name, def);
3711                 }
3712                 else
3713                 {
3714                     if (mode)
3715                     {   // .ds Defining String
3716                         (*it).m_length = curpos;
3717                         (*it).m_output = result;
3718                     }
3719                     else
3720                     {   // .as Appending String
3721                         (*it).m_length += curpos;
3722                         (*it).m_output += result;
3723                     }
3724                 }
3725                 delete[] result;
3726                 single_escape = false;
3727                 curpos = oldcurpos;
3728                 qCDebug(KIO_MAN_LOG) << "end .ds/.as";
3729                 break;
3730             }
3731             case REQ_br: // groff(7) "line BReak"
3732             {
3733                 if (still_dd)
3734                     out_html("<DD>"); // ### VERIFY (does not look like generating good HTML)
3735                 else
3736                     out_html("<BR>\n");
3737                 curpos = 0;
3738                 c = c + j;
3739                 if (c[0] == escapesym) c = scan_escape(c + 1);
3740                 c = skip_till_newline(c);
3741                 break;
3742             }
3743             case REQ_c2: // groff(7) "reset non-break Control character" (2 means non-break)
3744             {
3745                 c = c + j;
3746                 if (*c != '\n')
3747                     nobreaksym = *c;
3748                 else
3749                     nobreaksym = '\'';
3750                 c = skip_till_newline(c);
3751                 break;
3752             }
3753             case REQ_cc: // groff(7) "reset Control Character"
3754             {
3755                 c = c + j;
3756                 if (*c != '\n')
3757                     controlsym = *c;
3758                 else
3759                     controlsym = '.';
3760                 c = skip_till_newline(c);
3761                 break;
3762             }
3763             case REQ_ce: // groff (7) "CEnter"
3764             {
3765                 c = c + j;
3766                 if (*c == '\n')
3767                     i = 1;
3768                 else
3769                 {
3770                     i = 0;
3771                     while ('0' <= *c && *c <= '9')
3772                     {
3773                         i = i * 10 + *c - '0';
3774                         c++;
3775                     }
3776                 }
3777                 c = skip_till_newline(c);
3778                 /* center next i lines */
3779                 if (i > 0)
3780                 {
3781                     out_html("<CENTER>\n");
3782                     while (i && *c)
3783                     {
3784                         char *line = nullptr;
3785                         c = scan_troff(c, 1, &line);
3786                         if (line && qstrncmp(line, "<BR>", 4))
3787                         {
3788                             out_html(line);
3789                             out_html("<BR>\n");
3790                             delete [] line; // ### FIXME: memory leak!
3791                             i--;
3792                         }
3793                     }
3794                     out_html("</CENTER>\n");
3795                     curpos = 0;
3796                 }
3797                 break;
3798             }
3799             case REQ_ec: // groff(7) "reset Escape Character"
3800             {
3801                 c = c + j;
3802                 if (*c != '\n')
3803                     escapesym = *c;
3804                 else
3805                     escapesym = '\\';
3806                 break;
3807             }
3808             case REQ_eo: // groff(7) "turn Escape character Off"
3809             {
3810                 escapesym = '\0';
3811                 c = skip_till_newline(c);
3812                 break;
3813             }
3814             case REQ_ex: // groff(7) "EXit"
3815             {
3816                 return nullptr;
3817                 break;
3818             }
3819             case REQ_fc: // groff(7) "set Field and pad Character"
3820             {
3821                 c = c + j;
3822                 if (*c == '\n')
3823                     fieldsym = padsym = '\0';
3824                 else
3825                 {
3826                     fieldsym = c[0];
3827                     padsym = c[1];
3828                 }
3829                 c = skip_till_newline(c);
3830                 break;
3831             }
3832             case REQ_fi: // groff(7) "FIll"
3833             {
3834                 if (!fillout)
3835                 {
3836                     out_html(set_font("R"));
3837                     out_html(change_to_size('0'));
3838                     out_html("</PRE>\n");
3839                 }
3840                 curpos = 0;
3841                 fillout = 1;
3842                 c = skip_till_newline(c);
3843                 break;
3844             }
3845             case REQ_ft: // groff(7) "FonT"
3846             {
3847                 c += j;
3848                 h = skip_till_newline(c);
3849                 const char oldChar = *h;
3850                 *h = 0;
3851                 const QByteArray name = c;
3852                 // ### TODO: name might contain a variable
3853                 if (name.isEmpty())
3854                     out_html(set_font("P"));     // Previous font
3855                 else
3856                     out_html(set_font(name));
3857                 *h = oldChar;
3858                 c = h;
3859                 break;
3860             }
3861             case REQ_el: // groff(7) "ELse"
3862             {
3863                 int ifelseval = s_ifelseval.pop();
3864                 /* .el anything : else part of if else */
3865                 if (ifelseval)
3866                 {
3867                     c = c + j;
3868                     c[-1] = '\n';
3869                     c = scan_troff(c, 1, nullptr);
3870                 }
3871                 else
3872                     c = skip_till_newline(c + j);
3873                 break;
3874             }
3875             case REQ_ie: // groff(7) "If with Else"
3876             /* .ie c anything : then part of if else */
3877             // fallthrough
3878             case REQ_if: // groff(7) "IF"
3879             {
3880                 /* .if c anything
3881                  * .if !c anything
3882                  * .if N anything
3883                  * .if !N anything
3884                  * .if 'string1'string2' anything
3885                  * .if !'string1'string2' anything
3886                  */
3887                 c = c + j;
3888                 c = scan_expression(c, &i);
3889                 if (request == REQ_ie)
3890                 {
3891                     int ifelseval = !i;
3892                     s_ifelseval.push(ifelseval);
3893                 }
3894                 if (i)
3895                 {
3896                     *c = '\n';
3897                     c++;
3898                     c = scan_troff(c, 1, nullptr);
3899                 }
3900                 else
3901                     c = skip_till_newline(c);
3902                 break;
3903             }
3904             case REQ_ig: // groff(7) "IGnore"
3905             {
3906                 const char *endwith = "..\n";
3907                 i = 3;
3908                 c = c + j;
3909                 if (*c != '\n' && *c != '\\')
3910                 {
3911                     /* Not newline or comment */
3912                     endwith = c - 1;
3913                     i = 1;
3914                     c[-1] = '.';
3915                     while (*c && *c != '\n') c++, i++;
3916                 }
3917                 c++;
3918                 while (*c && qstrncmp(c, endwith, i)) while (*c++ != '\n');
3919                 while (*c && *c++ != '\n');
3920                 break;
3921             }
3922             case REQ_nf: // groff(7) "No Filling"
3923             {
3924                 if (fillout)
3925                 {
3926                     out_html(set_font("R"));
3927                     out_html(change_to_size('0'));
3928                     out_html("<PRE>\n");
3929                 }
3930                 curpos = 0;
3931                 fillout = 0;
3932                 c = skip_till_newline(c);
3933                 break;
3934             }
3935             case REQ_ps: // groff(7) "previous Point Size"
3936             {
3937                 c += j;
3938                 getArguments(c, args);
3939                 if ( args.count() == 0 )
3940                     out_html(change_to_size('0'));
3941                 else
3942                 {
3943                     char *h = args[0].data();
3944                     int sign = 0;
3945                     i = 0;
3946                     if (*h == '-')
3947                     {
3948                         sign = -1;
3949                         h++;
3950                     }
3951                     else if (*h == '+') {
3952                         sign = 1;
3953                         h++;
3954                     }
3955                     scan_expression(h, &i);
3956                     if (sign == 0)
3957                     {
3958                         sign = 1;
3959                         if (i > 5) i = i - 10;
3960                     }
3961                     out_html(change_to_size(sign * i));
3962                 }
3963                 break;
3964             }
3965             case REQ_sp: // groff(7) "SKip one line"
3966             {
3967                 c += j;
3968                 if (fillout)
3969                     out_html("<br><br>");
3970                 else
3971                     out_html(NEWLINE);
3972                 curpos = 0;
3973                 c = skip_till_newline(c);
3974                 break;
3975             }
3976             case REQ_so: // groff(7) "Include SOurce file"
3977             {
3978                 char *buf;
3979                 char *name = nullptr;
3980                 curpos = 0;
3981                 c = c + j;
3982                 if (*c == '/')
3983                     h = c;
3984                 else
3985                 {
3986                     h = c - 3;
3987                     h[0] = '.';
3988                     h[1] = '.';
3989                     h[2] = '/';
3990                 }
3991                 while (*c != '\n') c++;
3992                 *c = '\0';
3993                 scan_troff(h, 1, &name);
3994                 if (name[3] == '/')
3995                     h = name + 3;
3996                 else
3997                     h = name;
3998 
3999                 // The format of the argument to .so varies among man pages.
4000                 // Some of them, e.g. pam.8, use "PAM.8".  Others, e.g. telinit.8,
4001                 // use "man8/init.8".  So they are not always true relative paths,
4002                 // although the man(1) command seems to handle them with no problem.
4003                 //
4004                 // The code above starting "h = c - 3" attempts to turn the argument
4005                 // into a relative path, but that is not correct in the case of pam.8
4006                 // as above.  So this removes the "../" prefix again if there is
4007                 // no other slash following it.
4008                 char *firstSlash = strchr(h, '/');
4009                 if (firstSlash != 0)
4010                 {
4011                     char *nextSlash = strchr(firstSlash + 1, '/');
4012                     if (nextSlash == 0)
4013                         h = firstSlash + 1;
4014                 }
4015 
4016                 /* this works alright, except for section 3 */
4017                 buf = read_man_page(h);
4018                 if (!buf)
4019                 {
4020                     qCDebug(KIO_MAN_LOG) << "Unable to open or read file: .so " << (h);
4021                     out_html("<BLOCKQUOTE>"
4022                              "man2html: unable to open or read file.\n");
4023                     out_html(h);
4024                     out_html("</BLOCKQUOTE>\n");
4025                 }
4026                 else
4027                     scan_troff(buf + 1, 0, nullptr);
4028                 delete [] buf;
4029                 delete [] name;
4030 
4031                 *c++ = '\n';
4032                 break;
4033             }
4034             case REQ_ta: // gorff(7) "set TAbulators"
4035             {
4036                 c = c + j;
4037                 j = 0;
4038                 while (*c != '\n')
4039                 {
4040                     sl = scan_expression(c, &tabstops[j]);
4041                     if (j > 0 && (*c == '-' || *c == '+')) tabstops[j] += tabstops[j-1];
4042                     c = sl;
4043                     while (*c == ' ' || *c == '\t') c++;
4044                     j++;
4045                 }
4046                 maxtstop = j;
4047                 curpos = 0;
4048                 break;
4049             }
4050             case REQ_ti: // groff(7) "Temporary Indent"
4051             {
4052                 /*while (itemdepth || dl_set[itemdepth]) {
4053                     out_html("</DL>\n");
4054                     if (dl_set[itemdepth]) dl_set[itemdepth]=0;
4055                     else itemdepth--;
4056                 }*/
4057                 out_html("<BR>\n");
4058                 c = c + j;
4059                 c = scan_expression(c, &j);
4060                 for (i = 0; i < j; i++) out_html("&nbsp;");
4061                 curpos = j;
4062                 c = skip_till_newline(c);
4063                 break;
4064             }
4065             case REQ_tm: // groff(7) "TerMinal" ### TODO: what are useful uses for it
4066             {
4067                 c += j;
4068                 getArguments(c, args);
4069                 if ( args.count() )
4070                     qCDebug(KIO_MAN_LOG) << ".tm " << args[0];
4071                 break;
4072             }
4073             case REQ_B: // man(7) "Bold"
4074                 mode = true;
4075                 Q_FALLTHROUGH();
4076             case REQ_I: // man(7) "Italic"
4077             {
4078                 /* parse one line in a certain font */
4079                 c += j;
4080                 getArguments(c, args);
4081 
4082                 out_html(set_font(mode ? "B" : "I"));
4083 
4084                 for (int i = 0; i < args.count(); i++)
4085                 {
4086                     scan_troff(args[i].data(), 1, nullptr);
4087                     out_html(" ");
4088                 }
4089 
4090                 out_html(set_font("R"));
4091 
4092                 if (fillout)
4093                     curpos++;
4094                 else
4095                 {
4096                     out_html(NEWLINE);
4097                     curpos = 0;
4098                 }
4099                 break;
4100             }
4101             case REQ_Fd: // mdoc(7) "Function Definition"
4102             {
4103                 // Normal text must be printed in bold, punctuation in regular font
4104                 c += j;
4105                 if (*c == '\n') c++;
4106                 getArguments(c, args);
4107 
4108                 for (i = 0; i < args.count(); i++)
4109                 {
4110                     // ### FIXME In theory, only a single punctuation character is recognized as punctuation
4111                     if ( is_mdoc_punctuation(args[i][0]) )
4112                         out_html(set_font("R"));
4113                     else
4114                         out_html(set_font("B"));
4115                     scan_troff(args[i].data(), 1, nullptr);
4116                     out_html(" ");
4117                 }
                // In the mdoc synopsis, line breaks are inserted automatically (### TODO: before or after?)
4119                 if (mandoc_synopsis)
4120                     out_html("<br>");
4121 
4122                 out_html(set_font("R"));
4123                 out_html(NEWLINE);
4124                 if (!fillout)
4125                     curpos = 0;
4126                 else
4127                     curpos++;
4128                 break;
4129             }
4130             case REQ_Fn: // mdoc(7)  for "Function calls"
4131             {
4132                 // brackets and commas have to be inserted automatically
4133                 c += j;
4134                 if (*c == '\n') c++;
4135                 getArguments(c, args);
4136                 if ( args.count() )
4137                 {
4138                     for (i = 0; i < args.count(); i++)
4139                     {
4140                         if (i)
4141                             out_html(set_font("I"));
4142                         else
4143                             out_html(set_font("B"));
4144                         scan_troff(args[i].data(), 1, nullptr);
4145                         out_html(set_font("R"));
4146                         if (i == 0)
4147                         {
4148                             out_html(" (");
4149                         }
4150                         else if (i < args.count() - 1)
4151                             out_html(", ");
4152                     }
4153                     out_html(")");
4154                 }
4155                 out_html(set_font("R"));
4156                 if (mandoc_synopsis)
4157                     out_html("<br>");
4158                 out_html(NEWLINE);
4159                 if (!fillout)
4160                     curpos = 0;
4161                 else
4162                     curpos++;
4163                 break;
4164             }
4165             case REQ_Fo: // mdoc(7) "Function definition Opening"
4166             {
4167                 char* font[2] = {(char*)"B", (char*)"R" };
4168                 c += j;
4169                 if (*c == '\n') c++;
4170                 char *eol = strchr(c, '\n');
4171                 char *semicolon = strchr(c, ';');
4172                 if ((semicolon != nullptr) && (semicolon < eol)) *semicolon = ' ';
4173 
4174                 getArguments(c, args);
4175                 // Normally a .Fo has only one parameter
4176                 for (i = 0; i < args.count(); i++)
4177                 {
4178                     out_html(set_font(font[i&1]));
4179                     scan_troff(args[i].data(), 1, nullptr);
4180                     if (i == 0)
4181                     {
4182                         out_html(" (");
4183                     }
4184                     // ### TODO What should happen if there is more than one argument
4185                     // else if (i<args.count()-1) out_html(", ");
4186                 }
4187                 function_argument = 1; // Must be > 0
4188                 out_html(set_font("R"));
4189                 out_html(NEWLINE);
4190                 if (!fillout)
4191                     curpos = 0;
4192                 else
4193                     curpos++;
4194                 break;
4195             }
4196             case REQ_Fc:// mdoc(7) "Function definition Close"
4197             {
4198                 // .Fc has no parameter
4199                 c += j;
4200                 c = skip_till_newline(c);
4201                 char* font[2] = {(char*)"B", (char*)"R" };
4202                 out_html(set_font(font[i&1]));
4203                 out_html(")");
4204                 out_html(set_font("R"));
4205                 if (mandoc_synopsis)
4206                     out_html("<br>");
4207                 out_html(NEWLINE);
4208                 if (!fillout)
4209                     curpos = 0;
4210                 else
4211                     curpos++;
4212                 function_argument = 0; // Reset the count variable
4213                 break;
4214             }
4215             case REQ_Fa: // mdoc(7) "Function definition argument"
4216             {
4217                 char* font[2] = {(char*)"B", (char*)"R" };
4218                 c += j;
4219                 if (*c == '\n') c++;
4220                 getArguments(c, args);
4221                 out_html(set_font(font[i&1]));
4222                 // function_argument==0 means that we had no .Fo  before, e.g. in mdoc.samples(7)
4223                 if (function_argument > 1)
4224                 {
4225                     out_html(", ");
4226                     curpos += 2;
4227                     function_argument++;
4228                 }
4229                 else if (function_argument == 1)
4230                 {
4231                     // We are only at the first parameter
4232                     function_argument++;
4233                 }
4234                 for (i = 0; i < args.count(); i++)
4235                     scan_troff(args[i].data(), 1, nullptr);
4236 
4237                 out_html(set_font("R"));
4238                 if (!fillout)
4239                     curpos = 0;
4240                 else
4241                     curpos++;
4242                 break;
4243             }
4244 
4245             case REQ_OP:  /* groff manpages use this construction */
4246             {
4247                 /* .OP a b : [ <B>a</B> <I>b</I> ] */
4248                 out_html(set_font("R"));
4249                 out_html("[");
4250                 curpos++;
4251                 request_mixed_fonts(c, j, "B", "I", true, false);
4252                 break;
4253             }
4254             case REQ_Ft:       //perhaps "Function return type"
4255             {
4256                 request_mixed_fonts(c, j, "B", "I", false, true);
4257                 break;
4258             }
4259             case REQ_BR:
4260             {
4261                 request_mixed_fonts(c, j, "B", "R", false, false);
4262                 break;
4263             }
4264             case REQ_BI:
4265             {
4266                 request_mixed_fonts(c, j, "B", "I", false, false);
4267                 break;
4268             }
4269             case REQ_IB:
4270             {
4271                 request_mixed_fonts(c, j, "I", "B", false, false);
4272                 break;
4273             }
4274             case REQ_IR:
4275             {
4276                 request_mixed_fonts(c, j, "I", "R", false, false);
4277                 break;
4278             }
4279             case REQ_RB:
4280             {
4281                 request_mixed_fonts(c, j, "R", "B", false, false);
4282                 break;
4283             }
4284             case REQ_RI:
4285             {
4286                 request_mixed_fonts(c, j, "R", "I", false, false);
4287                 break;
4288             }
4289             case REQ_DT: // man(7) "Default Tabulators"
4290             {
4291                 for (j = 0; j < 20; j++) tabstops[j] = (j + 1) * 8;
4292                 maxtstop = 20;
4293                 c = skip_till_newline(c);
4294                 break;
4295             }
4296             case REQ_IP: // man(7) "Ident Paragraph"
4297             {
4298                 c += j;
4299                 getArguments(c, args);
4300 
4301                 if (!dl_set[itemdepth])
4302                 {
4303                     out_html("<DL>\n");
4304                     dl_set[itemdepth] = 1;
4305                 }
4306                 out_html("<DT>");
4307 
4308                 if ( args.count() )
4309                     scan_troff(args[0].data(), 1, nullptr);
4310 
4311                 out_html("</DT>\n<DD>");
4312                 listItemStack.push("DD");
4313                 curpos = 0;
4314                 break;
4315             }
4316             case REQ_TP: // man(7) "hanging Tag Paragraph"
4317             {
4318                 if (!dl_set[itemdepth])
4319                 {
4320                     out_html("<DL>\n");
4321                     dl_set[itemdepth] = 1;
4322                 }
4323                 out_html(set_font("R"));
4324                 out_html("<DT>");
4325                 c = skip_till_newline(c);
4326                 /* somewhere a definition ends with '.TP' */
4327                 if (!*c)
4328                     still_dd = true;
4329                 else
4330                 {
4331                     // HACK for proc(5)
4332                     while (c[0] == '.' && c[1] == '\\' && c[2] == '\"')
4333                     {
4334                         // We have a comment, so skip the line
4335                         c = skip_till_newline(c);
4336                     }
4337                     c = scan_troff(c, 1, nullptr);
4338                     out_html("<DD>");
4339                     listItemStack.push("DD");
4340                 }
4341                 curpos = 0;
4342                 break;
4343             }
4344             case REQ_IX: // Indexing term (printed on standard error)
4345             {
4346                 c = skip_till_newline(c); // ignore
4347                 break;
4348             }
4349             case REQ_P: // man(7) "Paragraph"
4350             case REQ_LP:// man(7) "Paragraph"
4351             case REQ_PP:// man(7) "Paragraph; reset Prevailing indent"
4352             {
4353                 if (dl_set[itemdepth])
4354                 {
4355                     out_html("</DL>\n");
4356                     dl_set[itemdepth] = 0;
4357                 }
4358                 else if (fillout) out_html("<br>");
4359 
4360                 if (fillout)
4361                     out_html("<br>\n");
4362                 else
4363                     out_html(NEWLINE);
4364 
4365                 curpos = 0;
4366                 c = skip_till_newline(c);
4367                 break;
4368             }
4369             case REQ_HP: // man(7) "Hanging indent Paragraph"
4370             {
4371                 if (!dl_set[itemdepth])
4372                 {
4373                     out_html("<DL>");
4374                     dl_set[itemdepth] = 1;
4375                 }
4376                 out_html("<DT>\n");
4377                 still_dd = true;
4378                 c = skip_till_newline(c);
4379                 curpos = 0;
4380                 break;
4381             }
4382             case REQ_PD: // man(7) "Paragraph Distance"
4383             {
4384                 c = skip_till_newline(c);
4385                 break;
4386             }
4387             case REQ_Rs: // mdoc(7) "Relative margin Start"
4388             case REQ_RS: // man(7) "Relative margin Start"
4389             {
4390                 c += j;
4391                 getArguments(c, args);
4392                 j = 1;
4393                 if (args.count() > 0) scan_expression(args[0].data(), &j);
4394                 if (j >= 0)
4395                 {
4396                     itemdepth++;
4397                     dl_set[itemdepth] = 0;
4398                     out_html("<DL><DT></DT><DD>");
4399                     listItemStack.push("DD");
4400                     curpos = 0;
4401                 }
4402                 break;
4403             }
4404             case REQ_Re: // mdoc(7) "Relative margin End"
4405             case REQ_RE: // man(7) "Relative margin End"
4406             {
4407                 if (itemdepth > 0)
4408                 {
4409                     if (dl_set[itemdepth]) out_html("</DL>");
4410                     out_html("</DL>\n");
4411                     itemdepth--;
4412                 }
4413                 c = skip_till_newline(c);
4414                 curpos = 0;
4415                 break;
4416             }
4417             case REQ_SB: // man(7) "Small; Bold"
4418             {
4419                 out_html(set_font("B"));
4420                 out_html("<small>");
4421                 c = scan_troff(c + j, 1, nullptr);
4422                 out_html("</small>");
4423                 out_html(set_font("R"));
4424                 break;
4425             }
4426             case REQ_SM: // man(7) "SMall"
4427             {
4428                 c = c + j;
4429                 if (*c == '\n') c++;
4430                 out_html("<small>");
4431                 c = scan_troff(c, 1, nullptr);
4432                 out_html("</small>");
4433                 break;
4434             }
            // Section and subsection headers. The four labels share one body by
            // falling through; by the time the body runs:
            //   - `mode` selects <H3> (sub section) versus <H2> (section),
            //   - `mandoc_command` selects the mdoc-aware scanner for the title.
            // NOTE(review): this relies on `mode` being false on entry to the
            // switch; its initialisation is outside this excerpt - confirm.
            case REQ_Ss: // mdoc(7) "Sub Section"
                mandoc_command = 1;
                Q_FALLTHROUGH();
            case REQ_SS: // man(7) "Sub Section"
                mode = true;
                Q_FALLTHROUGH();
            case REQ_Sh: // mdoc(7) "Sub Header"
                /* hack for fallthru from above */
                // Entered directly (.Sh): mode is still false, so mandoc_command becomes true.
                // Entered via .SS/.Ss: mode is true, so mandoc_command keeps its current value.
                mandoc_command = !mode || mandoc_command;
                Q_FALLTHROUGH();
            case REQ_SH: // man(7) "Sub Header"
            {
                c = c + j;
                // No title on the request line: use the following line as the title.
                if (*c == '\n') c++;
                // Close every list that is still open before starting a new section:
                // clear the marker at the current depth first, then step one level up.
                while (itemdepth || dl_set[itemdepth])
                {
                    out_html("</DL>\n");
                    if (dl_set[itemdepth])
                        dl_set[itemdepth] = 0;
                    else if (itemdepth > 0)
                        itemdepth--;
                }
                // Reset font and size before the heading.
                out_html(set_font("R"));
                out_html(change_to_size(0));
                // Leave no-fill mode: close the <PRE> that was opened for it.
                if (!fillout)
                {
                    fillout = 1;
                    out_html("</PRE>");
                }
                trans_char(c, '"', '\a');
                // Close the <div> opened by the previous section header, if any.
                if (in_div)
                {
                    out_html("</div>\n");
                    in_div = 0;
                }
                if (mode)
                    out_html("\n<H3>");
                else
                    out_html("\n<H2>");
                // Remember whether this header's text starts with "SYNOPSIS".
                mandoc_synopsis = qstrncmp(c, "SYNOPSIS", 8) == 0;
                c = mandoc_command ? scan_troff_mandoc(c, 1, nullptr) : scan_troff(c, 1, nullptr);
                if (mode)
                    out_html("</H3>\n");
                else
                    out_html("</H2>\n");

                // The section body goes into its own <div>; it is closed by the
                // in_div check above when the next header is processed.
                out_html("<div>\n");
                in_div = 1;
                curpos = 0;
                break;
            }
4486             case REQ_Sx: // mdoc(7)
4487             {
4488                 // reference to a section header
4489                 out_html(set_font("B"));
4490                 trans_char(c, '"', '\a');
4491                 c = c + j;
4492                 if (*c == '\n') c++;
4493                 c = scan_troff(c, 1, nullptr);
4494                 out_html(set_font("R"));
4495                 out_html(NEWLINE);
4496                 if (fillout)
4497                     curpos++;
4498                 else
4499                     curpos = 0;
4500                 break;
4501             }
4502             case REQ_St: // groff_mdoc
4503             {
4504                 c += j;
4505                 getArguments(c, args);
4506                 if ( args.count() )
4507                 {
4508                     bool found = false;
4509                     for (const StandardName &standardName : STANDARD_NAMES)
4510                     {
4511                         if ( args[0] == standardName.abbrev )
4512                         {
4513                             found = true;
4514                             out_html(standardName.formalName);
4515                             break;
4516                         }
4517                     }
4518                     if ( !found )  // an unknown standard - print the abbreviation
4519                         out_html(args[0]);
4520                 }
4521                 break;
4522             }
4523             case REQ_TS: // Table Start tbl(1)
4524             {
4525                 c = scan_table(c);
4526                 break;
4527             }
4528             case REQ_Dt:    /* mdoc(7) */
4529                 mandoc_command = true;
4530                 Q_FALLTHROUGH();
4531             case REQ_TH: // man(7) "Title Header"
4532             {
4533                 if (!output_possible)
4534                 {
4535                     c += j;
4536                     getArguments(c, args);
4537                     output_possible = true;
4538                     out_html(DOCTYPE"<HTML>\n<HEAD>\n");
4539                     out_html("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n");
4540                     out_html("<TITLE>");
4541                     if ( args.count() )
4542                     {
4543                         // work around the problem that in a title no HTML tags are allowed
4544                         // but args[0] can have formatting escapes, e.g. to switch a font
4545                         // which results in a HTML tag added to the output
4546                         char *result = nullptr;
4547                         scan_troff(args[0].data(), 0, &result);
4548                         char *p = result;
4549                         QByteArray title;
4550                         while ( *p )
4551                         {
4552                             if ( *p == '<' ) // tag begin -> skip whole tag
4553                             {
4554                                 for (p++; *p && (*p != '>'); p++) ;
4555                                 if ( *p ) p++;
4556                             }
4557                             if ( *p )
4558                                 title += *p++;
4559                         }
4560                         ignore_links = true;
4561                         title += '\n';  // needed so that out_html flushes buffer and ignore_links works
4562                         out_html(title);
4563                         ignore_links = false;
4564                         delete [] result;
4565                     }
4566                     out_html(" Manpage</TITLE>\n");
4567 
4568                     // KDE defaults.
4569                     out_html("<link rel=\"stylesheet\" href=\"help:/kdoctools5-common/kde-default.css\"");
4570                     out_html(" type=\"text/css\">\n");
4571 
4572                     // Output our custom stylesheet.
4573                     out_html("<link rel=\"stylesheet\" href=\"");
4574                     out_html(cssFile);
4575                     out_html("\" type=\"text/css\">\n");
4576 
4577                     // Some elements need background images, but this
4578                     // could not be included in the stylesheet,
4579                     // include it now.
4580                     out_html("<style type=\"text/css\">\n#header_top { "
4581                              "background-image: url(\"help:/kdoctools5-common/top.jpg\"); }\n\n"
4582                              "#header_top div { "
4583                              "background-image: url(\"help:/kdoctools5-common/top-left.jpg\"); }\n\n"
4584                              "#header_top div div { "
4585                              "background-image: url(\"help:/kdoctools5-common/top-right.jpg\"); }\n\n"
4586                              "</style>\n\n"
4587                             );
4588 
4589                     out_html("<meta name=\"ROFF_Type\" content=\"");
4590                     if (mandoc_command)
4591                         out_html("mdoc");
4592                     else
4593                         out_html("man");
4594                     out_html("\">\n");
4595 
4596                     out_html("</HEAD>\n\n");
4597                     out_html("<BODY>\n\n");
4598 
4599                     out_html("<div id=\"header\"><div id=\"header_top\">\n");
4600                     out_html("<div><div>\n");
4601                     out_html("<img src=\"help:/kdoctools5-common/top-kde.jpg\" alt=\"top-kde\"> ");
4602                     if ( args.count() )
4603                         scan_troff(args[0].data(), 0, nullptr);
4604                     out_html(" Manual Page");
4605                     out_html("</div></div></div></div>\n");
4606 
4607                     out_html("<div style=\"margin-left: 5em; margin-right: 5em;\">\n");
4608                     out_html("<h1>");
4609                     if ( args.count() )
4610                         scan_troff(args[0].data(), 0, nullptr);
4611                     out_html("</h1>\n");
4612                     if (args.count() > 1)
4613                     {
4614                         out_html("Section: ");
4615                         if ( !mandoc_command && (args.count() > 4) )
4616                             scan_troff(args[4].data(), 0, nullptr);
4617                         else
4618                             out_html(section_name(args[1].data()));
4619                         out_html(" (");
4620                         scan_troff(args[1].data(), 0, nullptr);
4621                         out_html(")\n");
4622                     }
4623                     else
4624                     {
4625                         out_html("Section not specified");
4626                     }
4627                 }
4628                 else
4629                 {
4630                     qCWarning(KIO_MAN_LOG) << ".TH found but output not possible" ;
4631                     c = skip_till_newline(c);
4632                 }
4633                 curpos = 0;
4634                 break;
4635             }
4636             case REQ_TX: // mdoc(7)
4637             {
4638                 c += j;
4639                 getArguments(c, args);
4640                 out_html(set_font("I"));
4641                 const char *c2 = lookup_abbrev(args[0]);
4642                 curpos += qstrlen(c2);
4643                 out_html(c2);
4644                 out_html(set_font("R"));
4645                 if (args.count() > 1)
4646                     out_html(args[1]);
4647                 break;
4648             }
4649             case REQ_rm: // groff(7) "ReMove"
4650                 /* .rm xx : Remove request, macro or string */
4651                 mode = true;
4652                 Q_FALLTHROUGH();
4653             case REQ_rn: // groff(7) "ReName"
4654                 /* .rn xx yy : Rename request, macro or string xx to yy */
4655             {
4656                 qCDebug(KIO_MAN_LOG) << "start .rm/.rn";
4657                 c += j;
4658                 const QByteArray name(scan_identifier(c));
4659                 if (name.isEmpty())
4660                 {
4661                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin string to remove/rename";
4662                     break;
4663                 }
4664                 QByteArray name2;
4665                 if (!mode)
4666                 {
4667                     while (*c && isspace(*c) && *c != '\n') ++c;
4668                     name2 = scan_identifier(c);
4669                     if (name2.isEmpty())
4670                     {
4671                         qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination string to rename";
4672                         break;
4673                     }
4674                 }
4675                 c = skip_till_newline(c);
4676                 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name);
4677                 if (it == s_stringDefinitionMap.end())
4678                 {
4679                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string to rename or remove: " << BYTEARRAY(name);
4680                 }
4681                 else
4682                 {
4683                     if (mode)
4684                     {
4685                         // .rm ReMove
4686                         s_stringDefinitionMap.remove(name); // ### QT4: removeAll
4687                     }
4688                     else
4689                     {
4690                         // .rn ReName
4691                         StringDefinition def = (*it);
4692                         s_stringDefinitionMap.remove(name); // ### QT4: removeAll
4693                         s_stringDefinitionMap.insert(name2, def);
4694                     }
4695                 }
4696                 qCDebug(KIO_MAN_LOG) << "end .rm/.rn";
4697                 break;
4698             }
4699             case REQ_nx:
4700             case REQ_in: // groff(7) "INdent"
4701             {
4702                 /* .in +-N : Indent */
4703                 c = skip_till_newline(c);
4704                 break;
4705             }
4706             case REQ_nr: // groff(7) "Number Register"
4707             {
4708                 qCDebug(KIO_MAN_LOG) << "start .nr";
4709                 c += j;
4710                 const QByteArray name(scan_identifier(c));
4711                 if (name.isEmpty())
4712                 {
4713                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty name for register variable";
4714                     break;
4715                 }
4716                 while (*c && (*c == ' ' || *c == '\t')) c++;
4717                 int sign = 0;
4718                 if (*c && (*c == '+' || *c == '-'))
4719                 {
4720                     if (*c == '+')
4721                         sign = 1;
4722                     else if (*c == '-')
4723                         sign = -1;
4724                 }
4725                 int value = 0;
4726                 int increment = 0;
4727                 c = scan_expression(c, &value);
4728                 if (*c && *c != '\n')
4729                 {
4730                     while (*c && (*c == ' ' || *c == '\t')) c++;
4731                     c = scan_expression(c, &increment);
4732                 }
4733                 c = skip_till_newline(c);
4734                 QMap <QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name);
4735                 if (it == s_numberDefinitionMap.end())
4736                 {
4737                     if (sign < 1)
4738                         value = -value;
4739                     NumberDefinition def(value, increment);
4740                     s_numberDefinitionMap.insert(name, def);
4741                 }
4742                 else
4743                 {
4744                     if (sign > 0)
4745                         (*it).m_value += value;
4746                     else if (sign < 0)
4747                         (*it).m_value += - value;
4748                     else
4749                         (*it).m_value = value;
4750                     (*it).m_increment = increment;
4751                 }
4752                 qCDebug(KIO_MAN_LOG) << "end .nr";
4753                 break;
4754             }
            // Macro definition (.de/.de1) and macro extension (.am). The macro
            // text is stored in s_stringDefinitionMap under the macro's name.
            // `mode` is true for .am (set by the label below).
            case REQ_am: // groff(7) "Append Macro"
                /* .am xx yy : append to a macro. */
                /* define or handle as .ig yy */
                mode = true;
                Q_FALLTHROUGH();
            case REQ_de: // groff(7) "DEfine macro"
            case REQ_de1: // groff(7) "DEfine macro"
            {
                /* .de xx yy : define or redefine macro xx; end at .yy (..) */
                /* define or handle as .ig yy */
                qCDebug(KIO_MAN_LOG) << "Start .am/.de";
                c += j;
                getArguments(c, args);
                // A definition without a macro name is ignored.
                if ( args.count() == 0 )
                    break;

                const QByteArray name(args[0]);

                // The body ends at the line starting with ".." or, if a second
                // argument was given, with "." followed by that argument.
                QByteArray endmacro;
                if (args.count() == 1)
                    endmacro = "..";
                else
                    endmacro = "." + args[1]; // krazy:exclude=doublequote_chars

                // Remember where scanning for the end marker starts, then advance c
                // line by line until the end marker (or the end of input) is reached.
                sl = c;
                while (*c && qstrncmp(c, endmacro, endmacro.length()))
                    c = skip_till_newline(c);

                // Copy the text between sl and c, replacing every "\\" pair
                // (two backslashes) by a single backslash.
                QByteArray macro;
                while (sl != c)
                {
                    if (sl[0] == '\\' && sl[1] == '\\')
                    {
                        macro += '\\';
                        sl++;
                    }
                    else
                        macro += *sl;
                    sl++;
                }

                QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name);
                if (it == s_stringDefinitionMap.end())
                {
                    // Unknown name: create the definition (for .am as well as for .de).
                    StringDefinition def;
                    def.m_length = 0;
                    def.m_output = macro;
                    s_stringDefinitionMap.insert(name, def);
                }
                else if (mode)
                {
                    // .am Append Macro
                    (*it).m_length = 0; // It could be formerly a string
                    // Make sure the appended text starts on a line of its own.
                    if (!(*it).m_output.endsWith('\n'))
                        (*it).m_output += '\n';
                    (*it).m_output += macro;
                }
                else
                {
                    // .de DEfine macro
                    (*it).m_length = 0; // It could be formerly a string
                    (*it).m_output = macro;
                }
                // Skip the line holding the end marker itself.
                c = skip_till_newline(c);
                qCDebug(KIO_MAN_LOG) << "End .am/.de";
                break;
            }
4822             case REQ_Bl: // mdoc(7) "Begin List"
4823             {
4824                 QByteArray list_options;
4825                 char *nl = strchr(c, '\n');
4826                 c = c + j;
4827                 if (dl_set[itemdepth])
4828                 {
4829                     /* These things can nest. */
4830                     itemdepth++;
4831                 }
4832                 if (nl)
4833                 {
4834                     /* Parse list options */
4835                     list_options = QByteArray(c, nl - c);
4836                 }
4837                 if ( list_options.contains("-bullet") )
4838                 {
4839                     /* HTML Unnumbered List */
4840                     dl_set[itemdepth] = BL_BULLET_LIST;
4841                     out_html("<UL>\n");
4842                 }
4843                 else if ( list_options.contains("-enum") )
4844                 {
4845                     /* HTML Ordered List */
4846                     dl_set[itemdepth] = BL_ENUM_LIST;
4847                     out_html("<OL>\n");
4848                 }
4849                 else
4850                 {
4851                     /* HTML Descriptive List */
4852                     dl_set[itemdepth] = BL_DESC_LIST;
4853                     out_html("<DL>\n");
4854                 }
4855                 curpos = 0;
4856                 c = skip_till_newline(c);
4857                 break;
4858             }
4859             case REQ_El: // mdoc(7) "End List"
4860             {
4861                 checkListStack();
4862                 c = c + j;
4863                 if (dl_set[itemdepth] & BL_DESC_LIST)
4864                     out_html("</DL>\n");
4865                 else if (dl_set[itemdepth] & BL_BULLET_LIST)
4866                     out_html("</UL>\n");
4867                 else if (dl_set[itemdepth] & BL_ENUM_LIST)
4868                     out_html("</OL>\n");
4869                 dl_set[itemdepth] = 0;
4870                 if (itemdepth > 0) itemdepth--;
4871                 if ( !fillout )
4872                     out_html(NEWLINE);
4873 
4874                 curpos = 0;
4875                 c = skip_till_newline(c);
4876                 break;
4877             }
4878             case REQ_It: // mdoc(7) "list ITem"
4879             {
4880                 checkListStack();
4881                 c = c + j;
4882                 //if (qstrncmp(c, "Xo", 2) == 0 && isspace(*(c + 2)))
4883                 //c = skip_till_newline(c);
4884                 if (dl_set[itemdepth] & BL_DESC_LIST)
4885                 {
4886                     out_html("<DT>");
4887                     out_html(set_font("B"));
4888                     if (*c == '\n')
4889                     {
4890                         /* Don't allow embedded comms after a newline */
4891                         c++;
4892                         c = scan_troff(c, 1, nullptr);
4893                     }
4894                     else
4895                     {
4896                         /* Do allow embedded comms on the same line. */
4897                         c = scan_troff_mandoc(c, 1, nullptr);
4898                     }
4899                     out_html(set_font("R"));
4900                     out_html("</DT>");
4901                     out_html(NEWLINE);
4902                     out_html("<DD>");
4903                     listItemStack.push("DD");
4904                 }
4905                 else if (dl_set[itemdepth] & (BL_BULLET_LIST | BL_ENUM_LIST))
4906                 {
4907                     out_html("<LI>");
4908                     listItemStack.push("LI");
4909                     c = scan_troff_mandoc(c, 1, nullptr);
4910                     out_html(NEWLINE);
4911                 }
4912                 if (fillout)
4913                     curpos++;
4914                 else
4915                     curpos = 0;
4916                 break;
4917             }
4918             case REQ_Bk:    /* mdoc(7) */
4919             case REQ_Ek:    /* mdoc(7) */
4920             case REQ_Dd:    /* mdoc(7) */
4921             case REQ_Os:    // mdoc(7) "Operating System"
4922             case REQ_Sm:    // mdoc(7) space mode
4923                 c = skip_till_newline(c);  // TODO
4924                 break;
4925             case REQ_Bt: // mdoc(7) "Beta Test"
4926             {
4927                 //trans_char(c, '"', '\a');
4928                 //c = c + j;
4929                 out_html(" is currently in beta test.");
4930                 if (fillout)
4931                     curpos++;
4932                 else
4933                     curpos = 0;
4934                 break;
4935             }
4936             case REQ_At:    /* mdoc(7) */
4937             case REQ_Fx:    /* mdoc(7) */
4938             case REQ_Nx:    /* mdoc(7) */
4939             case REQ_Ox:    /* mdoc(7) */
4940             case REQ_Bx:    /* mdoc(7) */
4941             case REQ_Ux:    /* mdoc(7) */
4942             case REQ_Dx:    /* mdoc(7) */
4943             {
4944                 bool parsable = true;
4945                 trans_char(c, '"', '\a');
4946                 c = c + j;
4947                 if (*c == '\n') c++;
4948                 if (request == REQ_At)
4949                 {
4950                     out_html("AT&amp;T UNIX ");
4951                     parsable = false;
4952                 }
4953                 else if (request == REQ_Fx)
4954                 {
4955                     out_html("FreeBSD ");
4956                     parsable = false;
4957                 }
4958                 else if (request == REQ_Nx)
4959                     out_html("NetBSD ");
4960                 else if (request == REQ_Ox)
4961                     out_html("OpenBSD ");
4962                 else if (request == REQ_Bx)
4963                     out_html("BSD ");
4964                 else if (request == REQ_Ux)
4965                     out_html("UNIX ");
4966                 else if (request == REQ_Dx)
4967                     out_html("DragonFly ");
4968                 if (parsable)
4969                     c = scan_troff_mandoc(c, 1, nullptr);
4970                 else
4971                     c = scan_troff(c, 1, nullptr);
4972                 if (fillout)
4973                     curpos++;
4974                 else
4975                     curpos = 0;
4976                 break;
4977             }
4978             case REQ_Dl:    /* mdoc(7) */
4979             {
4980                 c = c + j;
4981                 out_html(NEWLINE);
4982                 out_html("<BLOCKQUOTE>");
4983                 if (*c == '\n') c++;
4984                 c = scan_troff_mandoc(c, 1, nullptr);
4985                 out_html("</BLOCKQUOTE>");
4986                 if (fillout)
4987                     curpos++;
4988                 else
4989                     curpos = 0;
4990                 break;
4991             }
4992             case REQ_Bd:    /* mdoc(7) */
4993             {   /* Seems like a kind of example/literal mode */
4994                 QByteArray bd_options;
4995                 char *nl = strchr(c, '\n');
4996                 c = c + j;
4997                 if (nl)
4998                     bd_options = QByteArray(c, nl - c);
4999                 out_html(NEWLINE);
5000                 mandoc_bd_options = 0; /* Remember options for terminating Bl */
5001                 if ( bd_options.contains("-offset indent") )
5002                 {
5003                     mandoc_bd_options |= BD_INDENT;
5004                     out_html("<BLOCKQUOTE>\n");
5005                 }
5006                 if ( bd_options.contains("-literal") || bd_options.contains("-unfilled") )
5007                 {
5008                     if (fillout)
5009                     {
5010                         mandoc_bd_options |= BD_LITERAL;
5011                         out_html(set_font("R"));
5012                         out_html(change_to_size('0'));
5013                         out_html("<PRE>\n");
5014                     }
5015                     curpos = 0;
5016                     fillout = 0;
5017                 }
5018                 c = skip_till_newline(c);
5019                 break;
5020             }
5021             case REQ_Ed:    /* mdoc(7) */
5022             {
5023                 if (mandoc_bd_options & BD_LITERAL)
5024                 {
5025                     if (!fillout)
5026                     {
5027                         out_html(set_font("R"));
5028                         out_html(change_to_size('0'));
5029                         out_html("</PRE>\n");
5030                     }
5031                 }
5032                 if (mandoc_bd_options & BD_INDENT)
5033                     out_html("</BLOCKQUOTE>\n");
5034                 curpos = 0;
5035                 fillout = 1;
5036                 c = skip_till_newline(c);
5037                 break;
5038             }
5039             case REQ_Be:    /* mdoc(7) */
5040             {
5041                 c = c + j;
5042                 if (fillout)
5043                     out_html("<br><br>");
5044                 else
5045                 {
5046                     out_html(NEWLINE);
5047                 }
5048                 curpos = 0;
5049                 c = skip_till_newline(c);
5050                 break;
5051             }
5052             case REQ_Xr:    /* mdoc(7) */ // ### FIXME: it should issue a <a href="man:somewhere(x)"> directly
5053             {
5054                 /* Translate xyz 1 to xyz(1)
5055                  * Allow for multiple spaces.  Allow the section to be missing.
5056                  */
5057                 char buff[NULL_TERMINATED(MED_STR_MAX)];
5058                 char *bufptr;
5059                 trans_char(c, '"', '\a');
5060                 bufptr = buff;
5061                 c = c + j;
5062                 if (*c == '\n') c++; /* Skip spaces */
5063                 while (isspace(*c) && *c != '\n') c++;
5064                 while (isalnum(*c) || *c == '.' || *c == ':' || *c == '_' || *c == '-')
5065                 {
5066                     /* Copy the xyz part */
5067                     *bufptr = *c;
5068                     bufptr++;
5069                     if (bufptr >= buff + MED_STR_MAX) break;
5070                     c++;
5071                 }
5072                 while (isspace(*c) && *c != '\n') c++;    /* Skip spaces */
5073                 if (isdigit(*c))
5074                 {
5075                     /* Convert the number if there is one */
5076                     *bufptr = '(';
5077                     bufptr++;
5078                     if (bufptr < buff + MED_STR_MAX)
5079                     {
5080                         while (isalnum(*c))
5081                         {
5082                             *bufptr = *c;
5083                             bufptr++;
5084                             if (bufptr >= buff + MED_STR_MAX) break;
5085                             c++;
5086                         }
5087                         if (bufptr < buff + MED_STR_MAX)
5088                         {
5089                             *bufptr = ')';
5090                             bufptr++;
5091                         }
5092                     }
5093                 }
5094                 while (*c != '\n')
5095                 {
5096                     /* Copy the remainder */
5097                     if (!isspace(*c))
5098                     {
5099                         *bufptr = *c;
5100                         bufptr++;
5101                         if (bufptr >= buff + MED_STR_MAX) break;
5102                     }
5103                     c++;
5104                 }
5105                 *bufptr = '\n';
5106                 bufptr[1] = 0;
5107                 scan_troff_mandoc(buff, 1, nullptr);
5108                 out_html(NEWLINE);
5109                 if (fillout)
5110                     curpos++;
5111                 else
5112                     curpos = 0;
5113                 break;
5114             }
5115             case REQ_Fl:    // mdoc(7) "FLags"
5116             {
5117                 //trans_char(c, '"', '\a');
5118                 c += j;
5119                 QList<char*> argPointers;
5120                 getArguments(c, args, &argPointers);
5121                 out_html(set_font("B"));
5122                 out_html("-");
5123                 if ( args.count() == 0 )
5124                 {
5125                     /*out_html("-");*/ // stdin or stdout
5126                 }
5127                 else
5128                 {
5129                     if ( argPointers.count() )
5130                         scan_troff_mandoc(argPointers[0], 1, nullptr);
5131                     /*
5132                     for (i = 0; i < args.count(); ++i)
5133                     {
5134                       if (ispunct(args[i][0]) && args[i][0] != '-')
5135                       {
5136                         scan_troff_mandoc(argPointers[i], 1, NULL);
5137                       }
5138                       else
5139                       {
5140                         if (i > 0)
5141                           out_html(" "); // Put a space between flags
5142                         out_html("-");
5143                         scan_troff_mandoc(argPointers[i], 1, NULL);
5144                       }
5145                     }
5146                     */
5147                 }
5148                 out_html(set_font("R"));
5149                 out_html(NEWLINE);
5150                 if (fillout)
5151                     curpos++;
5152                 else
5153                     curpos = 0;
5154                 break;
5155             }
5156             case REQ_Pa:    /* mdoc(7) */
5157             case REQ_Pf:    /* mdoc(7) */
5158             {
5159                 trans_char(c, '"', '\a');
5160                 c = c + j;
5161                 if (*c == '\n') c++;
5162                 c = scan_troff_mandoc(c, 1, nullptr);
5163                 out_html(NEWLINE);
5164                 if (fillout)
5165                     curpos++;
5166                 else
5167                     curpos = 0;
5168                 break;
5169             }
5170             case REQ_Pp:    /* mdoc(7) */
5171             {
5172                 if (fillout)
5173                     out_html("<br><br>\n");
5174                 else
5175                 {
5176                     out_html(NEWLINE);
5177                 }
5178                 curpos = 0;
5179                 c = skip_till_newline(c);
5180                 break;
5181             }
5182             case REQ_Aq: // mdoc(7) "Angle bracket Quote"
5183                 c = process_quote(c, j, "&lt;", "&gt;");
5184                 break;
5185             case REQ_Bq: // mdoc(7) "Bracket Quote"
5186                 c = process_quote(c, j, "[", "]");
5187                 break;
5188             case REQ_Dq:    // mdoc(7) "Double Quote"
5189                 c = process_quote(c, j, "&ldquo;", "&rdquo;");
5190                 break;
5191             case REQ_Pq:    // mdoc(7) "Parenthese Quote"
5192                 c = process_quote(c, j, "(", ")");
5193                 break;
5194             case REQ_Qq:    // mdoc(7) "straight double Quote"
5195                 c = process_quote(c, j, "&quot;", "&quot;");
5196                 break;
5197             case REQ_Sq:    // mdoc(7) "Single Quote"
5198                 c = process_quote(c, j, "&lsquo;", "&rsquo;");
5199                 break;
5200             case REQ_Op:    /* mdoc(7) */
5201             {
5202                 trans_char(c, '"', '\a');
5203                 c += j;
5204                 if (*c == '\n') c++;
5205                 out_html(set_font("R"));
5206                 out_html("[");
5207                 c = scan_troff_mandoc(c, 1, nullptr);
5208                 out_html(set_font("R"));
5209                 out_html("]");
5210                 out_html(NEWLINE);
5211                 if (fillout)
5212                     curpos++;
5213                 else
5214                     curpos = 0;
5215                 break;
5216             }
5217             case REQ_Oo:    /* mdoc(7) */
5218             {
5219                 trans_char(c, '"', '\a');
5220                 c += j;
5221                 if (*c == '\n') c++;
5222                 out_html(set_font("R"));
5223                 out_html("[");
5224                 c = scan_troff_mandoc(c, 1, nullptr);
5225                 if (fillout)
5226                     curpos++;
5227                 else
5228                     curpos = 0;
5229                 break;
5230             }
5231             case REQ_Oc:    /* mdoc(7) */
5232             {
5233                 trans_char(c, '"', '\a');
5234                 c += j;
5235                 out_html(set_font("R"));
5236                 out_html("]");
5237                 c = scan_troff_mandoc(c, 1, nullptr);
5238                 if (fillout)
5239                     curpos++;
5240                 else
5241                     curpos = 0;
5242                 break;
5243             }
5244             case REQ_Ql:    /* mdoc(7) */
5245             {
5246                 /* Single quote first word in the line */
5247                 char *sp;
5248                 trans_char(c, '"', '\a');
5249                 c = c + j;
5250                 if (*c == '\n') c++;
5251                 sp = c;
5252                 do
5253                 {
5254                     /* Find first whitespace after the
5255                      * first word that isn't a mandoc macro
5256                      */
5257                     while (*sp && isspace(*sp)) sp++;
5258                     while (*sp && !isspace(*sp)) sp++;
5259                 }
5260                 while (*sp && isupper(*(sp - 2)) && islower(*(sp - 1)));
5261 
5262                 /* Use a newline to mark the end of text to
5263                  * be quoted
5264                  */
5265                 if (*sp) *sp = '\n';
5266                 out_html("`");    /* Quote the text */
5267                 c = scan_troff_mandoc(c, 1, nullptr);
5268                 out_html("'");
5269                 out_html(NEWLINE);
5270                 if (fillout)
5271                     curpos++;
5272                 else
5273                     curpos = 0;
5274                 break;
5275             }
5276             case REQ_Ar:    /* mdoc(7) */
5277             {
5278                 /* parse one line in italics */
5279                 out_html(set_font("I"));
5280                 c += j;
5281                 QList<char*> argPointers;
5282                 getArguments(c, args, &argPointers);
5283                 if ( args.count() == 0 )
5284                 {
5285                     // An empty Ar means "file ..."
5286                     out_html("file ...");
5287                 }
5288                 else
5289                 {
5290                     if ( argPointers.count() )
5291                         c = scan_troff_mandoc(argPointers[0], 1, nullptr);
5292                 }
5293 
5294                 out_html(set_font("R"));
5295                 out_html(NEWLINE);
5296                 if (fillout)
5297                     curpos++;
5298                 else
5299                     curpos = 0;
5300                 break;
5301             }
5302             case REQ_Em:    /* mdoc(7) */
5303             {
5304                 out_html("<em>");
5305                 trans_char(c, '"', '\a');
5306                 c += j;
5307                 if (*c == '\n') c++;
5308                 c = scan_troff_mandoc(c, 1, nullptr);
5309                 out_html("</em>");
5310                 out_html(NEWLINE);
5311                 if (fillout)
5312                     curpos++;
5313                 else
5314                     curpos = 0;
5315                 break;
5316             }
5317             case REQ_Ad:    /* mdoc(7) */
5318             case REQ_Va:    /* mdoc(7) */
5319             case REQ_Xo:    /* mdoc(7) */
5320             case REQ_Xc:    /* mdoc(7) */
5321             {
5322                 /* parse one line in italics */
5323                 out_html(set_font("I"));
5324                 trans_char(c, '"', '\a');
5325                 c = c + j;
5326                 if (*c == '\n') c++;
5327                 c = scan_troff_mandoc(c, 1, nullptr);
5328                 out_html(set_font("R"));
5329                 out_html(NEWLINE);
5330                 if (fillout)
5331                     curpos++;
5332                 else
5333                     curpos = 0;
5334                 break;
5335             }
5336             case REQ_Nd:    /* mdoc(7) */
5337             {
5338                 trans_char(c, '"', '\a');
5339                 c = c + j;
5340                 if (*c == '\n') c++;
5341                 out_html(" - ");
5342                 c = scan_troff_mandoc(c, 1, nullptr);
5343                 out_html(NEWLINE);
5344                 if (fillout)
5345                     curpos++;
5346                 else
5347                     curpos = 0;
5348                 break;
5349             }
5350             case REQ_Nm:    // mdoc(7) "Name Macro"
5351             {
5352                 c += j;
5353                 QList<char*> argPointers;
5354                 getArguments(c, args, &argPointers);
5355 
5356                 if ( mandoc_name.isEmpty() && args.count() )
5357                     mandoc_name = args[0];
5358 
5359                 if ( mandoc_synopsis )
5360                 {
5361                     /* Break lines only in the Synopsis.
5362                      * The Synopsis section seems to be treated
5363                      * as a special case - Bummer!
5364                      * Do not insert a break before the very first Nm in this section
5365                      */
5366 
5367                     if ( mandoc_name_count )
5368                         out_html("<BR>");
5369 
5370                     mandoc_name_count++;
5371                 }
5372 
5373                 out_html(set_font("B"));
5374 
5375                 // only show name if
5376                 // .Nm (first not-null-length defined name)
5377                 // .Nm name
5378                 // do not show
5379                 // .Nm ""
5380                 if ( args.count() == 0 )
5381                     scan_troff(mandoc_name.data(), 0, nullptr);
5382                 else
5383                 {
5384                     if ( argPointers.count() )
5385                         c = scan_troff_mandoc(argPointers[0], 1, nullptr);
5386                 }
5387 
5388                 out_html(set_font("R"));
5389 
5390                 if (fillout)
5391                     curpos++;
5392                 else
5393                     curpos = 0;
5394                 break;
5395             }
5396             case REQ_Cd:    /* mdoc(7) */
5397             case REQ_Cm:    /* mdoc(7) */
5398             case REQ_Ic:    /* mdoc(7) */
5399             case REQ_Ms:    /* mdoc(7) */
5400             case REQ_Or:    /* mdoc(7) */
5401             case REQ_Sy:    /* mdoc(7) */
5402             {
5403                 /* parse one line in bold */
5404                 out_html(set_font("B"));
5405                 trans_char(c, '"', '\a');
5406                 c = c + j;
5407                 if (*c == '\n') c++;
5408                 c = scan_troff_mandoc(c, 1, nullptr);
5409                 out_html(set_font("R"));
5410                 out_html(NEWLINE);
5411                 if (fillout)
5412                     curpos++;
5413                 else
5414                     curpos = 0;
5415                 break;
5416             }
5417             case REQ_Ta:    /* mdoc(7) */
5418             {
5419                 // ### FIXME: this is a simplification
5420                 // for a list item element in a ".Bl -tag -width indent" type list
5421                 // man:mdoc says: "indent == Six constant width spaces"
5422                 out_html("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;");
5423                 c = c + j;
5424                 if (*c == '\n') c++;
5425                 break;
5426             }
5427             // ### FIXME: punctuation is handled badly!
5428             case REQ_Dv:    /* mdoc(7) */
5429             case REQ_Ev:    /* mdoc(7) */
5430             case REQ_Fr:    /* mdoc(7) */
5431             case REQ_Li:    /* mdoc(7) */
5432             case REQ_nN:    /* mdoc(7) */
5433             {
5434                 trans_char(c, '"', '\a');
5435                 c += j;
5436                 if (*c == '\n') c++;
5437                 out_html(set_font("B"));
5438                 c = scan_troff_mandoc(c, 1, nullptr);
5439                 out_html(set_font("R"));
5440                 out_html(NEWLINE);
5441                 if (fillout)
5442                     curpos++;
5443                 else
5444                     curpos = 0;
5445                 break;
5446             }
5447             case REQ_Tn:    /* mdoc(7) Trade Names ... prints its arguments in a smaller font */
5448             {
5449                 trans_char(c, '"', '\a');
5450                 c += j;
5451                 if (*c == '\n') c++;
5452                 out_html("<small>");
5453                 c = scan_troff_mandoc(c, 1, NULL);
5454                 out_html("</small>");
5455                 if (fillout)
5456                     curpos++;
5457                 else
5458                     curpos = 0;
5459                 break;
5460             }
5461             case REQ_Ns:    /* mdoc(7) No-Space Macro */
5462             {
5463                 c += j;
5464                 while (*c && isspace(*c) && (*c != '\n')) c++;
5465                 Q_FALLTHROUGH(); // (The '.Ns' macro always invokes the '.No' macro...)
5466             }
5467             case REQ_No:    /* mdoc(7) Normal Text Macro */
5468             {
5469                 if ( request == REQ_No ) // not fallen through from REQ_Ns
5470                 {
5471                     trans_char(c, '"', '\a');
5472                     c += j;
5473                     if (*c == '\n') c++;
5474                 }
5475                 out_html("<span style=\"font-style:normal\">");
5476                 c = scan_troff_mandoc(c, 1, NULL);
5477                 out_html("</span>");
5478                 out_html(NEWLINE);
5479                 if (fillout)
5480                     curpos++;
5481                 else
5482                     curpos = 0;
5483                 break;
5484             }
5485             case REQ_perc_A:    /* mdoc(7) biblio stuff */
5486             case REQ_perc_D:
5487             case REQ_perc_N:
5488             case REQ_perc_O:
5489             case REQ_perc_P:
5490             case REQ_perc_Q:
5491             case REQ_perc_V:
5492             {
5493                 c = c + j;
5494                 if (*c == '\n') c++;
5495                 c = scan_troff(c, 1, nullptr); /* Don't allow embedded mandoc coms */
5496                 if (fillout)
5497                     curpos++;
5498                 else
5499                     curpos = 0;
5500                 break;
5501             }
5502             case REQ_perc_B:
5503             case REQ_perc_J:
5504             case REQ_perc_R:
5505             case REQ_perc_T:
5506             {
5507                 c = c + j;
5508                 out_html(set_font("I"));
5509                 if (*c == '\n') c++;
5510                 c = scan_troff(c, 1, nullptr); /* Don't allow embedded mandoc coms */
5511                 out_html(set_font("R"));
5512                 if (fillout)
5513                     curpos++;
5514                 else
5515                     curpos = 0;
5516                 break;
5517             }
5518             case REQ_URL: // man(7) ".URL url link trailer"
5519             {
5520                 c += j;
5521 
5522                 getArguments(c, args);
5523                 ignore_links = true;
5524                 out_html("<a href=\"");
5525 
5526                 if ( args.count() > 0 )
5527                     scan_troff(args[0].data(), 0, nullptr);
5528 
5529                 out_html("\">");
5530                 if ( args.count() > 1 )
5531                     scan_troff(args[1].data(), 0, nullptr);
5532 
5533                 out_html("</a>\n");  // trailing newline important to make ignore_links work
5534                 ignore_links = false;
5535 
5536                 if ( args.count() > 2 )
5537                     scan_troff(args[2].data(), 1, nullptr);
5538 
5539                 break;
5540             }
5541             case REQ_tr:  // translate   TODO
5542             {
5543                 c = skip_till_newline(c);
5544                 break;
5545             }
5546             case REQ_nroff: // groff(7)  "NROFF mode"
5547                 mode = true;
5548                 Q_FALLTHROUGH();
5549             case REQ_troff: // groff(7) "TROFF mode"
5550             {
5551                 s_nroff = mode;
5552                 c += j;
5553                 c = skip_till_newline(c);
5554                 break;
5555             }
5556             case REQ_als: // groff(7) "ALias String"
5557             {
5558                 /*
5559                  * Note an alias is supposed to be something like a hard link
5560                  * However to make it simplier, we only copy the string.
5561                  */
5562                 // Be careful: unlike .rn, the destination is first, origin is second
5563                 qCDebug(KIO_MAN_LOG) << "start .als";
5564                 c += j;
5565                 const QByteArray name(scan_identifier(c));
5566                 if (name.isEmpty())
5567                 {
5568                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination string to alias";
5569                     break;
5570                 }
5571                 while (*c && isspace(*c) && *c != '\n') ++c;
5572                 const QByteArray name2(scan_identifier(c));
5573                 if (name2.isEmpty())
5574                 {
5575                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin string to alias";
5576                     break;
5577                 }
5578                 qCDebug(KIO_MAN_LOG) << "Alias " << BYTEARRAY(name2) << " to " << BYTEARRAY(name);
5579                 c = skip_till_newline(c);
5580                 if (name == name2)
5581                 {
5582                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: same origin and destination string to alias: " << BYTEARRAY(name);
5583                     break;
5584                 }
5585                 // Second parameter is origin (unlike in .rn)
5586                 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name2);
5587                 if (it == s_stringDefinitionMap.end())
5588                 {
5589                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string to make alias of " << BYTEARRAY(name2);
5590                 }
5591                 else
5592                 {
5593                     StringDefinition def = (*it);
5594                     s_stringDefinitionMap.insert(name, def);
5595                 }
5596                 qCDebug(KIO_MAN_LOG) << "end .als";
5597                 break;
5598             }
5599             case REQ_rr: // groff(7) "Remove number Register"
5600             {
5601                 qCDebug(KIO_MAN_LOG) << "start .rr";
5602                 c += j;
5603                 const QByteArray name(scan_identifier(c));
5604                 if (name.isEmpty())
5605                 {
5606                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin string to remove/rename: ";
5607                     break;
5608                 }
5609                 c = skip_till_newline(c);
5610                 QMap <QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name);
5611                 if (it == s_numberDefinitionMap.end())
5612                 {
5613                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: trying to remove inexistant number register: ";
5614                 }
5615                 else
5616                 {
5617                     s_numberDefinitionMap.remove(name);
5618                 }
5619                 qCDebug(KIO_MAN_LOG) << "end .rr";
5620                 break;
5621             }
5622             case REQ_rnn: // groff(7) "ReName Number register"
5623             {
5624                 qCDebug(KIO_MAN_LOG) << "start .rnn";
5625                 c += j;
5626                 const QByteArray name(scan_identifier(c));
5627                 if (name.isEmpty())
5628                 {
5629                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin to remove/rename number register";
5630                     break;
5631                 }
5632                 while (*c && isspace(*c) && *c != '\n') ++c;
5633                 const QByteArray name2(scan_identifier(c));
5634                 if (name2.isEmpty())
5635                 {
5636                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination to rename number register";
5637                     break;
5638                 }
5639                 c = skip_till_newline(c);
5640                 QMap<QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name);
5641                 if (it == s_numberDefinitionMap.end())
5642                 {
5643                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find number register to rename" << BYTEARRAY(name);
5644                 }
5645                 else
5646                 {
5647                     NumberDefinition def = (*it);
5648                     s_numberDefinitionMap.remove(name); // ### QT4: removeAll
5649                     s_numberDefinitionMap.insert(name2, def);
5650                 }
5651                 qCDebug(KIO_MAN_LOG) << "end .rnn";
5652                 break;
5653             }
5654             case REQ_aln: // groff(7) "ALias Number Register"
5655             {
5656                 /*
5657                 * Note an alias is supposed to be something like a hard link
5658                 * However to make it simplier, we only copy the string.
5659                 */
5660                 // Be careful: unlike .rnn, the destination is first, origin is second
5661                 qCDebug(KIO_MAN_LOG) << "start .aln";
5662                 c += j;
5663                 const QByteArray name(scan_identifier(c));
5664                 if (name.isEmpty())
5665                 {
5666                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination number register to alias";
5667                     break;
5668                 }
5669                 while (*c && isspace(*c) && *c != '\n') ++c;
5670                 const QByteArray name2(scan_identifier(c));
5671                 if (name2.isEmpty())
5672                 {
5673                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin number register to alias";
5674                     break;
5675                 }
5676                 qCDebug(KIO_MAN_LOG) << "Alias " << BYTEARRAY(name2) << " to " << BYTEARRAY(name);
5677                 c = skip_till_newline(c);
5678                 if (name == name2)
5679                 {
5680                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: same origin and destination number register to alias: " << BYTEARRAY(name);
5681                     break;
5682                 }
5683                 // Second parameter is origin (unlike in .rnn)
5684                 QMap<QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name2);
5685                 if (it == s_numberDefinitionMap.end())
5686                 {
5687                     qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string to make alias: " << BYTEARRAY(name2);
5688                 }
5689                 else
5690                 {
5691                     NumberDefinition def = (*it);
5692                     s_numberDefinitionMap.insert(name, def);
5693                 }
5694                 qCDebug(KIO_MAN_LOG) << "end .aln";
5695                 break;
5696             }
5697             case REQ_shift: // groff(7) "SHIFT parameter"
5698             {
5699                 c += j;
5700                 h = c;
5701                 while (*h && *h != '\n' && isdigit(*h)) ++h;
5702                 const char tempchar = *h;
5703                 *h = 0;
5704                 const QByteArray number(c);
5705                 *h = tempchar;
5706                 c = skip_till_newline(h);
5707                 unsigned int result = 1; // Numbers of shifts to do
5708                 if (!number.isEmpty())
5709                 {
5710                     bool ok = false;
5711                     result = number.toUInt(&ok);
5712                     if (!ok || result < 1)
5713                         result = 1;
5714                 }
5715                 for (unsigned int num = 0; num < result; ++num)
5716                 {
5717                     if (!s_argumentList.isEmpty())
5718                         s_argumentList.pop_front();
5719                 }
5720                 break;
5721             }
5722             case REQ_while: // groff(7) "WHILE loop"
5723             {
5724                 request_while(c, j, mandoc_command);
5725                 break;
5726             }
5727             case REQ_break: // groff(7) Break out of a while loop.
5728             {
5729                 c += j;
5730                 break_the_while_loop = true;
5731                 break;
5732             }
5733             case REQ_do: // groff(7) "DO command"
5734             {
5735                 // ### HACK: we just replace do by a \n and a .
5736                 *c = '\n';
5737                 c++;
5738                 *c = '.';
5739                 // The . will be treated as next character
5740                 break;
5741             }
5742             case REQ_nop:  // groff(7) nop
5743             {
5744                 c += j;
5745                 break;
5746             }
5747             default:
5748             {
5749                 if (mandoc_command &&
5750                         ((isupper(*c) && islower(*(c + 1)))
5751                          || (islower(*c) && isupper(*(c + 1)))))
5752                 {
5753                     /* Let through any mdoc(7) commands that haven't
5754                      * been delt with.
5755                      * I don't want to miss anything out of the text.
5756                      */
5757                     char buf[4] = { c[0], c[1], ' ', 0 };
5758                     out_html(buf);    /* Print the command (it might just be text). */
5759                     c = c + j;
5760                     trans_char(c, '"', '\a');
5761                     if (*c == '\n') c++;
5762                     out_html(set_font("R"));
5763                     c = scan_troff(c, 1, nullptr);
5764                     out_html(NEWLINE);
5765                     if (fillout)
5766                         curpos++;
5767                     else
5768                         curpos = 0;
5769                 }
5770                 else
5771                     c = skip_till_newline(c);
5772                 break;
5773             }
5774             }
5775         }
5776     }
5777     if (fillout)
5778     {
5779         out_html(NEWLINE);
5780         curpos++;
5781     }
5782     return c;
5783 }
5784 
5785 //---------------------------------------------------------------------
5786 
// Tab tracking for the line currently being scanned by scan_troff():
// set to 1 when a '\t' is processed and reset to 0 at every '\n'.
// While it is set and fill mode is active, scan_troff() emits a "<br>"
// in front of the newline.
static int contained_tab = 0;
// Signals whether to look for embedded mandoc commands.
// scan_troff_mandoc() sets it to true around its call(s) to scan_troff()
// and restores the previous value afterwards.
static bool mandoc_line = false;
5789 
scan_troff(char * c,bool san,char ** result)5790 static char *scan_troff(char *c, bool san, char **result)
5791 {   /* san : stop at newline */
5792     QByteArray intbuff;
5793     intbuff.reserve(MED_STR_MAX);
5794 #define FLUSHIBP  { out_html(intbuff); intbuff.clear(); }
5795     char *exbuffer;
5796     int exbuffpos, exbuffmax, exnewline_for_fun;
5797     bool exscaninbuff;
5798     int usenbsp = 0;
5799 
5800     exbuffer = buffer;
5801     exbuffpos = buffpos;
5802     exbuffmax = buffmax;
5803     exnewline_for_fun = newline_for_fun;
5804     exscaninbuff = scaninbuff;
5805     newline_for_fun = 0;
5806     if (result)
5807     {
5808         if (*result)
5809         {
5810             buffer = *result;
5811             buffpos = qstrlen(buffer);
5812             buffmax = buffpos;
5813         }
5814         else
5815         {
5816             buffer = new char[LARGE_STR_MAX + 1];
5817             buffpos = 0;
5818             buffmax = LARGE_STR_MAX;
5819         }
5820         scaninbuff = true;
5821     }
5822     char *h = c; // ### FIXME below are too many tests that may go before the position of c
5823     /* start scanning */
5824 
5825     while (h && *h && (!san || newline_for_fun || (*h != '\n')) && !break_the_while_loop)
5826     {
5827         if (*h == escapesym)
5828         {
5829             h++;
5830             FLUSHIBP;
5831             // ###HACK: I think after escape expansion, the line should be reparsed
5832             // (this seems to be what troff does), but it would double-escape
5833             // HTML chars, e.g. the first escape produces "<span...", the second
5834             // would change that to &lt;span...
5835             // Therefore work around some man pages (e.g. nmap, smb.conf),
5836             // which have \." at beginning of
5837             // line (probably just typos), but troff would skip these
5838             if ( (h[-2] == '\n') && (*h == '.') )  // when line starts with \. ignore line
5839             {
5840                 while (*h && (*h != '\n')) h++;
5841                 continue;  // avoid h++ at the end
5842             }
5843             else
5844             {
5845                 h = scan_escape(h);
5846             }
5847         }
5848         else if (*h == controlsym && h[-1] == '\n')
5849         {
5850             h++;
5851             FLUSHIBP;
5852             h = scan_request(h);
5853             if (h && san && h[-1] == '\n') h--;
5854         }
5855         else if (mandoc_line
5856                  && ((*(h - 1)) && (isspace(*(h - 1)) || (*(h - 1)) == '\n'))
5857                  && *(h) && isupper(*(h))
5858                  && *(h + 1) && islower(*(h + 1))
5859                  && *(h + 2) && isspace(*(h + 2)))
5860         {
5861             // mdoc(7) embedded command eg ".It Fl Ar arg1 Fl Ar arg2"
5862             FLUSHIBP;
5863             h = scan_request(h);
5864             if (san && h[-1] == '\n')
5865             {
5866                 h--;
5867                 break;
5868             }
5869         }
5870         else if (*h == nobreaksym && h[-1] == '\n')
5871         {
5872             h++;
5873             FLUSHIBP;
5874             h = scan_request(h);
5875             if (san && h[-1] == '\n') h--;
5876         }
5877         else
5878         {
5879             if (still_dd && isalnum(*h) && h[-1] == '\n')
5880             {
5881                 /* sometimes a .HP request is not followed by a .br request */
5882                 FLUSHIBP;
5883                 out_html("<DD>");
5884                 curpos = 0;
5885                 still_dd = false;
5886             }
5887             switch (*h)
5888             {
5889             case '&':
5890             {
5891                 intbuff += "&amp;";
5892                 curpos++;
5893                 break;
5894             }
5895             case '<':
5896             {
5897                 intbuff += "&lt;";
5898                 curpos++;
5899                 break;
5900             }
5901             case '>':
5902             {
5903                 intbuff += "&gt;";
5904                 curpos++;
5905                 break;
5906             }
5907             case '"':
5908             {
5909                 intbuff += "&quot;";
5910                 curpos++;
5911                 break;
5912             }
5913             case '\n':
5914             {
5915                 if (h != c && h[-1] == '\n' && fillout)
5916                 {
5917                     intbuff += "<p>";
5918                 }
5919                 if (contained_tab && fillout)
5920                 {
5921                     intbuff += "<br>";
5922                 }
5923                 contained_tab = 0;
5924                 curpos = 0;
5925                 usenbsp = 0;
5926                 intbuff += '\n';
5927                 FLUSHIBP;
5928                 break;
5929             }
5930             case '\t':
5931             {
5932                 int curtab = 0;
5933                 contained_tab = 1;
5934                 FLUSHIBP;
5935                 /* like a typewriter, not like TeX */
5936                 tabstops[19] = curpos + 1;
5937                 while (curtab < maxtstop && tabstops[curtab] <= curpos)
5938                     curtab++;
5939                 if (curtab < maxtstop)
5940                 {
5941                     if (!fillout)
5942                     {
5943                         while (curpos < tabstops[curtab])
5944                         {
5945                             intbuff += ' ';
5946                             if (intbuff.length() > MED_STR_MAX)
5947                             {
5948                                 FLUSHIBP;
5949                             }
5950                             curpos++;
5951                         }
5952                     }
5953                     else
5954                     {
5955                         out_html("<TT>");
5956                         while (curpos < tabstops[curtab])
5957                         {
5958                             out_html("&nbsp;");
5959                             curpos++;
5960                         }
5961                         out_html("</TT>");
5962                     }
5963                 }
5964                 break;
5965             }
5966             default:
5967             {
5968                 if (*h == ' ' && (h[-1] == '\n' || usenbsp))
5969                 {
5970                     FLUSHIBP;
5971                     if (!usenbsp && fillout)
5972                     {
5973                         out_html("<BR>");
5974                         curpos = 0;
5975                     }
5976                     usenbsp = fillout;
5977                     if (usenbsp)
5978                         out_html("&nbsp;");
5979                     else
5980                         intbuff += ' ';
5981                 }
5982                 else if (*h > 31 && *h < 127) intbuff += *h;
5983                 else if (((unsigned char)(*h)) > 127)
5984                 {
5985                     intbuff += *h;
5986                 }
5987                 curpos++;
5988                 break;
5989             }
5990             }
5991             if ( intbuff.length() > MED_STR_MAX ) FLUSHIBP;
5992             h++;
5993         }
5994     }
5995     FLUSHIBP;
5996     if (buffer) buffer[buffpos] = '\0';
5997     if (san && h && *h) h++;
5998     newline_for_fun = exnewline_for_fun;
5999     if (result)
6000     {
6001         *result = buffer;
6002         buffer = exbuffer;
6003         buffpos = exbuffpos;
6004         buffmax = exbuffmax;
6005         scaninbuff = exscaninbuff;
6006     }
6007 
6008     return h;
6009 }
6010 
6011 //---------------------------------------------------------------------
6012 
scan_troff_mandoc(char * c,bool san,char ** result)6013 static char *scan_troff_mandoc(char *c, bool san, char **result)
6014 {
6015     char *ret;
6016     char *end = c;
6017     bool oldval = mandoc_line;
6018     mandoc_line = true;
6019     while (*end && *end != '\n')
6020     {
6021         end++;
6022     }
6023 
6024     if (end > c + 2
6025             && ispunct(*(end - 1))
6026             && isspace(*(end - 2)) && *(end - 2) != '\n')
6027     {
6028         /* Don't format lonely punctuation E.g. in "xyz ," format
6029          * the xyz and then append the comma removing the space.
6030          */
6031         *(end - 2) = '\n';
6032         ret = scan_troff(c, san, result);
6033         *end = 0;
6034         out_html(end - 1);  // output the punct char
6035         *end = '\n';
6036         ret = end;
6037     }
6038     else
6039     {
6040         ret = scan_troff(c, san, result);
6041     }
6042     mandoc_line = oldval;
6043     return ret;
6044 }
6045 
6046 //---------------------------------------------------------------------
6047 // Entry point
6048 
/**
 * Converts one man page (troff source) to HTML.
 *
 * The converter keeps its state in file-level variables and this function is
 * called several times per process, so that state is initialised before the
 * page is parsed and reset again once the output is complete.
 *
 * @param man_page NUL-terminated page source. Nothing is done if it is null.
 *                 The text is copied into a private buffer before parsing, so
 *                 the caller keeps ownership of the original.
 */
void scan_man_page(const char *man_page)
{
    if (!man_page)
        return;

    qCDebug(KIO_MAN_LOG) << "Start scanning man page";

    // ### Do more init
    // Unlike man2html, we actually call this several times, hence the need to
    // properly cleanup all those static vars
    s_ifelseval.clear();

    s_characterDefinitionMap.clear();
    InitCharacterDefinitions();

    s_stringDefinitionMap.clear();
    InitStringDefinitions();

    s_numberDefinitionMap.clear();
    InitNumberDefinitions();

    s_argumentList.clear();
    listItemStack.clear();

    in_div = 0;

    s_dollarZero = ""; // No macro called yet!
    mandoc_name = "";

    output_possible = false;

    // Parse a private, writable copy of the page that is preceded by a '\n'.
    // NOTE(review): the leading newline presumably lets the scanner look at the
    // character in front of the first one - confirm against scan_troff().
    int strLength = qstrlen(man_page);
    char *buf = new char[strLength + 2]; // +1 leading '\n', +1 terminating NUL
    qstrcpy(buf + 1, man_page);
    buf[0] = '\n';

    qCDebug(KIO_MAN_LOG) << "Parse man page";

    scan_troff(buf + 1, 0, nullptr);

    qCDebug(KIO_MAN_LOG) << "Man page parsed!";

    // Emit a closing </DL> for every list level that is still recorded as open
    while (itemdepth || dl_set[itemdepth])
    {
        checkListStack();
        out_html("</DL>\n");
        if (dl_set[itemdepth]) dl_set[itemdepth] = 0;
        else if (itemdepth > 0) itemdepth--;
    }

    // Switch back to font "R" and size 0 before finishing the document
    out_html(set_font("R"));
    out_html(change_to_size(0));
    if (!fillout)
    {
        // Fill mode is off: turn it back on and close the <PRE> element
        fillout = 1;
        out_html("</PRE>");
    }
    out_html(NEWLINE);

    if (in_div)
    {
        output_real("</div><div style=\"margin-left: 2cm\">\n");
        in_div = 0;
    }

    // Only write the document trailer if output_possible was set during parsing
    if (output_possible)
    {
        // The output is buggy with respect to how divs are handled.  Fixing it
        // would require closing divs before other block-level elements are
        // output, and I do not feel like going to find them all.
        output_real("</div></div></div></div>\n");

        output_real("<div id=\"footer\"><div id=\"footer_text\">\n");
#ifdef SIMPLE_MAN2HTML
        output_real("Generated by kio_man");
#else
        output_real("Generated by kio_man version ");
        output_real(QString(KDE_VERSION_STRING).toHtmlEscaped().toLocal8Bit());
#endif
        output_real("</div></div>\n\n");

        output_real("</BODY>\n</HTML>\n");
    }
    delete [] buf;

    // Release memory
    s_characterDefinitionMap.clear();
    s_stringDefinitionMap.clear();
    s_numberDefinitionMap.clear();
    s_argumentList.clear();

    // reinit static variables for reuse
    delete [] buffer;
    buffer = nullptr;

    // Reset the special characters to the values used at the start of a page
    escapesym = '\\';
    nobreaksym = '\'';
    controlsym = '.';
    fieldsym = 0;
    padsym = 0;

    buffpos = 0;
    buffmax = 0;
    scaninbuff = false;
    itemdepth = 0;
    for (int i = 0; i < 20; i++)
        dl_set[i] = 0;
    still_dd = false;
    // Twelve tab stops, one every 8 columns
    for (int i = 0; i < 12; i++)
        tabstops[i] = (i + 1) * 8;
    maxtstop = 12;
    curpos = 0;

    mandoc_name_count = 0;
}
6163 
6164 //---------------------------------------------------------------------
6165 
manPageToUtf8(const QByteArray & input,const QByteArray & dirName)6166 char *manPageToUtf8(const QByteArray &input, const QByteArray &dirName)
6167 {
6168     // as we do not know in which encoding the man source is, try to automatically
6169     // detect it and always return it as UTF-8
6170 
6171     QByteArray encoding;
6172 
6173     // some pages contain "coding:" information. See "man manconv"
6174     // (but I find pages which do not exactly obey the format described in manconv, e.g.
6175     // the control char is either "." or "'")
6176     // Therefore use a QRegularExpression
6177     const QRegularExpression regex("[\\.']\\\\\"[^$]*coding:\\s*(\\S*)\\s", QRegularExpression::CaseInsensitiveOption);
6178     QRegularExpressionMatch rmatch;
6179     if (QString::fromLatin1(input).indexOf(regex, 0, &rmatch) == 0)
6180     {
6181         encoding = rmatch.captured(1).toLatin1();
6182 
6183         qCDebug(KIO_MAN_LOG) << "found embedded encoding" << encoding;
6184     }
6185     else
6186     {
6187         // check according to the dirName the man page is in
6188 
6189         // if the dirName contains a ".", the encoding follows, e.g. "de.UTF-8"
6190         int dot = dirName.indexOf('.');
6191         if ( dot != -1 )
6192         {
6193             encoding = dirName.mid(dot + 1);
6194         }
6195         else
6196         {
6197             /* wanted to use KEncodingProber ... however it fails and gives very unreliable
6198                results ... telling me often UTF-8 encoded pages are EUC-JP or gb18030 ...
6199                In fact all man pages here on openSuse are encoded in UTF-8
6200 
6201             KEncodingProber encodingProber;
6202             encodingProber.feed(input);
6203 
6204             qCDebug(KIO_MAN_LOG) << "auto-detect encoding; guess=" << encodingProber.encoding()
6205                          << "confidence=" << encodingProber.confidence();
6206 
6207             encoding = encodingProber.encoding();
6208             */
6209 
6210             // the original bug report #141340
6211             // mentioned the env var MAN_ICONV_INPUT_CHARSET ... let's check if it is set
6212             // This seems not be a std. man-db env var, but I find several traces of it on the web
6213             encoding = qgetenv("MAN_ICONV_INPUT_CHARSET");
6214 
6215             if ( encoding.isEmpty() )
6216                 encoding = "UTF-8";
6217         }
6218     }
6219 
6220     QTextCodec *codec = 0;
6221 
6222     if ( !encoding.isEmpty() )
6223         codec = QTextCodec::codecForName(encoding);
6224 
6225     if ( !codec ) // fallback encoding
6226         codec = QTextCodec::codecForName("ISO-8859-1");
6227 
6228     qCDebug(KIO_MAN_LOG) << "using the encoding" << codec->name() << "for file in dir" << dirName;
6229 
6230     QString out = codec->toUnicode(input);
6231     QByteArray array = out.toUtf8();
6232 
6233     // TODO get rid of this double allocation and scan a QByteArray
6234     const int len = array.size();
6235     char *buf = new char[len + 4];
6236     memmove(buf + 1, array.data(), len);
6237     buf[0] = buf[len+1] = '\n'; // Start and end with an end of line
6238     buf[len+2] = buf[len+3] = '\0'; // Two NUL characters at end
6239 
6240     return buf;
6241 }
6242 
6243 //---------------------------------------------------------------------
6244 
6245 #ifdef SIMPLE_MAN2HTML
// SIMPLE_MAN2HTML variant of the output sink: the generated text is written
// to standard output as is.
void output_real(const char *insert)
{
    std::ostream &sink = std::cout;
    sink << insert;
}
6250 
read_man_page(const char * filename)6251 char *read_man_page(const char *filename)
6252 {
6253 #if KARCHIVE_VERSION >= QT_VERSION_CHECK(5, 85, 0)
6254     KCompressionDevice fd(QFile::decodeName(filename));
6255 #else
6256     KFilterDev fd(QFile::decodeName(filename));
6257 #endif
6258     if ( !fd.open(QIODevice::ReadOnly) )
6259     {
6260         std::cerr << "read_man_page: can not open " << filename << std::endl;
6261         return nullptr;
6262     }
6263 
6264     QDir dir(QFileInfo(QFile::decodeName(filename)).dir());
6265     dir.cdUp();
6266     char *data = manPageToUtf8(fd.readAll(), QFile::encodeName(dir.dirName()));
6267 
6268     return data;
6269 }
6270 
6271 //--------------------------------------------------------------------------------
6272 
6273 #ifndef KIO_MAN_TEST
main(int argc,char ** argv)6274 int main(int argc, char **argv)
6275 {
6276     if (argc < 2)
6277     {
6278         std::cerr << "call: " << argv[0] << " <filename>\n";
6279         return 1;
6280     }
6281     if (chdir(argv[1]))
6282     {
6283         char *buf = read_man_page(argv[1]);
6284         if (buf)
6285         {
6286             scan_man_page(buf);
6287             delete [] buf;
6288         }
6289     }
6290     else
6291     {
6292         DIR *dir = opendir(".");
6293         struct dirent *ent;
6294         while ((ent = readdir(dir)) != nullptr)
6295         {
6296             std::cerr << "converting " << ent->d_name << std::endl;
6297             char *buf = read_man_page(ent->d_name);
6298             if (buf)
6299             {
6300                 scan_man_page(buf);
6301                 delete [] buf;
6302             }
6303         }
6304         closedir(dir);
6305     }
6306     return 0;
6307 }
6308 #endif
6309 
6310 
6311 #endif
6312 
6313 // kate: indent-mode cstyle; space-indent on; indent-width 2; replace-tabs on;
6314