1 /*
2 This file is part of the KDE libraries
3
4 SPDX-FileCopyrightText: 2000 Stephan Kulow <coolo@kde.org>
5 SPDX-FileCopyrightText: 2005 Nicolas GOUTTE <goutte@kde.org>
6 SPDX-FileCopyrightText: 2011 Martin Koller <kollix@aon.at>
7
8 ... and others (see SVN history)
9 */
10
11 // Start of verbatim comment
12
13 /*
14 ** This program was written by Richard Verhoeven (NL:5482ZX35)
15 ** at the Eindhoven University of Technology. Email: rcb5@win.tue.nl
16 **
17 ** Permission is granted to distribute, modify and use this program as long
18 ** as this comment is not removed or changed.
19 */
20
21 // End of verbatim comment
22
23 /*
24 * man2html-linux-1.0/1.1
25 * This version modified for Redhat/Caldera linux - March 1996.
26 * Michael Hamilton <michael@actrix.gen.nz>.
27 *
28 * man2html-linux-1.2
29 * Added support for BSD mandoc pages - I didn't have any documentation
30 * on the mandoc macros, so I may have missed some.
31 * Michael Hamilton <michael@actrix.gen.nz>.
32 *
33 * vh-man2html-1.3
34 * Renamed to avoid confusion (V for Verhoeven, H for Hamilton).
35 *
36 * vh-man2html-1.4
37 * Now uses /etc/man.config
38 * Added support for compressed pages.
39 * Added "length-safe" string operations for client input parameters.
40 * More secure, -M secured, and client input string lengths checked.
41 *
42 */
43
44 /*
45 ** If you want to use this program for your WWW server, adjust the line
46 ** which defines the CGIBASE or compile it with the -DCGIBASE='"..."' option.
47 **
48 ** You have to adjust the built-in manpath to your local system. Note that
49 ** every directory should start and end with the '/' and that the first
50 ** directory should be "/" to allow a full path as an argument.
51 **
52 ** The program first check if PATH_INFO contains some information.
53 ** If it does (t.i. man2html/some/thing is used), the program will look
54 ** for a manpage called PATH_INFO in the manpath.
55 **
56 ** Otherwise the manpath is searched for the specified command line argument,
57 ** where the following options can be used:
58 **
59 ** name name of manpage (csh, printf, xv, troff)
60 ** section the section (1 2 3 4 5 6 7 8 9 n l 1v ...)
61 ** -M path an extra directory to look for manpages (replaces "/")
62 **
63 ** If man2html finds multiple manpages that satisfy the options, an index
64 ** is displayed and the user can make a choice. If only one page is
65 ** found, that page will be displayed.
66 **
67 ** man2html will add links to the converted manpages. The function add_links
68 ** is used for that. At the moment it will add links as follows, where
69 ** indicates what should match to start with:
70 ** ^^^
71 ** Recognition Item Link
72 ** ----------------------------------------------------------
73 ** name(*) Manpage ../man?/name.*
74 ** ^
75 ** name@hostname Email address mailto:name@hostname
76 ** ^
77 ** method://string URL method://string
78 ** ^^^
79 ** www.host.name WWW server http://www.host.name
80 ** ^^^^
81 ** ftp.host.name FTP server ftp://ftp.host.name
82 ** ^^^^
83 ** <file.h> Include file file:/usr/include/file.h
84 ** ^^^
85 **
86 ** Since man2html does not check if manpages, hosts or email addresses exist,
87 ** some links might not work. For manpages, some extra checks are performed
88 ** to make sure not every () pair creates a link. Also out of date pages
89 ** might point to incorrect places.
90 **
91 ** The program will not allow users to get system specific files, such as
92 ** /etc/passwd. It will check that "man" is part of the specified file and
93 ** that "/../" isn't. Even if someone manages to get such file, man2html will
94 ** handle it like a manpage and will usually not produce any output (or crash).
95 **
96 ** If you find any bugs when normal manpages are converted, please report
97 ** them to me (rcb5@win.tue.nl) after you have checked that man(1) can handle
98 ** the manpage correct.
99 **
100 ** Known bugs and missing features:
101 **
102 ** * Equations are not converted at all.
103 ** * Tables are converted but some features are not possible in html.
104 ** * The tabbing environment is converted by counting characters and adding
105 ** spaces. This might go wrong (outside <PRE>)
106 ** * Some manpages rely on the fact that troff/nroff is used to convert
107 ** them and use features which are not described in the man manpages.
108 ** (definitions, calculations, conditionals, requests). I can't guarantee
109 ** that all these features work on all manpages. (I didn't have the
110 ** time to look through all the available manpages.)
111 */
112
113 #include "man2html.h"
114 #include "kio_man_debug.h"
115 #include "request_hash.h"
116
117 #include <config-runtime.h>
118
119 #include <ctype.h>
120
121 #include <unistd.h>
122 #include <string.h>
123
124 #include <stdio.h>
125
126 #include <QByteArray>
127 #include <QDateTime>
128 #include <QMap>
129 #include <QStack>
130 #include <QString>
131 #include <QTextCodec>
132 #include <QDebug>
133 #include <QRegularExpression>
134
135 #ifdef SIMPLE_MAN2HTML
136 # include <stdlib.h>
137 # include <iostream>
138 # include <dirent.h>
139 # include <sys/stat.h>
140 # include <QFile>
141 # include <QFileInfo>
142 # include <QDir>
143 # include <karchive_version.h>
144 # if KARCHIVE_VERSION >= QT_VERSION_CHECK(5, 85, 0)
145 # include <KCompressionDevice>
146 # else
147 # include <KFilterDev>
148 # endif
149 # define BYTEARRAY(x) x.constData()
150 #else
151 # include <KLocalizedString>
152 # define BYTEARRAY(x) x
153 #endif
154
/// Size of a character array that holds \p n characters plus the terminating NUL.
#define NULL_TERMINATED(n) ((n) + 1)

// String length limits, not counting the terminating NUL.
// HUGE_STR_MAX is the capacity of the static output line buffer "outbuffer".
#define HUGE_STR_MAX 10000
#define LARGE_STR_MAX 2000
#define MED_STR_MAX 500

/// Document type declaration for HTML 4.01 Transitional, including the trailing newline.
#define DOCTYPE "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"

/* mdoc(7) Bl/El lists to HTML list types */
// NOTE(review): the values are distinct powers of two, presumably so that
// they can be combined or tested as bit flags - confirm against the users.
#define BL_DESC_LIST 1
#define BL_BULLET_LIST 2
#define BL_ENUM_LIST 4

/* mdoc(7) Bd/Ed example(?) blocks */
#define BD_LITERAL 1
#define BD_INDENT 2
171
/// Non-zero selects NROFF mode, which is the default.
static int s_nroff = 1; // NROFF mode by default

/// Name remembered for the mdoc "Nm" macro.
static QByteArray mandoc_name; // Nm can store the first used name

/// Counter related to the "Nm" macro; starts at zero.
/// NOTE(review): presumably counts the Nm occurrences seen so far - confirm.
static int mandoc_name_count = 0; /* Don't break on the first Nm */
177
178 /* below this you should not change anything unless you know a lot
179 ** about this program or about troff.
180 */
181
182
/**
 * One row of the table of predefined special characters.
 *
 * The members are initialised positionally (name code, length, text), so
 * their order must not change.
 */
struct CSTRDEF
{
    int nr;         ///< Two-character name packed into one integer: first * 256 + second
    int slen;       ///< Length that is stored together with the replacement text
    const char *st; ///< Replacement text
};
189
190
191
/// A NUL-terminated string that consists of a single line feed.
const char NEWLINE[2] = { '\n', '\0' };
193
194 /**
195 * Class for defining strings and macros
196 */
197 class StringDefinition
198 {
199 public:
StringDefinition(void)200 StringDefinition(void) : m_length(0) {}
StringDefinition(int len,const char * cstr)201 StringDefinition(int len, const char* cstr) : m_length(len), m_output(cstr) {}
202 public:
203 int m_length; ///< Length of output text
204 QByteArray m_output; ///< Defined string
205 };
206
/**
 * Value type of the number register map: the current value of a register
 * and its increment.
 * \note Not for internal read-only registers
 */
class NumberDefinition
{
public:
    /// Register with value 0 and increment 0.
    NumberDefinition() = default;

    /// Register with the given value and increment 0 (also used for implicit conversion from int).
    NumberDefinition(int value)
        : m_value(value)
    {
    }

    /// Register with the given value and increment.
    NumberDefinition(int value, int incr)
        : m_value(value)
        , m_increment(incr)
    {
    }

public:
    int m_value = 0;     ///< value of number register
    int m_increment = 0; ///< Increment of number register
    // ### TODO: display form (.af)
};
222
/**
 * Map of character definitions, keyed by the character name
 */
static QMap<QByteArray, StringDefinition> s_characterDefinitionMap;

/**
 * Map of string variable and macro definitions, keyed by name
 * \note String variables and macros are the same thing!
 */
static QMap<QByteArray, StringDefinition> s_stringDefinitionMap;

/**
 * Map of number registers, keyed by register name
 * \note Internal number registers (starting with a dot) are not handled here
 */
static QMap<QByteArray, NumberDefinition> s_numberDefinitionMap;

/// Copies the predefined character table into the character definition map.
static void fill_old_character_definitions(void);
241
242 /**
243 * Initialize character variables
244 */
InitCharacterDefinitions(void)245 static void InitCharacterDefinitions(void)
246 {
247 fill_old_character_definitions();
248 // ### HACK: as we are converting to HTML too early, define characters with HTML references
249 s_characterDefinitionMap.insert("<-", StringDefinition(1, "←")); // <-
250 s_characterDefinitionMap.insert("->", StringDefinition(1, "→")); // ->
251 s_characterDefinitionMap.insert("<>", StringDefinition(1, "↔")); // <>
252 s_characterDefinitionMap.insert("<=", StringDefinition(1, "≤")); // <=
253 s_characterDefinitionMap.insert(">=", StringDefinition(1, "≥")); // >=
254 // End HACK
255 }
256
257 /**
258 * Initialize string variables
259 */
InitStringDefinitions(void)260 static void InitStringDefinitions(void)
261 {
262 // mdoc-only, see mdoc.samples(7)
263 s_stringDefinitionMap.insert("<=", StringDefinition(1, "≤"));
264 s_stringDefinitionMap.insert(">=", StringDefinition(1, "≥"));
265 s_stringDefinitionMap.insert("Rq", StringDefinition(1, "”"));
266 s_stringDefinitionMap.insert("Lq", StringDefinition(1, "“"));
267 s_stringDefinitionMap.insert("ua", StringDefinition(1, "&circ")); // Note this is different from \(ua
268 s_stringDefinitionMap.insert("aa", StringDefinition(1, "´"));
269 s_stringDefinitionMap.insert("ga", StringDefinition(1, "`"));
270 s_stringDefinitionMap.insert("q", StringDefinition(1, """));
271 s_stringDefinitionMap.insert("Pi", StringDefinition(1, "π"));
272 s_stringDefinitionMap.insert("Ne", StringDefinition(1, "≠"));
273 s_stringDefinitionMap.insert("Le", StringDefinition(1, "≤"));
274 s_stringDefinitionMap.insert("Ge", StringDefinition(1, "≥"));
275 s_stringDefinitionMap.insert("Lt", StringDefinition(1, "<"));
276 s_stringDefinitionMap.insert("Gt", StringDefinition(1, ">"));
277 s_stringDefinitionMap.insert("Pm", StringDefinition(1, "±"));
278 s_stringDefinitionMap.insert("If", StringDefinition(1, "∞"));
279 s_stringDefinitionMap.insert("Na", StringDefinition(3, "NaN"));
280 s_stringDefinitionMap.insert("Ba", StringDefinition(1, "|"));
281 // end mdoc-only
282 // man(7)
283 s_stringDefinitionMap.insert("Tm", StringDefinition(1, "™")); // \*(TM
284 s_stringDefinitionMap.insert("R", StringDefinition(1, "®")); // \*R
285 s_stringDefinitionMap.insert("lq", StringDefinition(1, "“")); // Left angled double quote
286 s_stringDefinitionMap.insert("rq", StringDefinition(1, "”")); // Right angled double quote
287 // end man(7)
288 // Missing characters from man(7):
289 // \*S "Change to default font size"
290 #ifndef SIMPLE_MAN2HTML
291 // Special KDE KIO man:
292 const QByteArray kdeversion(KDE_VERSION_STRING);
293 s_stringDefinitionMap.insert(".KDE_VERSION_STRING", StringDefinition(kdeversion.length(), kdeversion));
294 #endif
295 }
296
297 /**
298 * Initialize number registers
299 * \note Internal read-only registers are not handled here
300 */
InitNumberDefinitions(void)301 static void InitNumberDefinitions(void)
302 {
303 // As the date number registers are more for end-users, better choose local time.
304 // Groff seems to support Gregorian dates only
305 QDate today(QDate::currentDate());
306 s_numberDefinitionMap.insert("year", today.year()); // Y2K-correct year
307 s_numberDefinitionMap.insert("yr", today.year() - 1900); // Y2K-incorrect year
308 s_numberDefinitionMap.insert("mo", today.month());
309 s_numberDefinitionMap.insert("dy", today.day());
310 s_numberDefinitionMap.insert("dw", today.dayOfWeek());
311 }
312
313
/// Packs the two characters of a special character name into one integer
/// (first * 256 + second); fill_old_character_definitions() unpacks it again.
#define V(A,B) ((A)*256+(B))
315
316 //used in expand_char, e.g. for "\(bu"
317 // see groff_char(7) for list
318 static const CSTRDEF standardchars[] =
319 {
320 { V('*', '*'), 1, "*" },
321 { V('*', 'A'), 1, "Α" },
322 { V('*', 'B'), 1, "Β" },
323 { V('*', 'C'), 1, "Ξ" },
324 { V('*', 'D'), 1, "Δ" },
325 { V('*', 'E'), 1, "Ε" },
326 { V('*', 'F'), 1, "Φ" },
327 { V('*', 'G'), 1, "Γ" },
328 { V('*', 'H'), 1, "Θ" },
329 { V('*', 'I'), 1, "Ι" },
330 { V('*', 'K'), 1, "Κ" },
331 { V('*', 'L'), 1, "Λ" },
332 { V('*', 'M'), 1, "&Mu:" },
333 { V('*', 'N'), 1, "Ν" },
334 { V('*', 'O'), 1, "Ο" },
335 { V('*', 'P'), 1, "Π" },
336 { V('*', 'Q'), 1, "Ψ" },
337 { V('*', 'R'), 1, "Ρ" },
338 { V('*', 'S'), 1, "Σ" },
339 { V('*', 'T'), 1, "Τ" },
340 { V('*', 'U'), 1, "Υ" },
341 { V('*', 'W'), 1, "Ω" },
342 { V('*', 'X'), 1, "Χ" },
343 { V('*', 'Y'), 1, "Η" },
344 { V('*', 'Z'), 1, "Ζ" },
345 { V('*', 'a'), 1, "α"},
346 { V('*', 'b'), 1, "β"},
347 { V('*', 'c'), 1, "ξ"},
348 { V('*', 'd'), 1, "δ"},
349 { V('*', 'e'), 1, "ε"},
350 { V('*', 'f'), 1, "φ"},
351 { V('*', 'g'), 1, "γ"},
352 { V('*', 'h'), 1, "θ"},
353 { V('*', 'i'), 1, "ι"},
354 { V('*', 'k'), 1, "κ"},
355 { V('*', 'l'), 1, "λ"},
356 { V('*', 'm'), 1, "μ" },
357 { V('*', 'n'), 1, "ν"},
358 { V('*', 'o'), 1, "ο"},
359 { V('*', 'p'), 1, "π"},
360 { V('*', 'q'), 1, "ψ"},
361 { V('*', 'r'), 1, "ρ"},
362 { V('*', 's'), 1, "σ"},
363 { V('*', 't'), 1, "τ"},
364 { V('*', 'u'), 1, "υ"},
365 { V('*', 'w'), 1, "ω"},
366 { V('*', 'x'), 1, "χ"},
367 { V('*', 'y'), 1, "η"},
368 { V('*', 'z'), 1, "ζ"},
369 { V('+', '-'), 1, "±" }, // not in groff_char(7)
370 { V('+', 'f'), 1, "φ"}, // phi1, we use the standard phi
371 { V('+', 'h'), 1, "θ"}, // theta1, we use the standard theta
372 { V('+', 'p'), 1, "ω"}, // omega1, we use the standard omega
373 { V('1', '2'), 1, "½" },
374 { V('1', '4'), 1, "¼" },
375 { V('3', '4'), 1, "¾" },
376 { V('F', 'i'), 1, "ffi" }, // ffi ligature
377 { V('F', 'l'), 1, "ffl" }, // ffl ligature
378 { V('a', 'p'), 1, "~" },
379 { V('b', 'r'), 1, "|" },
380 { V('b', 'u'), 1, "•" },
381 { V('b', 'v'), 1, "|" },
382 { V('c', 'i'), 1, "○" }, // circle
383 { V('c', 'o'), 1, "©" },
384 { V('c', 't'), 1, "¢" },
385 { V('d', 'e'), 1, "°" },
386 { V('d', 'g'), 1, "†" },
387 { V('d', 'i'), 1, "÷" },
388 { V('e', 'm'), 1, "—" },
389 { V('e', 'n'), 1, "–"},
390 { V('e', 'q'), 1, "=" },
391 { V('e', 's'), 1, "∅" },
392 { V('f', 'f'), 1, "�xFB00;" }, // ff ligature
393 { V('f', 'i'), 1, "�xFB01;" }, // fi ligature
394 { V('f', 'l'), 1, "�xFB02;" }, // fl ligature
395 { V('f', 'm'), 1, "′" },
396 { V('g', 'a'), 1, "`" },
397 { V('h', 'y'), 1, "-" },
398 { V('l', 'c'), 2, "|¯" }, // ### TODO: not in groff_char(7)
399 { V('l', 'f'), 2, "|_" }, // ### TODO: not in groff_char(7)
400 { V('l', 'k'), 1, "<FONT SIZE=+2>{</FONT>" }, // ### TODO: not in groff_char(7)
401 { V('m', 'i'), 1, "-" }, // ### TODO: not in groff_char(7)
402 { V('m', 'u'), 1, "×" },
403 { V('n', 'o'), 1, "¬" },
404 { V('o', 'r'), 1, "|" },
405 { V('p', 'l'), 1, "+" },
406 { V('r', 'c'), 2, "¯|" }, // ### TODO: not in groff_char(7)
407 { V('r', 'f'), 2, "_|" }, // ### TODO: not in groff_char(7)
408 { V('r', 'g'), 1, "®" },
409 { V('r', 'k'), 1, "<FONT SIZE=+2>}</FONT>" }, // ### TODO: not in groff_char(7)
410 { V('r', 'n'), 1, "‾" },
411 { V('r', 'u'), 1, "_" },
412 { V('s', 'c'), 1, "§" },
413 { V('s', 'l'), 1, "/" },
414 { V('s', 'q'), 2, "□" }, // WHITE SQUARE
415 { V('t', 's'), 1, "ς" }, // FINAL SIGMA
416 { V('u', 'l'), 1, "_" },
417 { V('-', 'D'), 1, "Ð" },
418 { V('S', 'd'), 1, "ð" },
419 { V('T', 'P'), 1, "Þ" },
420 { V('T', 'p'), 1, "þ" },
421 { V('A', 'E'), 1, "Æ" },
422 { V('a', 'e'), 1, "æ" },
423 { V('O', 'E'), 1, "Œ" },
424 { V('o', 'e'), 1, "œ" },
425 { V('s', 's'), 1, "ß" },
426 { V('\'', 'A'), 1, "Á" },
427 { V('\'', 'E'), 1, "É" },
428 { V('\'', 'I'), 1, "Í" },
429 { V('\'', 'O'), 1, "Ó" },
430 { V('\'', 'U'), 1, "Ú" },
431 { V('\'', 'Y'), 1, "Ý" },
432 { V('\'', 'a'), 1, "á" },
433 { V('\'', 'e'), 1, "é" },
434 { V('\'', 'i'), 1, "í" },
435 { V('\'', 'o'), 1, "ó" },
436 { V('\'', 'u'), 1, "ú" },
437 { V('\'', 'y'), 1, "ý" },
438 { V(':', 'A'), 1, "Ä" },
439 { V(':', 'E'), 1, "Ë" },
440 { V(':', 'I'), 1, "Ï" },
441 { V(':', 'O'), 1, "Ö" },
442 { V(':', 'U'), 1, "Ü" },
443 { V(':', 'a'), 1, "ä" },
444 { V(':', 'e'), 1, "ë" },
445 { V(':', 'i'), 1, "ï" },
446 { V(':', 'o'), 1, "ö" },
447 { V(':', 'u'), 1, "ü" },
448 { V(':', 'y'), 1, "ÿ" },
449 { V('^', 'A'), 1, "Â" },
450 { V('^', 'E'), 1, "Ê" },
451 { V('^', 'I'), 1, "Î" },
452 { V('^', 'O'), 1, "Ô" },
453 { V('^', 'U'), 1, "Û" },
454 { V('^', 'a'), 1, "â" },
455 { V('^', 'e'), 1, "ê" },
456 { V('^', 'i'), 1, "î" },
457 { V('^', 'o'), 1, "ô" },
458 { V('^', 'u'), 1, "û" },
459 { V('`', 'A'), 1, "À" },
460 { V('`', 'E'), 1, "È" },
461 { V('`', 'I'), 1, "Ì" },
462 { V('`', 'O'), 1, "Ò" },
463 { V('`', 'U'), 1, "Ù" },
464 { V('`', 'a'), 1, "à" },
465 { V('`', 'e'), 1, "è" },
466 { V('`', 'i'), 1, "ì" },
467 { V('`', 'o'), 1, "ò" },
468 { V('`', 'u'), 1, "ù" },
469 { V('~', 'A'), 1, "Ã" },
470 { V('~', 'N'), 1, "Ñ" },
471 { V('~', 'O'), 1, "Õ" },
472 { V('~', 'a'), 1, "ã" },
473 { V('~', 'n'), 1, "ñ" },
474 { V('~', 'o'), 1, "õ" },
475 { V(',', 'C'), 1, "Ç" },
476 { V(',', 'c'), 1, "ç" },
477 { V('/', 'L'), 1, "Ł" },
478 { V('/', 'l'), 1, "ł" },
479 { V('/', 'O'), 1, "Ø" },
480 { V('/', 'o'), 1, "ø" },
481 { V('o', 'A'), 1, "Å" },
482 { V('o', 'a'), 1, "å" },
483 { V('a', '"'), 1, "\"" },
484 { V('a', '-'), 1, "¯" },
485 { V('a', '.'), 1, "." },
486 { V('a', '^'), 1, "ˆ" },
487 { V('a', 'a'), 1, "´" },
488 { V('a', 'b'), 1, "`" },
489 { V('a', 'c'), 1, "¸" },
490 { V('a', 'd'), 1, "¨" },
491 { V('a', 'h'), 1, "˂" }, // caron
492 { V('a', 'o'), 1, "˚" }, // ring
493 { V('a', '~'), 1, "˜" },
494 { V('h', 'o'), 1, "˛" }, // ogonek
495 { V('.', 'i'), 1, "ı" }, // dot less i
496 { V('C', 's'), 1, "¤" }, //krazy:exclude=spelling
497 { V('D', 'o'), 1, "$" },
498 { V('P', 'o'), 1, "£" },
499 { V('Y', 'e'), 1, "¥" },
500 { V('F', 'n'), 1, "ƒ" },
501 { V('F', 'o'), 1, "«" },
502 { V('F', 'c'), 1, "»" },
503 { V('f', 'o'), 1, "‹" }, // single left guillemet
504 { V('f', 'c'), 1, "›" }, // single right guillemet
505 { V('r', '!'), 1, "&iecl;" },
506 { V('r', '?'), 1, "¿" },
507 { V('O', 'f'), 1, "ª" },
508 { V('O', 'm'), 1, "º" },
509 { V('p', 'c'), 1, "·" },
510 { V('S', '1'), 1, "¹" },
511 { V('S', '2'), 1, "²" },
512 { V('S', '3'), 1, "³" },
513 { V('<', '-'), 1, "←" },
514 { V('-', '>'), 1, "→" },
515 { V('<', '>'), 1, "↔" },
516 { V('d', 'a'), 1, "↓" },
517 { V('u', 'a'), 1, "↑" },
518 { V('l', 'A'), 1, "⇐" },
519 { V('r', 'A'), 1, "⇒" },
520 { V('h', 'A'), 1, "⇔" },
521 { V('d', 'A'), 1, "⇓" },
522 { V('u', 'A'), 1, "⇑" },
523 { V('b', 'a'), 1, "|" },
524 { V('b', 'b'), 1, "¦" },
525 { V('t', 'm'), 1, "™" },
526 { V('d', 'd'), 1, "‡" },
527 { V('p', 's'), 1, "¶" },
528 { V('%', '0'), 1, "‰" },
529 { V('f', '/'), 1, "⁄" }, // Fraction slash
530 { V('s', 'd'), 1, "″" },
531 { V('h', 'a'), 1, "^" },
532 { V('t', 'i'), 1, "˜" },
533 { V('l', 'B'), 1, "[" },
534 { V('r', 'B'), 1, "]" },
535 { V('l', 'C'), 1, "{" },
536 { V('r', 'C'), 1, "}" },
537 { V('l', 'a'), 1, "<" },
538 { V('r', 'a'), 1, ">" },
539 { V('l', 'h'), 1, "≤" },
540 { V('r', 'h'), 1, "≥" },
541 { V('B', 'q'), 1, "„" },
542 { V('b', 'q'), 1, "‚" },
543 { V('l', 'q'), 1, "“" },
544 { V('r', 'q'), 1, "”" },
545 { V('o', 'q'), 1, "‘" },
546 { V('c', 'q'), 1, "’" },
547 { V('a', 'q'), 1, "'" },
548 { V('d', 'q'), 1, "\"" },
549 { V('a', 't'), 1, "@" },
550 { V('s', 'h'), 1, "#" },
551 { V('r', 's'), 1, "\\" },
552 { V('t', 'f'), 1, "∴" },
553 { V('~', '~'), 1, "≅" },
554 { V('~', '='), 1, "≈" },
555 { V('!', '='), 1, "≠" },
556 { V('<', '='), 1, "≤" },
557 { V('=', '='), 1, "≡" },
558 { V('=', '~'), 1, "≅" }, // ### TODO: verify
559 { V('>', '='), 1, "≥" },
560 { V('A', 'N'), 1, "∧" },
561 { V('O', 'R'), 1, "∨" },
562 { V('t', 'e'), 1, "∃" },
563 { V('f', 'a'), 1, "∀" },
564 { V('A', 'h'), 1, "ℵ" },
565 { V('I', 'm'), 1, "ℑ" },
566 { V('R', 'e'), 1, "ℜ" },
567 { V('i', 'f'), 1, "∞" },
568 { V('m', 'd'), 1, "⋅" },
569 { V('m', 'o'), 1, "∆" }, // element ### TODO verify
570 { V('n', 'm'), 1, "∉" },
571 { V('p', 't'), 1, "∝" },
572 { V('p', 'p'), 1, "⊥" },
573 { V('s', 'b'), 1, "⊂" },
574 { V('s', 'p'), 1, "⊃" },
575 { V('i', 'b'), 1, "⊆" },
576 { V('i', 'p'), 1, "⊇" },
577 { V('i', 's'), 1, "∫" },
578 { V('s', 'r'), 1, "√" },
579 { V('p', 'd'), 1, "∂" },
580 { V('c', '*'), 1, "⊗" },
581 { V('c', '+'), 1, "⊕" },
582 { V('c', 'a'), 1, "∩" },
583 { V('c', 'u'), 1, "∪" },
584 { V('g', 'r'), 1, "V" }, // gradient ### TODO Where in Unicode?
585 { V('C', 'R'), 1, "↵" },
586 { V('s', 't'), 2, "-)" }, // "such that" ### TODO Where in Unicode?
587 { V('/', '_'), 1, "∠" },
588 { V('w', 'p'), 1, "℘" },
589 { V('l', 'z'), 1, "◊" },
590 { V('a', 'n'), 1, "-" }, // "horizontal arrow extension" ### TODO Where in Unicode?
591 };
592
/**
 * Pairs the abbreviation of a standard, as given to the .St macro, with the
 * long form of its name.
 */
struct StandardName
{
    const char *abbrev;     ///< Abbreviated name, e.g. "-ansiC"
    const char *formalName; ///< Long form of the name
};
599
600 static const StandardName STANDARD_NAMES[] =
601 {
602 { "-ansiC", "ANSI X3.159-1989 ('ANSI C89')" },
603 { "-ansiC-89", "ANSI X3.159-1989 ('ANSI C89')" },
604 { "-isoC", "ISO/IEC 9899:1990 ('ISO C90')" },
605 { "-isoC-90", "ISO/IEC 9899:1990 ('ISO C90')" },
606 { "-isoC-99", "ISO/IEC 9899:1999 ('ISO C99')" },
607 { "-isoC-2011", "ISO/IEC 9899:2011 ('ISO C11')" },
608 { "-iso9945-1-90", "ISO/IEC 9945-1:1990 ('POSIX.1')" },
609 { "-iso9945-1-96", "ISO/IEC 9945-1:1996 ('POSIX.1')" },
610 { "-p1003.1", "IEEE Std 1003.1 ('POSIX.1')" },
611 { "-p1003.1-88", "IEEE Std 1003.1-1988 ('POSIX.1')" },
612 { "-p1003.1-90", "ISO/IEC 9945-1:1990 ('POSIX.1')" },
613 { "-p1003.1-96", "ISO/IEC 9945-1:1996 ('POSIX.1')" },
614 { "-p1003.1b-93", "IEEE Std 1003.1b-1993 ('POSIX.1')" },
615 { "-p1003.1c-95", "IEEE Std 1003.1c-1995 ('POSIX.1')" },
616 { "-p1003.1g-2000", "IEEE Std 1003.1g-2000 ('POSIX.1')" },
617 { "-p1003.1i-95", "IEEE Std 1003.1i-1995 ('POSIX.1')" },
618 { "-p1003.1-2001", "IEEE Std 1003.1-2001 ('POSIX.1')" },
619 { "-p1003.1-2004", "IEEE Std 1003.1-2004 ('POSIX.1')" },
620 { "-p1003.1-2008", "IEEE Std 1003.1-2008 ('POSIX.1')" },
621 { "-iso9945-2-93", "ISO/IEC 9945-2:1993 ('POSIX.2')" },
622 { "-p1003.2", "IEEE Std 1003.2 ('POSIX.2')" },
623 { "-p1003.2-92", "IEEE Std 1003.2-1992 ('POSIX.2')" },
624 { "-p1003.2a-92", "IEEE Std 1003.2a-1992 ('POSIX.2')" },
625 { "-susv2", "Version 2 of the Single UNIX Specification ('SUSv2')" },
626 { "-susv3", "Version 3 of the Single UNIX Specification ('SUSv3')" },
627 { "-svid4", "System V Interface Definition, Fourth Edition ('SVID4')" },
628 { "-xbd5", "X/Open Base Definitions Issue 5 ('XBD5')" },
629 { "-xcu5", "X/Open Commands and Utilities Issue 5 ('XCU5')" },
630 { "-xcurses4.2", "X/Open Curses Issue 4, Version 2 ('XCURSES4.2')" },
631 { "-xns5", "X/Open Networking Services Issue 5 ('XNS5')" },
632 { "-xns5.2", "X/Open Networking Services Issue 5.2 ('XNS5.2')" },
633 { "-xpg3", "X/Open Portability Guide Issue 3 ('XPG3')" },
634 { "-xpg4", "X/Open Portability Guide Issue 4 ('XPG4')" },
635 { "-xpg4.2", "X/Open Portability Guide Issue 4, Version 2 ('XPG4.2')" },
636 { "-xsh5", "X/Open System Interfaces and Headers Issue 5 ('XSH5')" },
637 { "-ieee754", "IEEE Std 754-1985" },
638 { "-iso8802-3", "ISO/IEC 8802-3:1989" }
639 };
640
641
642 /* default: print code */
643
644
645 /* static char eqndelimopen=0, eqndelimclose=0; */
// Special characters of the troff input with their initial values:
// escape character '\', no-break control character '\'' and control
// character '.'. The field and pad characters start out as 0.
static char escapesym = '\\', nobreaksym = '\'', controlsym = '.', fieldsym = 0, padsym = 0;

// Growable capture buffer: while "scaninbuff" is true, out_html() appends its
// text to "buffer" at index "buffpos" and doubles the capacity "buffmax"
// whenever the buffer is full.
static char *buffer = nullptr;
static int buffpos = 0, buffmax = 0;
static bool scaninbuff = false;
// NOTE(review): the following state is not used in the part of the file seen
// here; the names suggest list/indentation bookkeeping - confirm before
// relying on it.
static int itemdepth = 0;
static int in_div = 0;
static int dl_set[20] = { 0 };
static QStack<QByteArray> listItemStack;
static bool still_dd = 0;
// Tab stop columns. Twelve entries are given explicitly (every 8 columns up
// to 96), the remaining eight are zero-initialised.
static int tabstops[20] = { 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96 };
// NOTE(review): presumably the number of valid entries in "tabstops" - confirm.
static int maxtstop = 12;
static int curpos = 0;
static bool break_the_while_loop = false;

// Forward declarations of the troff scanners and the number register lookup
static char *scan_troff(char *c, bool san, char **result);
static char *scan_troff_mandoc(char *c, bool san, char **result);
static int getNumberRegisterValue(const QByteArray &name, int sign = 0);

static QList<QByteArray> s_argumentList;

// Value handed over through setCssFile()
static QByteArray cssFile;

static QByteArray s_dollarZero; // Value of $0
670
setCssFile(const QByteArray & _cssFile)671 void setCssFile(const QByteArray& _cssFile)
672 {
673 cssFile = _cssFile;
674 }
675
fill_old_character_definitions(void)676 static void fill_old_character_definitions(void)
677 {
678 for (const CSTRDEF &standardchar : standardchars)
679 {
680 const int nr = standardchar.nr;
681 const char temp[3] = { char(nr / 256), char(nr % 256), 0 };
682 QByteArray name(temp);
683 s_characterDefinitionMap.insert(name, StringDefinition(standardchar.slen, standardchar.st));
684 }
685 }
686
// Line buffer of out_html(): text is collected here and handed to add_links()
// once a newline was stored or HUGE_STR_MAX characters were collected.
static char outbuffer[NULL_TERMINATED(HUGE_STR_MAX)];
// While non-zero, out_html() removes the first newline from the text it is
// given; the flag drops back to 0 as soon as that newline was seen.
static int no_newline_output = 0;
static int newline_for_fun = 0;
// out_html() only fills "outbuffer" while this is true
static bool output_possible = false;

// Directories in which add_links() looks for a header file before it turns a
// reference to the header into a link. The list ends with a null pointer.
static const char * const includedirs[] =
{
    "/usr/include",
    "/usr/include/sys",
    "/usr/local/include",
    "/opt/local/include",
    "/usr/ccs",
    "/usr/X11R6/include",
    "/usr/openwin/include",
    "/usr/include/g++",
    nullptr
};

// When true, add_links() passes its text on without adding any link
static bool ignore_links = false;
706
add_links(char * c)707 static void add_links(char *c)
708 {
709 /*
710 ** Add the links to the output.
711 ** At the moment the following are recognized:
712 **
713 ** name(*) -> ../man?/name.*
714 ** method://string -> method://string
715 ** www.host.name -> http://www.host.name
716 ** ftp.host.name -> ftp://ftp.host.name
717 ** name@host -> mailto:name@host
718 ** <name.h> -> file:/usr/include/name.h (guess)
719 **
720 ** Other possible links to add in the future:
721 **
722 ** /dir/dir/file -> file:/dir/dir/file
723 */
724
725 if (ignore_links)
726 {
727 output_real(c);
728 return;
729 }
730
731 int i, j, nr;
732 char *f, *g, *h;
733 const int numtests = 6; // Nmber of tests
734 char *idtest[numtests]; // url, mailto, www, ftp, manpage, C header file
735 bool ok;
736 /* search for (section) */
737 nr = 0;
738 idtest[0] = strstr(c + 1, "://");
739 idtest[1] = strchr(c + 1, '@');
740 idtest[2] = strstr(c, "www.");
741 idtest[3] = strstr(c, "ftp.");
742 idtest[4] = strchr(c + 1, '(');
743 idtest[5] = strstr(c + 1, ".h>");
744 for (i = 0; i < numtests; ++i) nr += (idtest[i] != nullptr);
745 while (nr)
746 {
747 j = -1;
748 for (i = 0; i < numtests; i++)
749 if (idtest[i] && (j < 0 || idtest[i] < idtest[j])) j = i;
750 switch (j)
751 {
752 case 5: /* <name.h> */
753 {
754 f = idtest[5];
755 h = f + 2;
756 g = f;
757 while (g > c && g[-1] != ';') g--;
758 bool wrote_include = false;
759
760 if (g != c)
761 {
762 QByteArray dir;
763 QByteArray file(g, h - g);
764 file = file.trimmed();
765 for (int index = 0; includedirs[index]; index++)
766 {
767 QByteArray str(includedirs[index]);
768 str.append('/');
769 str.append(file);
770 if (!access(str.data(), R_OK))
771 {
772 dir = includedirs[index];
773 break;
774 }
775 }
776 if (!dir.isEmpty())
777 {
778
779 char t;
780 t = *g;
781 *g = 0;
782 output_real(c);
783 *g = t;
784 *h = 0;
785
786 QByteArray str;
787 str.append("<A HREF=\"file:");
788 str.append(dir.data());
789 str.append("/");
790 str.append(file.data());
791 str.append("\">");
792 str.append(file.data());
793 str.append("</A>>");
794
795 output_real(str.data());
796 c = f + 6;
797 wrote_include = true;
798 }
799
800 }
801
802 if (!wrote_include)
803 {
804 f[5] = 0;
805 output_real(c);
806 f[5] = ';';
807 c = f + 5;
808 }
809 }
810 break;
811 case 4: /* manpage */
812 f = idtest[j];
813 /* check section */
814 g = strchr(f, ')');
815 // The character before f must be alphanumeric, the end of a HTML tag or the end of a
816 if (g != nullptr && f > c && (g - f) < 12 && (isalnum(f[-1]) || f[-1] == '>' || (f[-1] == ';')) &&
817 (isdigit(f[1]) || (f[1] == 'n')) && f[1] != '0' && ((g - f) <= 2 || isalpha(f[2])))
818 {
819 ok = true;
820 h = f + 2;
821 while (h < g)
822 {
823 if (!isalnum(*h++))
824 {
825 ok = false;
826 break;
827 }
828 }
829 }
830 else
831 ok = false;
832
833 h = f - 1;
834 if (ok)
835 {
836 // Skip
837 qCDebug(KIO_MAN_LOG) << "BEFORE SECTION:" << *h;
838 if ((h > c + 5) && (! memcmp(h - 5, " ", 6)))
839 {
840 h -= 6;
841 qCDebug(KIO_MAN_LOG) << "Skip ";
842 }
843 else if ( (h > (c + 6)) && (!memcmp(h - 6, " ", 7)) ) //   narrow space
844 {
845 h -= 7;
846 }
847 else if (*h == ';')
848 {
849 // Not a non-breaking space, so probably not ok
850 ok = false;
851 }
852 }
853
854 if (ok)
855 {
856 /* this might be a link */
857 /* skip html makeup */
858 while (h > c && *h == '>')
859 {
860 while (h != c && *h != '<') h--;
861 if (h != c) h--;
862 }
863 if (isalnum(*h))
864 {
865 char t, sec, *e;
866 QByteArray fstr(f);
867 e = h + 1;
868 sec = f[1];
869 const int index = fstr.indexOf(')', 2);
870 QByteArray subsec;
871 if (index != -1)
872 subsec = fstr.mid(2, index - 2);
873 else // No closing ')' found, take first character as subsection.
874 subsec = fstr.mid(2, 1);
875 while (h > c && (isalnum(h[-1]) || h[-1] == '_'
876 || h[-1] == ':' || h[-1] == '-' || h[-1] == '.'))
877 h--;
878 t = *h;
879 *h = '\0';
880 output_real(c);
881 *h = t;
882 t = *e;
883 *e = '\0';
884 QByteArray str("<a href=\"man:/");
885 str += h;
886 str += '(';
887 str += char(sec);
888 if (!subsec.isEmpty())
889 str += subsec.toLower();
890 str += ")\">";
891 str += h;
892 str += "</a>";
893 output_real(str.data());
894 *e = t;
895 c = e;
896 }
897 }
898 *f = '\0';
899 output_real(c);
900 *f = '(';
901 idtest[4] = f - 1;
902 c = f;
903 break; /* manpage */
904 case 3: /* ftp */
905 case 2: /* www */
906 g = f = idtest[j];
907 while (*g && (isalnum(*g) || *g == '_' || *g == '-' || *g == '+' ||
908 *g == '.' || *g == '/')) g++;
909 if (g[-1] == '.') g--;
910 if (g - f > 4)
911 {
912 char t;
913 t = *f;
914 *f = '\0';
915 output_real(c);
916 *f = t;
917 t = *g;
918 *g = '\0';
919 QByteArray str;
920 str.append("<A HREF=\"");
921 str.append(j == 3 ? "ftp" : "http");
922 str.append("://");
923 str.append(f);
924 str.append("\">");
925 str.append(f);
926 str.append("</A>");
927 output_real(str.data());
928 *g = t;
929 c = g;
930 }
931 else
932 {
933 f[3] = '\0';
934 output_real(c);
935 c = f + 3;
936 f[3] = '.';
937 }
938 break;
939 case 1: /* mailto */
940 g = f = idtest[1];
941 while (g > c && (isalnum(g[-1]) || g[-1] == '_' || g[-1] == '-' ||
942 g[-1] == '+' || g[-1] == '.' || g[-1] == '%')) g--;
943 if (g - 7 >= c && g[-1] == ':')
944 {
945 // We have perhaps an email address starting with mailto:
946 if (!qstrncmp("mailto:", g - 7, 7))
947 g -= 7;
948 }
949 h = f + 1;
950 while (*h && (isalnum(*h) || *h == '_' || *h == '-' || *h == '+' ||
951 *h == '.')) h++;
952 if (*h == '.') h--;
953 if (h - f > 4 && f - g > 1)
954 {
955 char t;
956 t = *g;
957 *g = '\0';
958 output_real(c);
959 *g = t;
960 t = *h;
961 *h = '\0';
962 QByteArray str;
963 str.append("<A HREF=\"mailto:");
964 str.append(g);
965 str.append("\">");
966 str.append(g);
967 str.append("</A>");
968 output_real(str.data());
969 *h = t;
970 c = h;
971 }
972 else
973 {
974 *f = '\0';
975 output_real(c);
976 *f = '@';
977 idtest[1] = c;
978 c = f;
979 }
980 break;
981 case 0: /* url */
982 g = f = idtest[0]; // ://foo...
983
984 // backup before :// to get protocol
985 while (g > c && isalpha(g[-1]) && islower(g[-1])) g--;
986 h = f + 3; // start past ://
987 // determine length of path and part of query it looks like...
988 while (*h && !isspace(*h) && *h != '<' && *h != '>' && *h != '"' &&
989 *h != '&') h++;
990 // if protocol length 3-6 characters and path has any length at all...
991 // more tests added because this code breaks stylesheet links that use
992 // the correct file:/// stuff.
993 if (f - g > 2 && f - g < 7 && h - f > 3 && (strstr(c, "http://") != nullptr || strstr(c, "ftp://") != nullptr))
994 {
995 char t;
996 t = *g;
997 *g = '\0';
998 output_real(c);
999 *g = t;
1000 t = *h;
1001 *h = '\0';
1002 QByteArray str;
1003 str.append("<A HREF=\"");
1004 str.append(g);
1005 str.append("\">");
1006 str.append(g);
1007 str.append("</A>");
1008 output_real(str.data());
1009 *h = t;
1010 c = h;
1011 }
1012 else
1013 {
1014 f[1] = '\0';
1015 output_real(c);
1016 f[1] = '/';
1017 c = f + 1;
1018 }
1019 break;
1020 default:
1021 break;
1022 }
1023 nr = 0;
1024 if (idtest[0] && idtest[0] <= c) idtest[0] = strstr(c + 1, "://");
1025 if (idtest[1] && idtest[1] <= c) idtest[1] = strchr(c + 1, '@');
1026 if (idtest[2] && idtest[2] < c) idtest[2] = strstr(c, "www.");
1027 if (idtest[3] && idtest[3] < c) idtest[3] = strstr(c, "ftp.");
1028 if (idtest[4] && idtest[4] <= c) idtest[4] = strchr(c + 1, '(');
1029 if (idtest[5] && idtest[5] <= c) idtest[5] = strstr(c + 1, ".h>");
1030 for (i = 0; i < numtests; i++) nr += (idtest[i] != nullptr);
1031 }
1032 output_real(c);
1033 }
1034
1035 //---------------------------------------------------------------------
1036
// Name of the font that is currently active; maintained by set_font().
static QByteArray current_font;
// Currently active font size offset (0 = default); maintained by change_to_size().
static int current_size = 0;

/*
  "fillout" is the mode of text output:
    1 = fill mode (line breaks happen where the browser wants them; normal HTML text)
    0 = no-fill mode (preformatted text, <pre>..</pre>):
        input lines are output as-is, retaining line breaks and ignoring the current line length.
*/
static int fillout = 1;
1047
1048 //---------------------------------------------------------------------
1049
out_html(const char * c)1050 static void out_html(const char *c)
1051 {
1052 if ( !c || !*c ) return;
1053
1054 // Added, probably due to the const?
1055 char *c2 = qstrdup(c);
1056 char *c3 = c2;
1057
1058 static int obp = 0;
1059
1060 if (no_newline_output)
1061 {
1062 int i = 0;
1063 no_newline_output = 1;
1064 while (c2[i])
1065 {
1066 if (!no_newline_output) c2[i-1] = c2[i];
1067 if (c2[i] == '\n') no_newline_output = 0;
1068 i++;
1069 }
1070 if (!no_newline_output) c2[i-1] = 0;
1071 }
1072 if (scaninbuff)
1073 {
1074 while (*c2)
1075 {
1076 if (buffpos >= buffmax)
1077 {
1078 char *h = new char[buffmax*2];
1079
1080 memcpy(h, buffer, buffmax);
1081 delete [] buffer;
1082 buffer = h;
1083 buffmax = buffmax * 2;
1084 }
1085 buffer[buffpos++] = *c2++;
1086 }
1087 }
1088 else if (output_possible)
1089 {
1090 while (*c2)
1091 {
1092 outbuffer[obp++] = *c2;
1093 if (*c2 == '\n' || obp >= HUGE_STR_MAX)
1094 {
1095 outbuffer[obp] = '\0';
1096 add_links(outbuffer);
1097 obp = 0;
1098 }
1099 c2++;
1100 }
1101 }
1102 delete [] c3;
1103 }
1104
1105 //---------------------------------------------------------------------
1106
checkListStack()1107 void checkListStack() // see if we need to end a previously begun list item
1108 {
1109 if ( !listItemStack.isEmpty() && (listItemStack.size() == itemdepth) )
1110 {
1111 out_html("</");
1112 out_html(listItemStack.pop());
1113 out_html(">");
1114 }
1115 }
1116
1117 //---------------------------------------------------------------------
1118
/**
 * Switch to the font @p name and return the HTML needed for the switch.
 *
 * A <span> that is still open for the previous font is closed first.
 * Unknown names fall back to "R" (Regular). current_font is updated.
 *
 * @param name troff font name of one to three characters
 * @return markup closing the old and opening the new font span (may be empty)
 */
static QByteArray set_font(const QByteArray& name)
{
    struct FontMarkup
    {
        const char *font;    // troff font name
        const char *opening; // opening tag, nullptr if the font needs no markup
    };
    static const FontMarkup knownFonts[] =
    {
        { "P", nullptr }, // ### TODO: this seems to mean "precedent font"
        { "R", nullptr }, // regular, do nothing
        { "I", "<span style=\"font-style:italic\">" },
        { "B", "<span style=\"font-weight:bold\">" },
        { "L", "<span style=\"font-family:monospace\">" }, // ### What's L?
        { "BI", "<span style=\"font-style:italic;font-weight:bold\">" },
        // Courier
        { "CR", "<span style=\"font-family:monospace\">" },
        { "CW", "<span style=\"font-family:monospace\">" }, // CW is used by pod2man(1) (part of perldoc(1))
        { "CI", "<span style=\"font-family:monospace;font-style:italic\">" },
        { "CB", "<span style=\"font-family:monospace;font-weight:bold\">" },
        // Times
        { "TR", "<span style=\"font-family:serif\">" },
        { "TI", "<span style=\"font-family:serif;font-style:italic\">" },
        { "TB", "<span style=\"font-family:serif;font-weight:bold\">" },
        // Helvetica
        { "HR", "<span style=\"font-family:sans-serif\">" },
        { "HI", "<span style=\"font-family:sans-serif;font-style:italic\">" },
        { "HB", "<span style=\"font-family:sans-serif;font-weight:bold\">" },
        // Bold italic variants of the three families
        { "CBI", "<span style=\"font-family:monospace;font-style:italic;font-weight:bold\">" },
        { "TBI", "<span style=\"font-family:serif;font-style:italic;font-weight:bold\">" },
        { "HBI", "<span style=\"font-family:sans-serif;font-style:italic;font-weight:bold\">" }
    };

    // Every font but R (Regular) creates <span> elements
    QByteArray markup;
    const bool spanIsOpen = !current_font.isEmpty() && (current_font != "R") && (current_font != "P");
    if (spanIsOpen)
        markup += "</span>";

    const FontMarkup *match = nullptr;
    for (const FontMarkup &candidate : knownFonts)
    {
        if (name == candidate.font)
        {
            match = &candidate;
            break;
        }
    }

    if (!match)
    {
        current_font = "R"; // Still nothing, then it is 'R' (Regular) // krazy:exclude=doublequote_chars
        return markup;
    }

    if (match->opening)
        markup += match->opening;
    current_font = name;
    return markup;
}
1198
1199 //---------------------------------------------------------------------
1200
change_to_size(int nr)1201 static QByteArray change_to_size(int nr)
1202 {
1203 switch (nr)
1204 {
1205 case '0':
1206 case '1':
1207 case '2':
1208 case '3':
1209 case '4':
1210 case '5':
1211 case '6':
1212 case '7':
1213 case '8':
1214 case '9':
1215 nr = nr - '0';
1216 break;
1217 case '\0':
1218 break;
1219 default:
1220 nr = current_size + nr;
1221 if (nr > 9) nr = 9;
1222 if (nr < -9) nr = -9;
1223 break;
1224 }
1225 if (nr == current_size)
1226 return "";
1227 const QByteArray font(current_font);
1228 QByteArray markup;
1229 markup = set_font("R");
1230 if (current_size)
1231 markup += "</span>";
1232 current_size = nr;
1233 if (nr)
1234 {
1235 int percent = 100 + nr*1;
1236 markup += "<span style=\"font-size:";
1237 markup += QByteArray::number(percent);
1238 markup += "%\">";
1239 }
1240 markup += set_font(font);
1241 return markup;
1242 }
1243
1244 //---------------------------------------------------------------------
1245
/* static int asint=0; */
// Numeric result of the last scan_escape_direct() call: reset to 0 on entry,
// set by the \n (number register) and \w escapes.
static int intresult = 0;

// While true, scan_escape() produces no output and the \f and \s escapes do
// not change the current font or size.
static bool skip_escape = false;
// Changes how scan_escape_direct() treats an escaped backslash: when set, the
// position is not advanced and nothing is produced for it.
static bool single_escape = false;

// Defined further down; needed by the scan_named_*() helpers.
static char *scan_escape_direct(char *c, QByteArray& cstr);
1253
1254 /**
1255 * scan a named character
1256 * param c position
1257 */
scan_named_character(char * & c)1258 static QByteArray scan_named_character(char*& c)
1259 {
1260 QByteArray name;
1261 if (*c == '(')
1262 {
1263 // \*(ab Name of two characters
1264 if (c[1] == escapesym)
1265 {
1266 QByteArray cstr;
1267 c = scan_escape_direct(c + 2, cstr);
1268 // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the variable are to be used.
1269 name = cstr;
1270 }
1271 else
1272 {
1273 name += c[1];
1274 name += c[2];
1275 c += 3;
1276 }
1277 }
1278 else if (*c == '[')
1279 {
1280 // \*[long_name] Long name
1281 // Named character groff(7)
1282 // We must find the ] to get a name
1283 c++;
1284 while (*c && *c != ']' && *c != '\n')
1285 {
1286 if (*c == escapesym)
1287 {
1288 QByteArray cstr;
1289 c = scan_escape_direct(c + 1, cstr);
1290 const int result = cstr.indexOf(']');
1291 if (result == -1)
1292 name += cstr;
1293 else
1294 {
1295 // Note: we drop the characters after the ]
1296 name += cstr.left(result);
1297 }
1298 }
1299 else
1300 {
1301 name += *c;
1302 c++;
1303 }
1304 }
1305 if (!*c || *c == '\n')
1306 {
1307 qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse character name: " << BYTEARRAY(name);
1308 return "";
1309 }
1310 c++;
1311 }
1312 else if (*c == 'C' || c[1] == '\'')
1313 {
1314 // \C'name'
1315 c += 2;
1316 while (*c && *c != '\'' && *c != '\n')
1317 {
1318 if (*c == escapesym)
1319 {
1320 QByteArray cstr;
1321 c = scan_escape_direct(c + 1, cstr);
1322 const int result = cstr.indexOf('\'');
1323 if (result == -1)
1324 name += cstr;
1325 else
1326 {
1327 // Note: we drop the characters after the ]
1328 name += cstr.left(result);
1329 }
1330 }
1331 else
1332 {
1333 name += *c;
1334 c++;
1335 }
1336 }
1337 if (!*c || *c == '\n')
1338 {
1339 qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse (\\C mode) character name: " << BYTEARRAY(name);
1340 return "";
1341 }
1342 c++;
1343 }
1344 // Note: characters with a one character length name do not exist, as they would collide with other escapes
1345
1346 // Now we have the name, let us find it between the string names
1347 QMap<QByteArray, StringDefinition>::const_iterator it = s_characterDefinitionMap.constFind(name);
1348 if (it == s_characterDefinitionMap.constEnd())
1349 {
1350 qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find character with name: " << BYTEARRAY(name);
1351 // No output, as an undefined string is empty by default
1352 return "";
1353 }
1354 else
1355 {
1356 qCDebug(KIO_MAN_LOG) << "Character with name: \"" << BYTEARRAY(name) << "\" => " << BYTEARRAY((*it).m_output);
1357 return (*it).m_output;
1358 }
1359 }
1360
1361 //---------------------------------------------------------------------
1362
scan_named_string(char * & c)1363 static QByteArray scan_named_string(char*& c)
1364 {
1365 QByteArray name;
1366 if (*c == '(')
1367 {
1368 // \*(ab Name of two characters
1369 if (c[1] == escapesym)
1370 {
1371 QByteArray cstr;
1372 c = scan_escape_direct(c + 2, cstr);
1373 qCDebug(KIO_MAN_LOG) << "\\(" << BYTEARRAY(cstr);
1374 // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the variable are to be used.
1375 name = cstr;
1376 }
1377 else
1378 {
1379 name += c[1];
1380 name += c[2];
1381 c += 3;
1382 }
1383 }
1384 else if (*c == '[')
1385 {
1386 // \*[long_name] Long name
1387 // Named character groff(7)
1388 // We must find the ] to get a name
1389 c++;
1390 while (*c && *c != ']' && *c != '\n')
1391 {
1392 if (*c == escapesym)
1393 {
1394 QByteArray cstr;
1395 c = scan_escape_direct(c + 1, cstr);
1396 const int result = cstr.indexOf(']');
1397 if (result == -1)
1398 name += cstr;
1399 else
1400 {
1401 // Note: we drop the characters after the ]
1402 name += cstr.left(result);
1403 }
1404 }
1405 else
1406 {
1407 name += *c;
1408 c++;
1409 }
1410 }
1411 if (!*c || *c == '\n')
1412 {
1413 qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse string name: " << BYTEARRAY(name);
1414 return "";
1415 }
1416 c++;
1417 }
1418 else
1419 {
1420 // \*a Name of one character
1421 name += *c;
1422 c++;
1423 }
1424 // Now we have the name, let us find it between the string names
1425 QMap<QByteArray, StringDefinition>::const_iterator it = s_stringDefinitionMap.constFind(name);
1426 if (it == s_stringDefinitionMap.constEnd())
1427 {
1428 // try a number register:
1429 return QByteArray::number(getNumberRegisterValue(name));
1430
1431 //qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string with name: " << BYTEARRAY(name);
1432 // No output, as an undefined string is empty by default
1433 //return "";
1434 }
1435 else
1436 {
1437 qCDebug(KIO_MAN_LOG) << "String with name: '" << BYTEARRAY(name) << "' => >>>" << BYTEARRAY((*it).m_output) << "<<<";
1438 return (*it).m_output;
1439 }
1440 }
1441
1442 //---------------------------------------------------------------------
1443
scan_dollar_parameter(char * & c)1444 static QByteArray scan_dollar_parameter(char*& c)
1445 {
1446 int argno = 0; // No dollar argument number yet!
1447 if (*c == '0')
1448 {
1449 //qCDebug(KIO_MAN_LOG) << "$0";
1450 c++;
1451 return s_dollarZero;
1452 }
1453 else if (*c >= '1' && *c <= '9')
1454 {
1455 //qCDebug(KIO_MAN_LOG) << "$ direct";
1456 argno = (*c - '0');
1457 c++;
1458 }
1459 else if (*c == '(')
1460 {
1461 //qCDebug(KIO_MAN_LOG) << "$(";
1462 if (c[1] && c[2] && c[1] >= '0' && c[1] <= '9' && c[2] >= '0' && c[2] <= '9')
1463 {
1464 argno = (c[1] - '0') * 10 + (c[2] - '0');
1465 c += 3;
1466 }
1467 else
1468 {
1469 if (!c[1])
1470 c++;
1471 else if (!c[2])
1472 c += 2;
1473 else
1474 c += 3;
1475 return "";
1476 }
1477 }
1478 else if (*c == '[')
1479 {
1480 //qCDebug(KIO_MAN_LOG) << "$[";
1481 argno = 0;
1482 c++;
1483 while (*c && *c >= '0' && *c <= '9' && *c != ']')
1484 {
1485 argno *= 10;
1486 argno += (*c - '0');
1487 c++;
1488 }
1489 if (*c != ']')
1490 {
1491 return "";
1492 }
1493 c++;
1494 }
1495 else if ((*c == '*') || (*c == '@'))
1496 {
1497 const bool quote = (*c == '@');
1498 QList<QByteArray>::const_iterator it = s_argumentList.constBegin();
1499 QByteArray param;
1500 bool space = false;
1501 for (; it != s_argumentList.constEnd(); ++it)
1502 {
1503 if (space)
1504 param += ' ';
1505 if (quote)
1506 param += '\"'; // Not as HTML, as it could be used by macros !
1507 param += (*it);
1508 if (quote)
1509 param += '\"'; // Not as HTML, as it could be used by macros!
1510 space = true;
1511 }
1512 c++;
1513 return param;
1514 }
1515 else
1516 {
1517 qCDebug(KIO_MAN_LOG) << "EXCEPTION: unknown parameter $" << *c;
1518 return "";
1519 }
1520 //qCDebug(KIO_MAN_LOG) << "ARG $" << argno;
1521 if (!s_argumentList.isEmpty() && argno > 0)
1522 {
1523 //qCDebug(KIO_MAN_LOG) << "ARG $" << argno << " OK!";
1524 argno--;
1525 if (argno >= s_argumentList.size())
1526 {
1527 qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find parameter $" << (argno + 1);
1528 return "";
1529 }
1530
1531 return s_argumentList[argno];
1532 }
1533 return "";
1534 }
1535
1536 //---------------------------------------------------------------------
1537 /// return the value of read-only number registers
1538
read_only_number_register(const QByteArray & name)1539 static int read_only_number_register(const QByteArray& name)
1540 {
1541 // Internal read-only variables
1542 if (name == ".$")
1543 {
1544 qCDebug(KIO_MAN_LOG) << "\\n[.$] == " << s_argumentList.size();
1545 return s_argumentList.size();
1546 }
1547 else if (name == ".g")
1548 return 0; // We are not groff(1)
1549 else if (name == ".s")
1550 return current_size;
1551 #if 0
1552 // ### TODO: map the fonts to a number
1553 else if (name == ".f")
1554 return current_font;
1555 #endif
1556 else if (name == ".P")
1557 return 0; // We are not printing
1558 else if (name == ".A")
1559 return s_nroff;
1560 #ifndef SIMPLE_MAN2HTML
1561 // Special KDE KIO man:
1562 const QString version_string(KDE_VERSION_STRING);
1563 const int version_major = version_string.section('.', 0, 0).toInt();
1564 const int version_minor = version_string.section('.', 1, 1).toInt();
1565 const int version_patch = version_string.section('.', 2, 2).toInt();
1566 if (name == ".KDE_VERSION_MAJOR")
1567 return version_major;
1568 else if (name == ".KDE_VERSION_MINOR")
1569 return version_minor;
1570 else if (name == ".KDE_VERSION_RELEASE")
1571 return version_patch;
1572 else if (name == ".KDE_VERSION")
1573 return (version_major << 16) | (version_minor << 8) | version_patch;
1574 #endif
1575 else if ( name == ".T" )
1576 return 0; // Set to 1 in nroff, if -T option used; always 0 in troff.
1577
1578 // ### TODO: groff defines many more read-only number registers
1579 qCDebug(KIO_MAN_LOG) << "EXCEPTION: unknown read-only number register: " << BYTEARRAY(name);
1580
1581 return 0; // Undefined variable
1582
1583 }
1584
1585 //---------------------------------------------------------------------
1586
getNumberRegisterValue(const QByteArray & name,int sign)1587 static int getNumberRegisterValue(const QByteArray &name, int sign)
1588 {
1589 if (name[0] == '.')
1590 {
1591 return read_only_number_register(name);
1592 }
1593 else
1594 {
1595 QMap< QByteArray, NumberDefinition >::iterator it = s_numberDefinitionMap.find(name);
1596 if (it == s_numberDefinitionMap.end())
1597 {
1598 return 0; // Undefined variable
1599 }
1600 else
1601 {
1602 (*it).m_value += sign * (*it).m_increment;
1603 return (*it).m_value;
1604 }
1605 }
1606 }
1607
1608 //---------------------------------------------------------------------
1609 /// get the value of a number register and auto-increment if asked
1610
scan_number_register(char * & c)1611 static int scan_number_register(char*& c)
1612 {
1613 int sign = 0; // Sign for auto-increment (if any)
1614 switch (*c)
1615 {
1616 case '+':
1617 sign = 1;
1618 c++;
1619 break;
1620 case '-':
1621 sign = -1;
1622 c++;
1623 break;
1624 default:
1625 break;
1626 }
1627 QByteArray name;
1628 if (*c == '[')
1629 {
1630 c++;
1631 if (*c == '+')
1632 {
1633 sign = 1;
1634 c++;
1635 }
1636 else if (*c == '-')
1637 {
1638 sign = -1;
1639 c++;
1640 }
1641 while (*c && *c != ']' && *c != '\n')
1642 {
1643 // ### TODO: a \*[string] could be inside and should be processed
1644 name += *c;
1645 c++;
1646 }
1647 if (!*c || *c == '\n')
1648 {
1649 qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse number register name: " << BYTEARRAY(name);
1650 return 0;
1651 }
1652 c++;
1653 }
1654 else if (*c == '(')
1655 {
1656 c++;
1657 if (*c == '+')
1658 {
1659 sign = 1;
1660 c++;
1661 }
1662 else if (*c == '-')
1663 {
1664 sign = -1;
1665 c++;
1666 }
1667 name += c[0];
1668 name += c[1];
1669 c += 2;
1670 }
1671 else
1672 {
1673 name += *c;
1674 c++;
1675 }
1676
1677 return getNumberRegisterValue(name, sign);
1678 }
1679
1680 //---------------------------------------------------------------------
1681 // scan a name from the following
1682 // x ... return x (one char)
1683 // (xx ... return xx (two chars)
1684 // [xxx] ... return xxx (any chars)
1685 // after scanning, c points to the terminating char (0, \n or ])
1686
scan_name(char * & c)1687 static QByteArray scan_name(char *&c)
1688 {
1689 QByteArray name;
1690 if ( *c == '(' )
1691 {
1692 int i = 0;
1693 for (c++; *c && (*c != '\n') && (i < 2); c++, i++)
1694 name += *c;
1695 }
1696 else if ( *c == '[' )
1697 {
1698 for (c++; *c && (*c != ']') && (*c != '\n'); c++)
1699 name += *c;
1700 }
1701 else
1702 name += *c;
1703
1704 return name;
1705 }
1706
1707 //---------------------------------------------------------------------
1708 /// get and set font
1709
scan_named_font(char * & c)1710 static QByteArray scan_named_font(char*& c)
1711 {
1712 QByteArray name;
1713 if (*c == '(')
1714 {
1715 // \f(ab Name of two characters
1716 if (c[1] == escapesym)
1717 {
1718 QByteArray cstr;
1719 c = scan_escape_direct(c + 2, cstr);
1720 qCDebug(KIO_MAN_LOG) << "\\(" << BYTEARRAY(cstr);
1721 // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the variable are to be used.
1722 name = cstr;
1723 }
1724 else
1725 {
1726 name += c[1];
1727 name += c[2];
1728 c += 3;
1729 }
1730 }
1731 else if (*c == '[')
1732 {
1733 // \f[long_name] Long name
1734 // We must find the ] to get a name
1735 c++;
1736 while (*c && *c != ']' && *c != '\n')
1737 {
1738 if (*c == escapesym)
1739 {
1740 QByteArray cstr;
1741 c = scan_escape_direct(c + 1, cstr);
1742 const int result = cstr.indexOf(']');
1743 if (result == -1)
1744 name += cstr;
1745 else
1746 {
1747 // Note: we drop the characters after the ]
1748 name += cstr.left(result);
1749 }
1750 }
1751 else
1752 {
1753 name += *c;
1754 c++;
1755 }
1756 }
1757 if (!*c || *c == '\n')
1758 {
1759 qCDebug(KIO_MAN_LOG) << "Found linefeed! Could not parse font name: " << BYTEARRAY(name);
1760 return "";
1761 }
1762 c++;
1763 }
1764 else if ( *c ) // \f alone makes c point at 0-byte
1765 {
1766 // \fa Font name with one character or one digit
1767 // ### HACK do *not* use: name = *c; or name would be empty
1768 name += *c;
1769 c++;
1770 }
1771 //qCDebug(KIO_MAN_LOG) << "FONT NAME: " << BYTEARRAY( name );
1772 // Now we have the name, let us find the font
1773 bool ok = false;
1774 const unsigned int number = name.toUInt(&ok);
1775 if (ok)
1776 {
1777 if (number < 5)
1778 {
1779 const char* const fonts[] = { "R", "I", "B", "BI", "CR" }; // Regular, Italic, Bold, Bold Italic, Courier regular
1780 name = fonts[ number ];
1781 }
1782 else
1783 {
1784 qCDebug(KIO_MAN_LOG) << "EXCEPTION: font has too big number: " << BYTEARRAY(name) << " => " << number;
1785 name = "R"; // Let assume Regular // krazy:exclude=doublequote_chars
1786 }
1787 }
1788 else if (name.isEmpty())
1789 {
1790 qCDebug(KIO_MAN_LOG) << "EXCEPTION: font has no name => using R";
1791 name = "R"; // Let assume Regular // krazy:exclude=doublequote_chars
1792 }
1793 if (!skip_escape)
1794 return set_font(name);
1795 else
1796 return "";
1797 }
1798
1799 //---------------------------------------------------------------------
1800
scan_number_code(char * & c)1801 static QByteArray scan_number_code(char*& c)
1802 {
1803 QByteArray number;
1804 if (*c != '\'')
1805 return "";
1806 c++; // Go past the opening single quote
1807 while (*c && (*c != '\n') && (*c != '\''))
1808 {
1809 number += *c;
1810 c++;
1811 }
1812 bool ok = false;
1813 unsigned int result = number.toUInt(&ok);
1814 if ((result < ' ') || (result > 65535))
1815 return "";
1816 else if (result == '\t')
1817 {
1818 curpos += 8;
1819 curpos &= 0xfff8;
1820 return "\t";
1821 }
1822 number.setNum(result);
1823 number.prepend("&#");
1824 number.append(";");
1825 curpos ++;
1826 c++; // Go past the closing single quote
1827 return number;
1828 }
1829
1830 //---------------------------------------------------------------------
1831 // ### TODO known missing escapes from groff(7):
1832 // ### TODO \R
1833
scan_escape_direct(char * c,QByteArray & cstr)1834 static char *scan_escape_direct(char *c, QByteArray& cstr)
1835 {
1836 bool exoutputp;
1837 bool exskipescape;
1838 int i, j;
1839 bool cplusplus = true; // Should the c++ call be executed at the end of the function
1840
1841 cstr.clear();
1842 intresult = 0;
1843 switch (*c)
1844 {
1845 case 'e':
1846 cstr += escapesym;
1847 curpos++;
1848 break;
1849 case '0': // space of digit width
1850 cstr = " "; // Unicode FIGURE SPACE
1851 curpos++;
1852 break;
1853 case '~': // non-breakable-space (resizeable!)
1854 case ' ':
1855 cstr = " ";
1856 curpos++;
1857 break;
1858 case '|': // half-non-breakable-space
1859 case '^': // quarter-non-breakable-space
1860 cstr = " "; // Unicode NARROW NO-BREAK SPACE
1861 curpos++;
1862 break;
1863 case ':':
1864 break; // ignore optional line break
1865 case ',':
1866 break; // left italic correction, always a zero motion
1867 case '/':
1868 cstr = " "; // Unicode THIN SPACE
1869 curpos++;
1870 break; // italic correction, i.e. a small piece of horizontal motion
1871 case '"': // comment. skip rest of line
1872 for (c++; *c && (*c != '\n'); c++) ;
1873 cplusplus = false;
1874 break;
1875 // ### TODO \# like \" but does not ignore the end of line (groff(7))
1876 case '$':
1877 {
1878 c++;
1879 cstr = scan_dollar_parameter(c);
1880 cplusplus = false;
1881 break;
1882 }
1883 case 'z':
1884 {
1885 c++;
1886 if (*c == '\\')
1887 {
1888 c = scan_escape_direct(c + 1, cstr);
1889 c--;
1890 }
1891 else
1892 cstr = QByteArray(c, 1);
1893 break;
1894 }
1895 case 'k':
1896 {
1897 // Store the current horizontal position in the _input_ line in
1898 // number register with name POSITION
1899 c++;
1900 cstr = scan_name(c);
1901 cstr.clear(); // TODO not implemented; discard it
1902 break;
1903 }
1904 case '!':
1905 case '%':
1906 case 'a':
1907 case 'd':
1908 case 'r':
1909 case 'u':
1910 case '\n':
1911 case '&': // Non-printing, zero width character
1912 case ')': // Transparent non-printing zero width character
1913 break;
1914 case '(':
1915 case '[':
1916 case 'C':
1917 {
1918 // Do not go forward as scan_named_character needs the leading symbol
1919 cstr = scan_named_character(c);
1920 cplusplus = false;
1921 break;
1922 }
1923 case '*':
1924 {
1925 c++;
1926 cstr = scan_named_string(c);
1927 cplusplus = false;
1928 break;
1929 }
1930 case 'f':
1931 {
1932 c++;
1933 cstr = scan_named_font(c);
1934 cplusplus = false;
1935 break;
1936 }
1937 case 'F': // font family
1938 {
1939 c++;
1940 cstr = scan_name(c);
1941
1942 if ( cstr == "C" )
1943 cstr = set_font("CR");
1944 else if ( cstr == "T" )
1945 cstr = set_font("TR");
1946 else if ( cstr == "H" )
1947 cstr = set_font("HR");
1948 else
1949 cstr = set_font(cstr);
1950
1951 break;
1952 }
1953 case 'm': // color
1954 {
1955 c++;
1956 cstr = scan_name(c);
1957
1958 if ( cstr.isEmpty() )
1959 cstr = "</span>";
1960 else
1961 cstr = "<span style='color:" + cstr + "'>";
1962
1963 break;
1964 }
1965 case 's': // ### FIXME: many forms are missing
1966 c++;
1967 j = 0;
1968 i = 0;
1969 if (*c == '-')
1970 {
1971 j = -1;
1972 c++;
1973 }
1974 else if (*c == '+')
1975 {
1976 j = 1;
1977 c++;
1978 }
1979 if (*c == '0') c++;
1980 else if (*c == '\\')
1981 {
1982 c++;
1983 c = scan_escape_direct(c, cstr);
1984 i = intresult;
1985 if (!j) j = 1;
1986 }
1987 else
1988 while (isdigit(*c) && (!i || (!j && i < 4))) i = i * 10 + (*c++) - '0';
1989 if (!j)
1990 {
1991 j = 1;
1992 if (i) i = i - 10;
1993 }
1994 if (!skip_escape) cstr = change_to_size(i * j);
1995 c--;
1996 break;
1997 case 'n':
1998 {
1999 c++;
2000 intresult = scan_number_register(c);
2001 cplusplus = false;
2002 break;
2003 }
2004 case 'w':
2005 c++;
2006 i = *c;
2007 c++;
2008 exoutputp = output_possible;
2009 exskipescape = skip_escape;
2010 output_possible = false;
2011 skip_escape = true;
2012 j = 0;
2013 while (*c != i)
2014 {
2015 j++;
2016 if (*c == escapesym)
2017 c = scan_escape_direct(c + 1, cstr);
2018 else
2019 c++;
2020 }
2021 output_possible = exoutputp;
2022 skip_escape = exskipescape;
2023 intresult = j;
2024 break;
2025 case 'l':
2026 cstr = "<HR>";
2027 curpos = 0;
2028 case 'b':
2029 case 'v':
2030 case 'x':
2031 case 'o':
2032 case 'L':
2033 case 'h':
2034 c++;
2035 i = *c;
2036 c++;
2037 exoutputp = output_possible;
2038 exskipescape = skip_escape;
2039 output_possible = 0;
2040 skip_escape = true;
2041 while (*c != i)
2042 if (*c == escapesym) c = scan_escape_direct(c + 1, cstr);
2043 else c++;
2044 output_possible = exoutputp;
2045 skip_escape = exskipescape;
2046 break;
2047 case 'c':
2048 no_newline_output = 1;
2049 break;
2050 case '{':
2051 newline_for_fun++;
2052 break; // Start conditional block
2053 case '}':
2054 if (newline_for_fun) newline_for_fun--;
2055 break; // End conditional block
2056 case 'p':
2057 cstr = "<BR>\n";
2058 curpos = 0;
2059 break;
2060 case 't':
2061 cstr = "\t";
2062 curpos = (curpos + 8) & 0xfff8;
2063 break;
2064 case '<':
2065 cstr = "<";
2066 curpos++;
2067 break;
2068 case '>':
2069 cstr = ">";
2070 curpos++;
2071 break;
2072 case '\\':
2073 {
2074 if (single_escape)
2075 c--;
2076 else
2077 cstr = "\\";
2078 break;
2079 }
2080 case 'N':
2081 {
2082 c++;
2083 cstr = scan_number_code(c);
2084 cplusplus = false;
2085 break;
2086 }
2087 case '\'':
2088 cstr = "´";
2089 curpos++;
2090 break; // groff(7) ### TODO verify
2091 case '`':
2092 cstr = "`"; // krazy:exclude=doublequote_chars
2093 curpos++;
2094 break; // groff(7)
2095 case '-':
2096 cstr = "-"; // krazy:exclude=doublequote_chars
2097 curpos++;
2098 break; // groff(7)
2099 case '.':
2100 cstr = "."; // krazy:exclude=doublequote_chars
2101 curpos++;
2102 break; // groff(7)
2103 default:
2104 cstr = QByteArray(c, 1);
2105 curpos++;
2106 break;
2107 }
2108 if (cplusplus && *c)
2109 c++;
2110 return c;
2111 }
2112
2113 //---------------------------------------------------------------------
2114
scan_escape(char * c)2115 static char *scan_escape(char *c)
2116 {
2117 QByteArray cstr;
2118 char* result = scan_escape_direct(c, cstr);
2119 if (!skip_escape)
2120 out_html(cstr);
2121 return result;
2122 }
2123
2124 //---------------------------------------------------------------------
2125
2126 class TABLEROW;
2127
2128 class TABLEITEM
2129 {
2130 public:
2131 TABLEITEM(TABLEROW *row);
~TABLEITEM()2132 ~TABLEITEM()
2133 {
2134 delete [] contents;
2135 }
setContents(const char * _contents)2136 void setContents(const char *_contents)
2137 {
2138 delete [] contents;
2139 contents = qstrdup(_contents);
2140 }
getContents() const2141 const char *getContents() const
2142 {
2143 return contents;
2144 }
2145
init()2146 void init()
2147 {
2148 delete [] contents;
2149 contents = nullptr;
2150 size = 0;
2151 align = 0;
2152 valign = 0;
2153 colspan = 1;
2154 rowspan = 1;
2155 font = 0;
2156 vleft = 0;
2157 vright = 0;
2158 space = 0;
2159 width = 0;
2160 }
2161
copyLayout(const TABLEITEM * orig)2162 void copyLayout(const TABLEITEM *orig)
2163 {
2164 size = orig->size;
2165 align = orig->align;
2166 valign = orig->valign;
2167 colspan = orig->colspan;
2168 rowspan = orig->rowspan;
2169 font = orig->font;
2170 vleft = orig->vleft;
2171 vright = orig->vright;
2172 space = orig->space;
2173 width = orig->width;
2174 }
2175
2176 public:
2177 int size, align, valign, colspan, rowspan, font, vleft, vright, space, width;
2178
2179 private:
2180 char *contents;
2181 TABLEROW *_parent;
2182 };
2183
2184 class TABLEROW
2185 {
2186 char *test;
2187 public:
TABLEROW()2188 TABLEROW()
2189 {
2190 test = new char;
2191 prev = nullptr;
2192 next = nullptr;
2193 }
~TABLEROW()2194 ~TABLEROW()
2195 {
2196 qDeleteAll(items);
2197 items.clear();
2198 delete test;
2199
2200 }
length() const2201 int length() const
2202 {
2203 return items.count();
2204 }
has(int index)2205 bool has(int index)
2206 {
2207 return (index >= 0) && (index < (int)items.count());
2208 }
at(int index)2209 TABLEITEM &at(int index)
2210 {
2211 return *items.at(index);
2212 }
2213
2214 TABLEROW *copyLayout() const;
2215
addItem(TABLEITEM * item)2216 void addItem(TABLEITEM *item)
2217 {
2218 items.append(item);
2219 }
2220 TABLEROW *prev, *next;
2221
2222 private:
2223 QList<TABLEITEM*> items;
2224 };
2225
TABLEITEM(TABLEROW * row)2226 TABLEITEM::TABLEITEM(TABLEROW *row) : contents(nullptr), _parent(row)
2227 {
2228 init();
2229 _parent->addItem(this);
2230 }
2231
copyLayout() const2232 TABLEROW *TABLEROW::copyLayout() const
2233 {
2234 TABLEROW *newrow = new TABLEROW();
2235
2236 QListIterator<TABLEITEM *> it(items);
2237 while (it.hasNext())
2238 {
2239 TABLEITEM *newitem = new TABLEITEM(newrow);
2240 newitem->copyLayout(it.next());
2241 }
2242 return newrow;
2243 }
2244
// Option keywords for tables; the list is terminated by nullptr.
// NOTE(review): presumably the global options of tbl(1) - the code that
// uses this list is outside this part of the file.
static const char * const tableopt[] = { "center", "expand", "box", "allbox",
                                         "doublebox", "tab", "linesize",
                                         "delim", nullptr
                                       };
// Length of the keyword at the same index in tableopt (0 for the terminator).
static const int tableoptl[] = { 6, 6, 3, 6, 9, 3, 8, 5, 0};
2250
2251
clear_table(TABLEROW * table)2252 static void clear_table(TABLEROW *table)
2253 {
2254 TABLEROW *tr1, *tr2;
2255
2256 tr1 = table;
2257 while (tr1->prev) tr1 = tr1->prev;
2258 while (tr1)
2259 {
2260 tr2 = tr1;
2261 tr1 = tr1->next;
2262 delete tr2;
2263 }
2264 }
2265
2266 //---------------------------------------------------------------------
2267
2268 static char *scan_expression(char *c, int *result);
2269
2270 //---------------------------------------------------------------------
2271
scan_format(char * c,TABLEROW ** result,int * maxcol)2272 static char *scan_format(char *c, TABLEROW **result, int *maxcol)
2273 {
2274 TABLEROW *layout, *currow;
2275 TABLEITEM *curfield;
2276 int i, j;
2277 if (*result)
2278 {
2279 clear_table(*result);
2280 }
2281 layout = currow = new TABLEROW();
2282 curfield = new TABLEITEM(currow);
2283 while (*c && *c != '.')
2284 {
2285 switch (*c)
2286 {
2287 case 'C':
2288 case 'c':
2289 case 'N':
2290 case 'n':
2291 case 'R':
2292 case 'r':
2293 case 'A':
2294 case 'a':
2295 case 'L':
2296 case 'l':
2297 case 'S':
2298 case 's':
2299 case '^':
2300 case '_':
2301 if (curfield->align)
2302 curfield = new TABLEITEM(currow);
2303 curfield->align = toupper(*c);
2304 c++;
2305 break;
2306 case 'i':
2307 case 'I':
2308 case 'B':
2309 case 'b':
2310 curfield->font = toupper(*c);
2311 c++;
2312 break;
2313 case 'f':
2314 case 'F':
2315 c++;
2316 curfield->font = toupper(*c);
2317 c++;
2318 if (!isspace(*c) && *c != '.') c++;
2319 break;
2320 case 't':
2321 case 'T':
2322 curfield->valign = 't';
2323 c++;
2324 break;
2325 case 'p':
2326 case 'P':
2327 c++;
2328 i = j = 0;
2329 if (*c == '+')
2330 {
2331 j = 1;
2332 c++;
2333 }
2334 if (*c == '-')
2335 {
2336 j = -1;
2337 c++;
2338 }
2339 while (isdigit(*c)) i = i * 10 + (*c++) - '0';
2340 if (j) curfield->size = i * j;
2341 else curfield->size = j - 10;
2342 break;
2343 case 'v':
2344 case 'V':
2345 case 'w':
2346 case 'W':
2347 c = scan_expression(c + 2, &curfield->width);
2348 break;
2349 case '|':
2350 if (curfield->align) curfield->vleft++;
2351 else curfield->vright++;
2352 c++;
2353 break;
2354 case 'e':
2355 case 'E':
2356 c++;
2357 break;
2358 case '0':
2359 case '1':
2360 case '2':
2361 case '3':
2362 case '4':
2363 case '5':
2364 case '6':
2365 case '7':
2366 case '8':
2367 case '9':
2368 i = 0;
2369 while (isdigit(*c)) i = i * 10 + (*c++) - '0';
2370 curfield->space = i;
2371 break;
2372 case ',':
2373 case '\n':
2374 currow->next = new TABLEROW();
2375 currow->next->prev = currow;
2376 currow = currow->next;
2377 currow->next = nullptr;
2378 curfield = new TABLEITEM(currow);
2379 c++;
2380 break;
2381 default:
2382 c++;
2383 break;
2384 }
2385 }
2386 if (*c == '.') while (*c++ != '\n');
2387 *maxcol = 0;
2388 currow = layout;
2389 while (currow)
2390 {
2391 i = currow->length();
2392 if (i > *maxcol) *maxcol = i;
2393 currow = currow->next;
2394 }
2395 *result = layout;
2396 return c;
2397 }
2398
2399 //---------------------------------------------------------------------
2400
next_row(TABLEROW * tr)2401 static TABLEROW *next_row(TABLEROW *tr)
2402 {
2403 if (tr->next)
2404 {
2405 tr = tr->next;
2406 if (!tr->next)
2407 return next_row(tr);
2408 return tr;
2409 }
2410 else
2411 {
2412 tr->next = tr->copyLayout();
2413 tr->next->prev = tr;
2414 return tr->next;
2415 }
2416 }
2417
2418 //---------------------------------------------------------------------
2419
// Troff input ("\fR\s0") that scan_table() scans after the contents of every
// table cell; presumably this switches back to the Roman font and the default size.
static char itemreset[20] = "\\fR\\s0";

// Advance `curfield` to the next column of `currow`, stepping over columns
// whose alignment is 'S'. Expects variables with exactly these names at the
// point of use. Note that the expansion already ends with a ';'.
#define FORWARDCUR do { curfield++; } while (currow->has(curfield) && currow->at(curfield).align=='S');
2423
scan_table(char * c)2424 static char *scan_table(char *c)
2425 {
2426 char *h;
2427 char *g;
2428 int center = 0, expand = 0, box = 0, border = 0, linesize = 1;
2429 int i, j, maxcol = 0, finished = 0;
2430 QByteArray oldfont;
2431 int oldsize, oldfillout;
2432 char itemsep = '\t';
2433 TABLEROW *layout = nullptr, *currow;
2434 int curfield = -1;
2435 while (*c++ != '\n');
2436 h = c;
2437 if (*h == '.') return c -1;
2438 oldfont = current_font;
2439 oldsize = current_size;
2440 oldfillout = fillout;
2441 out_html(set_font("R"));
2442 out_html(change_to_size(0));
2443 if (!fillout)
2444 {
2445 fillout = 1;
2446 out_html("</PRE>");
2447 }
2448 while (*h && *h != '\n') h++;
2449 if (h[-1] == ';')
2450 {
2451 /* scan table options */
2452 while (c < h)
2453 {
2454 while (isspace(*c)) c++;
2455 for (i = 0; tableopt[i] && qstrncmp(tableopt[i], c, tableoptl[i]); i++);
2456 c = c + tableoptl[i];
2457 switch (i)
2458 {
2459 case 0:
2460 center = 1;
2461 break;
2462 case 1:
2463 expand = 1;
2464 break;
2465 case 2:
2466 box = 1;
2467 break;
2468 case 3:
2469 border = 1;
2470 break;
2471 case 4:
2472 box = 2;
2473 break;
2474 case 5:
2475 while (*c++ != '(');
2476 itemsep = *c++;
2477 break;
2478 case 6:
2479 while (*c++ != '(');
2480 linesize = 0;
2481 while (isdigit(*c)) linesize = linesize * 10 + (*c++) - '0';
2482 break;
2483 case 7:
2484 while (*c != ')') c++;
2485 default:
2486 break;
2487 }
2488 c++;
2489 }
2490 c = h + 1;
2491 }
2492 /* scan layout */
2493 c = scan_format(c, &layout, &maxcol);
2494 // currow=layout;
2495 currow = next_row(layout);
2496 curfield = 0;
2497 i = 0;
2498 while (!finished && *c)
2499 {
2500 /* search item */
2501 h = c;
2502 if ((*c == '_' || *c == '=') && (c[1] == itemsep || c[1] == '\n'))
2503 {
2504 if (c[-1] == '\n' && c[1] == '\n')
2505 {
2506 if (currow->prev)
2507 {
2508 currow->prev->next = new TABLEROW();
2509 currow->prev->next->next = currow;
2510 currow->prev->next->prev = currow->prev;
2511 currow->prev = currow->prev->next;
2512 }
2513 else
2514 {
2515 currow->prev = layout = new TABLEROW();
2516 currow->prev->prev = nullptr;
2517 currow->prev->next = currow;
2518 }
2519 TABLEITEM *newitem = new TABLEITEM(currow->prev);
2520 newitem->align = *c;
2521 newitem->colspan = maxcol;
2522 curfield = 0;
2523 c = c + 2;
2524 }
2525 else
2526 {
2527 if (currow->has(curfield))
2528 {
2529 currow->at(curfield).align = *c;
2530 FORWARDCUR;
2531 }
2532 if (c[1] == '\n')
2533 {
2534 currow = next_row(currow);
2535 curfield = 0;
2536 }
2537 c = c + 2;
2538 }
2539 }
2540 else if (*c == 'T' && c[1] == '{')
2541 {
2542 h = c + 2;
2543 c = strstr(h, "\nT}");
2544 c++;
2545 *c = '\0';
2546 g = nullptr;
2547 scan_troff(h, 0, &g);
2548 scan_troff(itemreset, 0, &g);
2549 *c = 'T';
2550 c += 3;
2551 if (currow->has(curfield))
2552 {
2553 currow->at(curfield).setContents(g);
2554 FORWARDCUR;
2555 }
2556 delete [] g;
2557
2558 if (c[-1] == '\n')
2559 {
2560 currow = next_row(currow);
2561 curfield = 0;
2562 }
2563 }
2564 else if (*c == '.' && c[1] == 'T' && c[2] == '&' && c[-1] == '\n')
2565 {
2566 TABLEROW *hr;
2567 while (*c++ != '\n');
2568 hr = currow;
2569 currow = currow->prev;
2570 hr->prev = nullptr;
2571 c = scan_format(c, &hr, &i);
2572 hr->prev = currow;
2573 currow->next = hr;
2574 currow = hr;
2575 next_row(currow);
2576 curfield = 0;
2577 }
2578 else if (*c == '.' && c[1] == 'T' && c[2] == 'E' && c[-1] == '\n')
2579 {
2580 finished = 1;
2581 while (*c++ != '\n');
2582 if (currow->prev)
2583 currow->prev->next = nullptr;
2584 currow->prev = nullptr;
2585 clear_table(currow);
2586 currow = nullptr;
2587 }
2588 else if (*c == '.' && c[-1] == '\n' && !isdigit(c[1]))
2589 {
2590 /* skip troff request inside table (usually only .sp ) */
2591 while (*c++ != '\n');
2592 }
2593 else
2594 {
2595 h = c;
2596 while (*c && (*c != itemsep || c[-1] == '\\') &&
2597 (*c != '\n' || c[-1] == '\\')) c++;
2598 i = 0;
2599 if (*c == itemsep)
2600 {
2601 i = 1;
2602 *c = '\n';
2603 }
2604 if (h[0] == '\\' && h[2] == '\n' &&
2605 (h[1] == '_' || h[1] == '^'))
2606 {
2607 if (currow->has(curfield))
2608 {
2609 currow->at(curfield).align = h[1];
2610 FORWARDCUR;
2611 }
2612 h = h + 3;
2613 }
2614 else
2615 {
2616 g = nullptr;
2617 h = scan_troff(h, 1, &g);
2618 scan_troff(itemreset, 0, &g);
2619 if (currow->has(curfield))
2620 {
2621 currow->at(curfield).setContents(g);
2622 FORWARDCUR;
2623 }
2624 delete [] g;
2625 }
2626 if (i) *c = itemsep;
2627 c = h;
2628 if (c[-1] == '\n')
2629 {
2630 currow = next_row(currow);
2631 curfield = 0;
2632 }
2633 }
2634 }
2635 /* calculate colspan and rowspan */
2636 currow = layout;
2637 while (currow->next) currow = currow->next;
2638 while (currow)
2639 {
2640 int ti = 0, ti1 = 0, ti2 = -1;
2641 TABLEROW *prev = currow->prev;
2642 if (!prev)
2643 break;
2644
2645 while (prev->has(ti1))
2646 {
2647 if (currow->has(ti))
2648 switch (currow->at(ti).align)
2649 {
2650 case 'S':
2651 if (currow->has(ti2))
2652 {
2653 currow->at(ti2).colspan++;
2654 if (currow->at(ti2).rowspan < prev->at(ti1).rowspan)
2655 currow->at(ti2).rowspan = prev->at(ti1).rowspan;
2656 }
2657 break;
2658 case '^':
2659 if (prev->has(ti1)) prev->at(ti1).rowspan++;
2660 default:
2661 if (ti2 < 0) ti2 = ti;
2662 else
2663 {
2664 do
2665 {
2666 ti2++;
2667 }
2668 while (currow->has(ti2) && currow->at(ti2).align == 'S');
2669 }
2670 break;
2671 }
2672 ti++;
2673 if (ti1 >= 0) ti1++;
2674 }
2675 currow = currow->prev;
2676 }
2677 /* produce html output */
2678 if (center) out_html("<CENTER>");
2679 if (box == 2) out_html("<TABLE BORDER><TR><TD>");
2680 out_html("<TABLE");
2681 if (box || border)
2682 {
2683 out_html(" BORDER");
2684 if (!border) out_html("><TR><TD><TABLE");
2685 if (expand) out_html(" WIDTH=\"100%\"");
2686 }
2687 out_html(">\n");
2688 currow = layout;
2689 while (currow)
2690 {
2691 j = 0;
2692 out_html("<TR VALIGN=top>");
2693 curfield = 0;
2694 while (currow->has(curfield))
2695 {
2696 if (currow->at(curfield).align != 'S' && currow->at(curfield).align != '^')
2697 {
2698 out_html("<TD style='padding-right:10px; padding-left:10px;'");
2699 switch (currow->at(curfield).align)
2700 {
2701 case 'N':
2702 currow->at(curfield).space += 4;
2703 case 'R':
2704 out_html(" ALIGN=right");
2705 break;
2706 case 'C':
2707 out_html(" ALIGN=center");
2708 default:
2709 break;
2710 }
2711 if (!currow->at(curfield).valign && currow->at(curfield).rowspan > 1)
2712 out_html(" VALIGN=center");
2713 if (currow->at(curfield).colspan > 1)
2714 {
2715 out_html(" COLSPAN=");
2716 out_html(QByteArray::number(currow->at(curfield).colspan));
2717 }
2718 if (currow->at(curfield).rowspan > 1)
2719 {
2720 out_html(" ROWSPAN=");
2721 out_html(QByteArray::number(currow->at(curfield).rowspan));
2722 }
2723 j = j + currow->at(curfield).colspan;
2724 out_html(">");
2725 if (currow->at(curfield).size) out_html(change_to_size(currow->at(curfield).size));
2726 if (currow->at(curfield).font)
2727 out_html(set_font(QByteArray::number(currow->at(curfield).font)));
2728 switch (currow->at(curfield).align)
2729 {
2730 case '=':
2731 out_html("<HR><HR>");
2732 break;
2733 case '_':
2734 out_html("<HR>");
2735 break;
2736 default:
2737 out_html(currow->at(curfield).getContents());
2738 break;
2739 }
2740 if (currow->at(curfield).space)
2741 for (i = 0; i < currow->at(curfield).space; i++) out_html(" ");
2742 if (currow->at(curfield).font) out_html(set_font("R"));
2743 if (currow->at(curfield).size) out_html(change_to_size(0));
2744 if (j >= maxcol && currow->at(curfield).align > '@' && currow->at(curfield).align != '_')
2745 out_html("<BR>");
2746 out_html("</TD>");
2747 }
2748 curfield++;
2749 }
2750 out_html("</TR>\n");
2751 currow = currow->next;
2752 }
2753
2754 clear_table(layout);
2755
2756 if (box && !border) out_html("</TABLE>");
2757 out_html("</TABLE>");
2758 if (box == 2) out_html("</TABLE>");
2759 if (center)
2760 out_html("</CENTER>\n");
2761 else
2762 out_html("\n");
2763 if (!oldfillout) out_html("<PRE>");
2764 fillout = oldfillout;
2765 out_html(change_to_size(oldsize));
2766 out_html(set_font(oldfont));
2767 return c;
2768 }
2769
2770 //---------------------------------------------------------------------
2771
scan_expression(char * c,int * result,const unsigned int numLoop)2772 static char *scan_expression(char *c, int *result, const unsigned int numLoop)
2773 {
2774 int value = 0, value2, sign = 1, opex = 0;
2775 char oper = 'c';
2776 bool oldSkipEscape = skip_escape;
2777 skip_escape = true; // evaluating an expression shall not print it
2778
2779 if (*c == '!')
2780 {
2781 c = scan_expression(c + 1, &value);
2782 value = (!value);
2783 }
2784 else if (*c == 'n')
2785 {
2786 c++;
2787 value = s_nroff;
2788 }
2789 else if (*c == 't')
2790 {
2791 c++;
2792 value = 1 - s_nroff;
2793 }
2794 else if (*c == '\'' || *c == '"' || *c < ' ' || (*c == '\\' && c[1] == '('))
2795 {
2796 /* ?string1?string2?
2797 ** test if string1 equals string2.
2798 */
2799 char *st1 = nullptr, *st2 = nullptr, *h;
2800 char *tcmp = nullptr;
2801 char sep;
2802 sep = *c;
2803 if (sep == '\\')
2804 {
2805 tcmp = c;
2806 c = c + 3;
2807 }
2808 c++;
2809 h = c;
2810 while (*c != sep && (!tcmp || qstrncmp(c, tcmp, 4))) c++;
2811 *c = '\n';
2812 scan_troff(h, 1, &st1);
2813 *c = sep;
2814 if (tcmp) c = c + 3;
2815 c++;
2816 h = c;
2817 while (*c != sep && (!tcmp || qstrncmp(c, tcmp, 4))) c++;
2818 *c = '\n';
2819 scan_troff(h, 1, &st2);
2820 *c = sep;
2821 if (!st1 && !st2) value = 1;
2822 else if (!st1 || !st2) value = 0;
2823 else value = (!qstrcmp(st1, st2));
2824 delete [] st1;
2825 delete [] st2;
2826 if (tcmp) c = c + 3;
2827 c++;
2828 }
2829 else
2830 {
2831 while (*c && (!isspace(*c) || (numLoop > 0)) && *c != ')' && opex >= 0)
2832 {
2833 opex = 0;
2834 switch (*c)
2835 {
2836 case '(':
2837 c = scan_expression(c + 1, &value2, numLoop + 1);
2838 value2 = sign * value2;
2839 opex = 1;
2840 break;
2841 case '.':
2842 case '0':
2843 case '1':
2844 case '2':
2845 case '3':
2846 case '4':
2847 case '5':
2848 case '6':
2849 case '7':
2850 case '8':
2851 case '9':
2852 {
2853 int num = 0, denum = 1;
2854 value2 = 0;
2855 while (isdigit(*c)) value2 = value2 * 10 + ((*c++) - '0');
2856 if (*c == '.' && isdigit(c[1]))
2857 {
2858 c++;
2859 while (isdigit(*c))
2860 {
2861 num = num * 10 + ((*c++) - '0');
2862 denum = denum * 10;
2863 }
2864 }
2865 if (isalpha(*c))
2866 {
2867 /* scale indicator */
2868 switch (*c)
2869 {
2870 case 'i': /* inch -> 10pt */
2871 value2 = value2 * 10 + (num * 10 + denum / 2) / denum;
2872 num = 0;
2873 break;
2874 default:
2875 break;
2876 }
2877 c++;
2878 }
2879 value2 = value2 + (num + denum / 2) / denum;
2880 value2 = sign * value2;
2881 opex = 1;
2882 if (*c == '.')
2883 opex = -1;
2884
2885 }
2886 break;
2887 case '\\':
2888 c = scan_escape(c + 1);
2889 value2 = intresult * sign;
2890 if (isalpha(*c)) c++; /* scale indicator */
2891 opex = 1;
2892 break;
2893 case '-':
2894 if (oper)
2895 {
2896 sign = -1;
2897 c++;
2898 break;
2899 }
2900 Q_FALLTHROUGH();
2901 case '>':
2902 case '<':
2903 case '+':
2904 case '/':
2905 case '*':
2906 case '%':
2907 case '&':
2908 case '=':
2909 case ':':
2910 if (c[1] == '=') oper = (*c++) + 16;
2911 else oper = *c;
2912 c++;
2913 break;
2914 default:
2915 c++;
2916 break;
2917 }
2918 if (opex > 0)
2919 {
2920 sign = 1;
2921 switch (oper)
2922 {
2923 case 'c':
2924 value = value2;
2925 break;
2926 case '-':
2927 value = value - value2;
2928 break;
2929 case '+':
2930 value = value + value2;
2931 break;
2932 case '*':
2933 value = value * value2;
2934 break;
2935 case '/':
2936 if (value2) value = value / value2;
2937 break;
2938 case '%':
2939 if (value2) value = value % value2;
2940 break;
2941 case '<':
2942 value = (value < value2);
2943 break;
2944 case '>':
2945 value = (value > value2);
2946 break;
2947 case '>'+16:
2948 value = (value >= value2);
2949 break;
2950 case '<'+16:
2951 value = (value <= value2);
2952 break;
2953 case '=':
2954 case '='+16:
2955 value = (value == value2);
2956 break;
2957 case '&':
2958 value = (value && value2);
2959 break;
2960 case ':':
2961 value = (value || value2);
2962 break;
2963 default:
2964 {
2965 qCDebug(KIO_MAN_LOG) << "Unknown operator " << char(oper);
2966 }
2967 }
2968 oper = 0;
2969 }
2970 }
2971 if (*c == ')') c++;
2972 }
2973 *result = value;
2974
2975 skip_escape = oldSkipEscape;
2976
2977 return c;
2978 }
2979
2980 //---------------------------------------------------------------------
2981
/**
 * Evaluate the expression starting at \p c and store its value in \p result.
 *
 * Convenience overload: calls the three-argument scan_expression() with a
 * nesting depth (numLoop) of 0.
 *
 * \return the pointer returned by the three-argument overload
 */
static char *scan_expression(char *c, int *result)
{
    return scan_expression(c, result, 0);
}
2986
2987 //---------------------------------------------------------------------
2988
trans_char(char * c,char s,char t)2989 static void trans_char(char *c, char s, char t)
2990 {
2991 char *sl = c;
2992 int slash = 0;
2993 while (*sl != '\n' || slash)
2994 {
2995 if (!slash)
2996 {
2997 if (*sl == escapesym)
2998 slash = 1;
2999 else if (*sl == s)
3000 *sl = t;
3001 }
3002 else slash = 0;
3003 sl++;
3004 }
3005 }
3006
3007 //---------------------------------------------------------------------
3008 // parse 1 line (or a line which stretches multiple lines by \(enter) )
3009 // return all arguments starting at \p c in \p args
3010 // returns the pointer to the next char where scanning should continue
3011 // (which is the char after the ending \n)
3012 // argPointers .. a list of pointers to the startchars of each arg pointing into the string given with c
3013
getArguments(char * & c,QList<QByteArray> & args,QList<char * > * argPointers=nullptr)3014 void getArguments(/* const */ char *&c, QList<QByteArray> &args, QList<char*> *argPointers = nullptr)
3015 {
3016 args.clear();
3017 if ( argPointers )
3018 argPointers->clear();
3019
3020 QByteArray arg;
3021 arg.reserve(30); // reduce num of reallocs
3022 bool inString = false;
3023 bool inArgument = false;
3024
3025 for (; *c && (*c != '\n'); c++)
3026 {
3027 if ( *c == '"' )
3028 {
3029 if ( !inString )
3030 {
3031 inString = true; // start of quoted argument
3032 }
3033 else
3034 {
3035 // according to http://heirloom.sourceforge.net/doctools/troff.pdf chapter 7.3
3036 // two consecutive quotes inside a string is one quote char
3037 if ( *(c+1) == '"' )
3038 {
3039 arg += '"';
3040 c++;
3041 }
3042 else // end of quoted argument
3043 {
3044 args.append(arg);
3045 arg.clear();
3046 inString = false;
3047 inArgument = false;
3048 }
3049 }
3050 }
3051 else if ( *c == ' ' )
3052 {
3053 if ( inString )
3054 {
3055 arg += *c;
3056 if ( !inArgument ) // argument not yet found (leading spaces)
3057 {
3058 inArgument = true;
3059
3060 if ( argPointers )
3061 argPointers->append(c);
3062 }
3063 }
3064 else if ( inArgument )
3065 {
3066 // end of previous argument
3067 args.append(arg);
3068 arg.clear();
3069 inArgument = false;
3070 }
3071 }
3072 else if ( (*c == escapesym) && (*(c+1) == ' ') )
3073 {
3074 // special handling \<SP> shall be kept as is
3075 arg += *c++;
3076 arg += *c;
3077
3078 if ( !inArgument ) // argument not yet found (leading spaces)
3079 {
3080 inArgument = true;
3081
3082 if ( argPointers )
3083 argPointers->append(c);
3084 }
3085 }
3086 else if ( (*c == escapesym) && (*(c+1) == '\n') )
3087 {
3088 c++;
3089 }
3090 else if ( (*c == escapesym) && (*(c+1) == '"') ) // start of comment; skip rest of line
3091 {
3092 if ( inArgument )
3093 {
3094 // end of previous argument
3095 args.append(arg);
3096 arg.clear();
3097 inArgument = false;
3098 }
3099
3100 // skip rest of line
3101 while ( *c && (*c != '\n') ) c++;
3102 break;
3103 }
3104 else if ( *c != ' ' )
3105 {
3106 arg += *c;
3107 if ( !inArgument ) // argument not yet found (leading spaces)
3108 {
3109 inArgument = true;
3110
3111 if ( argPointers )
3112 argPointers->append(c);
3113 }
3114 }
3115 }
3116
3117 if ( inArgument )
3118 {
3119 // end of previous argument
3120 args.append(arg);
3121 }
3122
3123 if ( *c ) c++;
3124 }
3125
3126 //---------------------------------------------------------------------
3127
// Pairs of (abbreviation, full title) searched by lookup_abbrev().
// The titles are HTML fragments (some contain markup), so characters with a
// special meaning in HTML have to be written as entities.
// The list is terminated by a pair of null pointers.
static const char * const abbrev_list[] =
{
    "GSBG", "Getting Started ",
    "SUBG", "Customizing SunOS",
    "SHBG", "Basic Troubleshooting",
    "SVBG", "SunView User's Guide",
    "MMBG", "Mail and Messages",
    "DMBG", "Doing More with SunOS",
    "UNBG", "Using the Network",
    "GDBG", "Games, Demos &amp; Other Pursuits",
    "CHANGE", "SunOS 4.1 Release Manual",
    "INSTALL", "Installing SunOS 4.1",
    "ADMIN", "System and Network Administration",
    "SECUR", "Security Features Guide",
    "PROM", "PROM User's Manual",
    "DIAG", "Sun System Diagnostics",
    "SUNDIAG", "Sundiag User's Guide",
    "MANPAGES", "SunOS Reference Manual",
    "REFMAN", "SunOS Reference Manual",
    "SSI", "Sun System Introduction",
    "SSO", "System Services Overview",
    "TEXT", "Editing Text Files",
    "DOCS", "Formatting Documents",
    "TROFF", "Using <B>nroff</B> and <B>troff</B>",
    "INDEX", "Global Index",
    "CPG", "C Programmer's Guide",
    "CREF", "C Reference Manual",
    "ASSY", "Assembly Language Reference",
    "PUL", "Programming Utilities and Libraries",
    "DEBUG", "Debugging Tools",
    "NETP", "Network Programming",
    "DRIVER", "Writing Device Drivers",
    "STREAMS", "STREAMS Programming",
    "SBDK", "SBus Developer's Kit",
    "WDDS", "Writing Device Drivers for the SBus",
    "FPOINT", "Floating-Point Programmer's Guide",
    "SVPG", "SunView 1 Programmer's Guide",
    "SVSPG", "SunView 1 System Programmer's Guide",
    "PIXRCT", "Pixrect Reference Manual",
    "CGI", "SunCGI Reference Manual",
    "CORE", "SunCore Reference Manual",
    "4ASSY", "Sun-4 Assembly Language Reference",
    "SARCH", "<FONT SIZE=\"-1\">SPARC</FONT> Architecture Manual",
    "KR", "The C Programming Language",
    nullptr, nullptr
};
3174
lookup_abbrev(const char * c)3175 static const char *lookup_abbrev(const char *c)
3176 {
3177 int i = 0;
3178
3179 if (!c) return "";
3180 while (abbrev_list[i] && qstrcmp(c, abbrev_list[i])) i = i + 2;
3181 if (abbrev_list[i])
3182 return abbrev_list[i+1];
3183 else
3184 return c;
3185 }
3186
3187 //---------------------------------------------------------------------
3188
// Pairs of (section, title) searched by section_name(). Which sections are
// known depends on the operating system the code is compiled for.
// The first pair with a null section ends the search and supplies the title
// used for sections that are not listed; the final pair of null pointers
// terminates the array.
static const char * const section_list[] =
{
#ifdef Q_OS_SOLARIS
    // for Solaris
    "1", "User Commands",
    "1B", "SunOS/BSD Compatibility Package Commands",
    "1b", "SunOS/BSD Compatibility Package Commands",
    "1C", "Communication Commands ",
    "1c", "Communication Commands",
    "1F", "FMLI Commands ",
    "1f", "FMLI Commands",
    "1G", "Graphics and CAD Commands ",
    "1g", "Graphics and CAD Commands ",
    "1M", "Maintenance Commands",
    "1m", "Maintenance Commands",
    "1S", "SunOS Specific Commands",
    "1s", "SunOS Specific Commands",
    "2", "System Calls",
    "3", "C Library Functions",
    "3B", "SunOS/BSD Compatibility Library Functions",
    "3b", "SunOS/BSD Compatibility Library Functions",
    "3C", "C Library Functions",
    "3c", "C Library Functions",
    "3E", "C Library Functions",
    "3e", "C Library Functions",
    "3F", "Fortran Library Routines",
    "3f", "Fortran Library Routines",
    "3G", "C Library Functions",
    "3g", "C Library Functions",
    "3I", "Wide Character Functions",
    "3i", "Wide Character Functions",
    "3K", "Kernel VM Library Functions",
    "3k", "Kernel VM Library Functions",
    "3L", "Lightweight Processes Library",
    "3l", "Lightweight Processes Library",
    "3M", "Mathematical Library",
    "3m", "Mathematical Library",
    "3N", "Network Functions",
    "3n", "Network Functions",
    "3R", "Realtime Library",
    "3r", "Realtime Library",
    "3S", "Standard I/O Functions",
    "3s", "Standard I/O Functions",
    "3T", "Threads Library",
    "3t", "Threads Library",
    "3W", "C Library Functions",
    "3w", "C Library Functions",
    "3X", "Miscellaneous Library Functions",
    "3x", "Miscellaneous Library Functions",
    "4", "File Formats",
    "4B", "SunOS/BSD Compatibility Package File Formats",
    "4b", "SunOS/BSD Compatibility Package File Formats",
    "5", "Headers, Tables, and Macros",
    "6", "Games and Demos",
    "7", "Special Files",
    "7B", "SunOS/BSD Compatibility Special Files",
    "7b", "SunOS/BSD Compatibility Special Files",
    "8", "Maintenance Procedures",
    "8C", "Maintenance Procedures",
    "8c", "Maintenance Procedures",
    "8S", "Maintenance Procedures",
    "8s", "Maintenance Procedures",
    "9", "DDI and DKI",
    "9E", "DDI and DKI Driver Entry Points",
    "9e", "DDI and DKI Driver Entry Points",
    "9F", "DDI and DKI Kernel Functions",
    "9f", "DDI and DKI Kernel Functions",
    "9S", "DDI and DKI Data Structures",
    "9s", "DDI and DKI Data Structures",
    "L", "Local Commands",
#elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
    "1", "General Commands",
    "2", "System Calls",
    "3", "Library Functions",
    "4", "Kernel Interfaces",
    "5", "File Formats",
    "6", "Games",
    "7", "Miscellaneous Information",
    "8", "System Manager's Manuals",
    "9", "Kernel Developer's Manuals",
#else
    // Other OS
    "1", "User Commands ",
    "1C", "User Commands",
    "1G", "User Commands",
    "1S", "User Commands",
    "1V", "User Commands ",
    "2", "System Calls",
    "2V", "System Calls",
    "3", "C Library Functions",
    "3C", "Compatibility Functions",
    "3F", "Fortran Library Routines",
    "3K", "Kernel VM Library Functions",
    "3L", "Lightweight Processes Library",
    "3M", "Mathematical Library",
    "3N", "Network Functions",
    "3R", "RPC Services Library",
    "3S", "Standard I/O Functions",
    "3V", "C Library Functions",
    "3X", "Miscellaneous Library Functions",
    "4", "Devices and Network Interfaces",
    "4F", "Protocol Families",
    "4I", "Devices and Network Interfaces",
    "4M", "Devices and Network Interfaces",
    "4N", "Devices and Network Interfaces",
    "4P", "Protocols",
    "4S", "Devices and Network Interfaces",
    "4V", "Devices and Network Interfaces",
    "5", "File Formats",
    "5V", "File Formats",
    "6", "Games and Demos",
    "7", "Environments, Tables, and Troff Macros",
    "7V", "Environments, Tables, and Troff Macros",
    "8", "Maintenance Commands",
    "8C", "Maintenance Commands",
    "8S", "Maintenance Commands",
    "8V", "Maintenance Commands",
    "L", "Local Commands",
#endif
    // The defaults
    nullptr, "Misc. Reference Manual Pages",
    nullptr, nullptr
};
3312
section_name(char * c)3313 static const char *section_name(char *c)
3314 {
3315 int i = 0;
3316
3317 if (!c) return "";
3318 while (section_list[i] && qstrcmp(c, section_list[i])) i = i + 2;
3319 if (section_list[i+1]) return section_list[i+1];
3320 else return c;
3321 }
3322
skip_till_newline(char * c)3323 static char *skip_till_newline(char *c)
3324 {
3325 int lvl = 0;
3326
3327 while (*c && (*c != '\n' || lvl > 0))
3328 {
3329 if (*c == '\\')
3330 {
3331 c++;
3332 if (*c == '}')
3333 lvl--;
3334 else if (*c == '{')
3335 lvl++;
3336 else if (*c == '\0')
3337 break;
3338 }
3339 c++;
3340 }
3341 if (*c) c++;
3342 if (lvl < 0 && newline_for_fun)
3343 {
3344 newline_for_fun = newline_for_fun + lvl;
3345 if (newline_for_fun < 0) newline_for_fun = 0;
3346 }
3347 return c;
3348 }
3349
3350 //---------------------------------------------------------------------
3351
3352 static bool s_whileloop = false;
3353
3354 /// Processing the .while request
request_while(char * & c,int j,bool mdoc)3355 static void request_while(char*& c, int j, bool mdoc)
3356 {
3357 // ### TODO: .continue
3358 qCDebug(KIO_MAN_LOG) << "Entering .while";
3359 c += j;
3360 char* newline = skip_till_newline(c);
3361 const char oldchar = *newline;
3362 *newline = 0;
3363 // We store the full .while stuff into a QByteArray as if it would be a macro
3364 const QByteArray macro = c ;
3365 qCDebug(KIO_MAN_LOG) << "'Macro' of .while" << BYTEARRAY(macro);
3366 // Prepare for continuing after .while loop end
3367 *newline = oldchar;
3368 c = newline;
3369 // Process -while loop
3370 const bool oldwhileloop = s_whileloop;
3371 s_whileloop = true;
3372 int result = true; // It must be an int due to the call to scan_expression
3373 break_the_while_loop = false;
3374 while (result && !break_the_while_loop)
3375 {
3376 // Unlike for a normal macro, we have the condition at start, so we do not need to prepend extra bytes
3377 char* liveloop = qstrdup(macro.data());
3378 qCDebug(KIO_MAN_LOG) << "Scanning .while condition";
3379 qCDebug(KIO_MAN_LOG) << "Loop macro " << liveloop;
3380 char* end_expression = scan_expression(liveloop, &result);
3381 qCDebug(KIO_MAN_LOG) << "After " << end_expression;
3382 if (result)
3383 {
3384 qCDebug(KIO_MAN_LOG) << "New .while iteration";
3385 // The condition is true, so call the .while's content
3386 char* help = end_expression + 1;
3387 while (*help && (*help == ' ' || *help == '\t'))
3388 ++help;
3389 if (! *help)
3390 {
3391 // We have a problem, so stop .while
3392 result = false;
3393 break;
3394 }
3395 if (mdoc)
3396 scan_troff_mandoc(help, false, nullptr);
3397 else
3398 scan_troff(help, false, nullptr);
3399 }
3400 delete[] liveloop;
3401 }
3402 break_the_while_loop = false;
3403
3404 //
3405 s_whileloop = oldwhileloop;
3406 qCDebug(KIO_MAN_LOG) << "Ending .while";
3407 }
3408
3409 //---------------------------------------------------------------------
3410 // Processing mixed fonts requests like .BI
3411
request_mixed_fonts(char * & c,int j,const char * font1,const char * font2,const bool mode,const bool inFMode)3412 static void request_mixed_fonts(char*& c, int j, const char* font1, const char* font2, const bool mode, const bool inFMode)
3413 {
3414 c += j;
3415 if (*c == '\n') c++;
3416
3417 QList<QByteArray> args;
3418 getArguments(c, args);
3419
3420 for (int i = 0; i < args.count(); i++)
3421 {
3422 if (mode || inFMode)
3423 {
3424 out_html(" ");
3425 curpos++;
3426 }
3427 out_html(set_font((i&1) ? font2 : font1));
3428 scan_troff(args[i].data(), 1, nullptr);
3429 }
3430 out_html(set_font("R"));
3431 if (mode)
3432 {
3433 out_html(" ]");
3434 curpos++;
3435 }
3436 out_html(NEWLINE);
3437 if (!fillout)
3438 curpos = 0;
3439 else
3440 curpos++;
3441 }
3442
3443 //---------------------------------------------------------------------
3444
// &%(#@ c programs !!!
//static int ifelseval=0;
// If/else can be nested!
// For that reason a stack of values replaces the single int shown above.
static QStack<int> s_ifelseval;
3449
3450 //---------------------------------------------------------------------
3451
3452 // Process a (mdoc) request involving quotes
process_quote(char * c,int j,const char * open,const char * close)3453 static char* process_quote(char* c, int j, const char* open, const char* close)
3454 {
3455 trans_char(c, '"', '\a');
3456 c += j;
3457 if (*c == '\n') c++; // ### TODO: why? Quote requests cannot be empty!
3458 out_html(open);
3459 c = scan_troff_mandoc(c, 1, nullptr);
3460 out_html(close);
3461 out_html(NEWLINE);
3462 if (fillout)
3463 curpos++;
3464 else
3465 curpos = 0;
3466 return c;
3467 }
3468
3469 //---------------------------------------------------------------------
3470 /**
3471 * Is the char \p ch a punctuation in sense of mdoc(7)
3472 */
3473
static bool is_mdoc_punctuation(const char ch)
{
    // Only these eight characters count as punctuation; everything else,
    // letters and digits included, does not.
    switch (ch)
    {
    case '.':
    case ',':
    case ';':
    case ':':
    case '(':
    case ')':
    case '[':
    case ']':
        return true;
    default:
        return false;
    }
}
3484
3485 //---------------------------------------------------------------------
3486 /**
3487 * Can the char \p c be part of an identifier
3488 * \note For groff, an identifier can consist of nearly all ASCII printable non-white-space characters
3489 * See info:/groff/Identifiers
3490 */
3491
static bool is_identifier_char(const char c)
{
    // Accepted: the characters from '!' to '~', i.e. everything printable in
    // ASCII apart from the space, with the exception of the backslash.
    // ### TODO: the backslash should be treated as escape instead!
    return c >= '!' && c <= '~' && c != '\\';
}
3502
3503 //---------------------------------------------------------------------
3504
scan_identifier(char * & c)3505 static QByteArray scan_identifier(char*& c)
3506 {
3507 char* h = c; // help pointer
3508 // ### TODO Groff seems to eat nearly everything as identifier name (info:/groff/Identifiers)
3509 while (*h && *h != '\a' && *h != '\n' && is_identifier_char(*h))
3510 ++h;
3511 const char tempchar = *h;
3512 *h = 0;
3513 const QByteArray name = c;
3514 *h = tempchar;
3515 if (name.isEmpty())
3516 {
3517 qCDebug(KIO_MAN_LOG) << "EXCEPTION: identifier empty!";
3518 }
3519 c = h;
3520 return name;
3521 }
3522
3523 //---------------------------------------------------------------------
3524
scan_request(char * c)3525 static char *scan_request(char *c)
3526 {
3527 // mdoc(7) stuff
3528 static bool mandoc_synopsis = false; /* True if we are in the synopsis section */
3529 static bool mandoc_command = false; /* True if this is mdoc(7) page */
3530 static int mandoc_bd_options; /* Only copes with non-nested Bd's */
3531 static int function_argument = 0; // Number of function argument (.Fo, .Fa, .Fc)
3532
3533 int i = 0;
3534 bool mode = false;
3535 char *h = nullptr;
3536 char *sl;
3537 QList<QByteArray> args;
3538
3539 while (*c == ' ' || *c == '\t') c++; // Spaces or tabs allowed between control character and request
3540 if (c[0] == '\n') return c + 1;
3541 if (c[0] == escapesym)
3542 {
3543 /* some pages use .\" .\$1 .\} */
3544 /* .\$1 is too difficult/stuppid */
3545 if (c[1] == '$')
3546 {
3547 qCDebug(KIO_MAN_LOG) << "Found .\\$";
3548 c = skip_till_newline(c); // ### TODO
3549 }
3550 else
3551 {
3552 // the result of the escape expansion must be parsed again
3553 c++;
3554 QByteArray cstr;
3555 c = scan_escape_direct(c, cstr);
3556 for (; *c && (*c != '\n'); c++) cstr += *c;
3557 if ( cstr.length() )
3558 scan_request(cstr.data());
3559 }
3560 }
3561 else
3562 {
3563 int nlen = 0;
3564 QByteArray macroName;
3565 while (c[nlen] && (c[nlen] != ' ') && (c[nlen] != '\t') && (c[nlen] != '\n') && (c[nlen] != escapesym))
3566 {
3567 macroName += c[nlen];
3568 nlen++;
3569 }
3570 int j = nlen;
3571 while (c[j] == ' ' || c[j] == '\t') j++;
3572 /* search macro database of self-defined macros */
3573 QMap<QByteArray, StringDefinition>::const_iterator it = s_stringDefinitionMap.constFind(macroName);
3574
3575 // ### HACK: e.g. nmap, smb.conf redefine SH, SS to increase the font, etc. for non-TTY output
3576 // Ignore those to make the HTML result look better
3577 if ( (macroName != "SH") && (macroName != "SS") &&
3578 it != s_stringDefinitionMap.constEnd() )
3579 {
3580 qCDebug(KIO_MAN_LOG) << "CALLING MACRO: " << BYTEARRAY(macroName);
3581 const QByteArray oldDollarZero = s_dollarZero; // Previous value of $0
3582 s_dollarZero = macroName;
3583
3584 c += j;
3585 getArguments(c, args);
3586 for (i = 0; i < args.count(); i++)
3587 {
3588 char *h = nullptr;
3589
3590 if (mandoc_command)
3591 scan_troff_mandoc(args[i].data(), 1, &h);
3592 else
3593 scan_troff(args[i].data(), 1, &h);
3594
3595 args[i] = h;
3596 delete [] h;
3597 }
3598
3599 if (!(*it).m_output.isEmpty())
3600 {
3601 //qCDebug(KIO_MAN_LOG) << "Macro content is: "<< BYTEARRAY( (*it).m_output );
3602 const unsigned int length = (*it).m_output.length();
3603 char* work = new char [length+2];
3604 work[0] = '\n'; // The macro must start after an end of line to allow a request on first line
3605 qstrncpy(work + 1, (*it).m_output.data(), length + 1);
3606 const QList<QByteArray> oldArgumentList(s_argumentList);
3607 s_argumentList.clear();
3608 for (i = 0; i < args.count(); i++)
3609 s_argumentList.push_back(args[i]);
3610
3611 const int onff = newline_for_fun;
3612 if (mandoc_command)
3613 scan_troff_mandoc(work + 1, 0, nullptr);
3614 else
3615 scan_troff(work + 1, 0, nullptr);
3616 delete[] work;
3617 newline_for_fun = onff;
3618 s_argumentList = oldArgumentList;
3619 }
3620 s_dollarZero = oldDollarZero;
3621 qCDebug(KIO_MAN_LOG) << "ENDING MACRO: " << BYTEARRAY(macroName);
3622 }
3623 else
3624 {
3625 qCDebug(KIO_MAN_LOG) << "REQUEST: " << BYTEARRAY(macroName);
3626 switch (RequestNum request = RequestHash::getRequest(macroName, macroName.length()))
3627 {
3628 case REQ_ab: // groff(7) "ABort"
3629 {
3630 h = c + j;
3631 while (*h && *h != '\n') h++;
3632 *h = '\0';
3633 if (scaninbuff && buffpos)
3634 {
3635 buffer[buffpos] = '\0';
3636 qCDebug(KIO_MAN_LOG) << "ABORT: " << buffer;
3637 }
3638 // ### TODO find a way to display it to the user
3639 qCDebug(KIO_MAN_LOG) << "Aborting: .ab " << (c + j);
3640 return nullptr;
3641 break;
3642 }
3643 case REQ_An: // mdoc(7) "Author Name"
3644 {
3645 c += j;
3646 c = scan_troff_mandoc(c, 1, nullptr);
3647 break;
3648 }
3649 case REQ_di: // groff(7) "end current DIversion"
3650 {
3651 qCDebug(KIO_MAN_LOG) << "Start .di";
3652 c += j;
3653 if (*c == '\n')
3654 {
3655 ++c;
3656 break;
3657 }
3658 const QByteArray name(scan_identifier(c));
3659 while (*c && *c != '\n') c++;
3660 c++;
3661 h = c;
3662 while (*c && qstrncmp(c, ".di", 3)) while (*c && *c++ != '\n');
3663 *c = '\0';
3664 char* result = nullptr;
3665 scan_troff(h, 0, &result);
3666 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name);
3667 if (it == s_stringDefinitionMap.end())
3668 {
3669 StringDefinition def;
3670 def.m_length = 0;
3671 def.m_output = result;
3672 s_stringDefinitionMap.insert(name, def);
3673 }
3674 else
3675 {
3676 (*it).m_length = 0;
3677 (*it).m_output = result;
3678 }
3679 delete[] result;
3680 if (*c) *c = '.';
3681 c = skip_till_newline(c);
3682 qCDebug(KIO_MAN_LOG) << "end .di";
3683 break;
3684 }
3685 case REQ_ds: // groff(7) "Define String variable"
3686 mode = true;
3687 Q_FALLTHROUGH();
3688 case REQ_as: // groff (7) "Append String variable"
3689 {
3690 qCDebug(KIO_MAN_LOG) << "start .ds/.as";
3691 int oldcurpos = curpos;
3692 c += j;
3693 const QByteArray name(scan_identifier(c));
3694 if (name.isEmpty())
3695 break;
3696 // an initial " is removed to allow leading space
3697 while (*c && isspace(*c)) c++;
3698 if (*c == '"') c++;
3699
3700 single_escape = true;
3701 curpos = 0;
3702 char* result = nullptr;
3703 c = scan_troff(c, 1, &result);
3704 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name);
3705 if (it == s_stringDefinitionMap.end())
3706 {
3707 StringDefinition def;
3708 def.m_length = curpos;
3709 def.m_output = result;
3710 s_stringDefinitionMap.insert(name, def);
3711 }
3712 else
3713 {
3714 if (mode)
3715 { // .ds Defining String
3716 (*it).m_length = curpos;
3717 (*it).m_output = result;
3718 }
3719 else
3720 { // .as Appending String
3721 (*it).m_length += curpos;
3722 (*it).m_output += result;
3723 }
3724 }
3725 delete[] result;
3726 single_escape = false;
3727 curpos = oldcurpos;
3728 qCDebug(KIO_MAN_LOG) << "end .ds/.as";
3729 break;
3730 }
3731 case REQ_br: // groff(7) "line BReak"
3732 {
3733 if (still_dd)
3734 out_html("<DD>"); // ### VERIFY (does not look like generating good HTML)
3735 else
3736 out_html("<BR>\n");
3737 curpos = 0;
3738 c = c + j;
3739 if (c[0] == escapesym) c = scan_escape(c + 1);
3740 c = skip_till_newline(c);
3741 break;
3742 }
3743 case REQ_c2: // groff(7) "reset non-break Control character" (2 means non-break)
3744 {
3745 c = c + j;
3746 if (*c != '\n')
3747 nobreaksym = *c;
3748 else
3749 nobreaksym = '\'';
3750 c = skip_till_newline(c);
3751 break;
3752 }
3753 case REQ_cc: // groff(7) "reset Control Character"
3754 {
3755 c = c + j;
3756 if (*c != '\n')
3757 controlsym = *c;
3758 else
3759 controlsym = '.';
3760 c = skip_till_newline(c);
3761 break;
3762 }
3763 case REQ_ce: // groff (7) "CEnter"
3764 {
3765 c = c + j;
3766 if (*c == '\n')
3767 i = 1;
3768 else
3769 {
3770 i = 0;
3771 while ('0' <= *c && *c <= '9')
3772 {
3773 i = i * 10 + *c - '0';
3774 c++;
3775 }
3776 }
3777 c = skip_till_newline(c);
3778 /* center next i lines */
3779 if (i > 0)
3780 {
3781 out_html("<CENTER>\n");
3782 while (i && *c)
3783 {
3784 char *line = nullptr;
3785 c = scan_troff(c, 1, &line);
3786 if (line && qstrncmp(line, "<BR>", 4))
3787 {
3788 out_html(line);
3789 out_html("<BR>\n");
3790 delete [] line; // ### FIXME: memory leak!
3791 i--;
3792 }
3793 }
3794 out_html("</CENTER>\n");
3795 curpos = 0;
3796 }
3797 break;
3798 }
3799 case REQ_ec: // groff(7) "reset Escape Character"
3800 {
3801 c = c + j;
3802 if (*c != '\n')
3803 escapesym = *c;
3804 else
3805 escapesym = '\\';
3806 break;
3807 }
3808 case REQ_eo: // groff(7) "turn Escape character Off"
3809 {
3810 escapesym = '\0';
3811 c = skip_till_newline(c);
3812 break;
3813 }
3814 case REQ_ex: // groff(7) "EXit"
3815 {
3816 return nullptr;
3817 break;
3818 }
3819 case REQ_fc: // groff(7) "set Field and pad Character"
3820 {
3821 c = c + j;
3822 if (*c == '\n')
3823 fieldsym = padsym = '\0';
3824 else
3825 {
3826 fieldsym = c[0];
3827 padsym = c[1];
3828 }
3829 c = skip_till_newline(c);
3830 break;
3831 }
3832 case REQ_fi: // groff(7) "FIll"
3833 {
3834 if (!fillout)
3835 {
3836 out_html(set_font("R"));
3837 out_html(change_to_size('0'));
3838 out_html("</PRE>\n");
3839 }
3840 curpos = 0;
3841 fillout = 1;
3842 c = skip_till_newline(c);
3843 break;
3844 }
3845 case REQ_ft: // groff(7) "FonT"
3846 {
3847 c += j;
3848 h = skip_till_newline(c);
3849 const char oldChar = *h;
3850 *h = 0;
3851 const QByteArray name = c;
3852 // ### TODO: name might contain a variable
3853 if (name.isEmpty())
3854 out_html(set_font("P")); // Previous font
3855 else
3856 out_html(set_font(name));
3857 *h = oldChar;
3858 c = h;
3859 break;
3860 }
3861 case REQ_el: // groff(7) "ELse"
3862 {
3863 int ifelseval = s_ifelseval.pop();
3864 /* .el anything : else part of if else */
3865 if (ifelseval)
3866 {
3867 c = c + j;
3868 c[-1] = '\n';
3869 c = scan_troff(c, 1, nullptr);
3870 }
3871 else
3872 c = skip_till_newline(c + j);
3873 break;
3874 }
3875 case REQ_ie: // groff(7) "If with Else"
3876 /* .ie c anything : then part of if else */
3877 // fallthrough
3878 case REQ_if: // groff(7) "IF"
3879 {
3880 /* .if c anything
3881 * .if !c anything
3882 * .if N anything
3883 * .if !N anything
3884 * .if 'string1'string2' anything
3885 * .if !'string1'string2' anything
3886 */
3887 c = c + j;
3888 c = scan_expression(c, &i);
3889 if (request == REQ_ie)
3890 {
3891 int ifelseval = !i;
3892 s_ifelseval.push(ifelseval);
3893 }
3894 if (i)
3895 {
3896 *c = '\n';
3897 c++;
3898 c = scan_troff(c, 1, nullptr);
3899 }
3900 else
3901 c = skip_till_newline(c);
3902 break;
3903 }
3904 case REQ_ig: // groff(7) "IGnore"
3905 {
3906 const char *endwith = "..\n";
3907 i = 3;
3908 c = c + j;
3909 if (*c != '\n' && *c != '\\')
3910 {
3911 /* Not newline or comment */
3912 endwith = c - 1;
3913 i = 1;
3914 c[-1] = '.';
3915 while (*c && *c != '\n') c++, i++;
3916 }
3917 c++;
3918 while (*c && qstrncmp(c, endwith, i)) while (*c++ != '\n');
3919 while (*c && *c++ != '\n');
3920 break;
3921 }
3922 case REQ_nf: // groff(7) "No Filling"
3923 {
3924 if (fillout)
3925 {
3926 out_html(set_font("R"));
3927 out_html(change_to_size('0'));
3928 out_html("<PRE>\n");
3929 }
3930 curpos = 0;
3931 fillout = 0;
3932 c = skip_till_newline(c);
3933 break;
3934 }
3935 case REQ_ps: // groff(7) "previous Point Size"
3936 {
3937 c += j;
3938 getArguments(c, args);
3939 if ( args.count() == 0 )
3940 out_html(change_to_size('0'));
3941 else
3942 {
3943 char *h = args[0].data();
3944 int sign = 0;
3945 i = 0;
3946 if (*h == '-')
3947 {
3948 sign = -1;
3949 h++;
3950 }
3951 else if (*h == '+') {
3952 sign = 1;
3953 h++;
3954 }
3955 scan_expression(h, &i);
3956 if (sign == 0)
3957 {
3958 sign = 1;
3959 if (i > 5) i = i - 10;
3960 }
3961 out_html(change_to_size(sign * i));
3962 }
3963 break;
3964 }
3965 case REQ_sp: // groff(7) "SKip one line"
3966 {
3967 c += j;
3968 if (fillout)
3969 out_html("<br><br>");
3970 else
3971 out_html(NEWLINE);
3972 curpos = 0;
3973 c = skip_till_newline(c);
3974 break;
3975 }
3976 case REQ_so: // groff(7) "Include SOurce file"
3977 {
3978 char *buf;
3979 char *name = nullptr;
3980 curpos = 0;
3981 c = c + j;
3982 if (*c == '/')
3983 h = c;
3984 else
3985 {
3986 h = c - 3;
3987 h[0] = '.';
3988 h[1] = '.';
3989 h[2] = '/';
3990 }
3991 while (*c != '\n') c++;
3992 *c = '\0';
3993 scan_troff(h, 1, &name);
3994 if (name[3] == '/')
3995 h = name + 3;
3996 else
3997 h = name;
3998
3999 // The format of the argument to .so varies among man pages.
4000 // Some of them, e.g. pam.8, use "PAM.8". Others, e.g. telinit.8,
4001 // use "man8/init.8". So they are not always true relative paths,
4002 // although the man(1) command seems to handle them with no problem.
4003 //
4004 // The code above starting "h = c - 3" attempts to turn the argument
4005 // into a relative path, but that is not correct in the case of pam.8
4006 // as above. So this removes the "../" prefix again if there is
4007 // no other slash following it.
4008 char *firstSlash = strchr(h, '/');
4009 if (firstSlash != 0)
4010 {
4011 char *nextSlash = strchr(firstSlash + 1, '/');
4012 if (nextSlash == 0)
4013 h = firstSlash + 1;
4014 }
4015
4016 /* this works alright, except for section 3 */
4017 buf = read_man_page(h);
4018 if (!buf)
4019 {
4020 qCDebug(KIO_MAN_LOG) << "Unable to open or read file: .so " << (h);
4021 out_html("<BLOCKQUOTE>"
4022 "man2html: unable to open or read file.\n");
4023 out_html(h);
4024 out_html("</BLOCKQUOTE>\n");
4025 }
4026 else
4027 scan_troff(buf + 1, 0, nullptr);
4028 delete [] buf;
4029 delete [] name;
4030
4031 *c++ = '\n';
4032 break;
4033 }
4034 case REQ_ta: // gorff(7) "set TAbulators"
4035 {
4036 c = c + j;
4037 j = 0;
4038 while (*c != '\n')
4039 {
4040 sl = scan_expression(c, &tabstops[j]);
4041 if (j > 0 && (*c == '-' || *c == '+')) tabstops[j] += tabstops[j-1];
4042 c = sl;
4043 while (*c == ' ' || *c == '\t') c++;
4044 j++;
4045 }
4046 maxtstop = j;
4047 curpos = 0;
4048 break;
4049 }
4050 case REQ_ti: // groff(7) "Temporary Indent"
4051 {
4052 /*while (itemdepth || dl_set[itemdepth]) {
4053 out_html("</DL>\n");
4054 if (dl_set[itemdepth]) dl_set[itemdepth]=0;
4055 else itemdepth--;
4056 }*/
4057 out_html("<BR>\n");
4058 c = c + j;
4059 c = scan_expression(c, &j);
4060 for (i = 0; i < j; i++) out_html(" ");
4061 curpos = j;
4062 c = skip_till_newline(c);
4063 break;
4064 }
4065 case REQ_tm: // groff(7) "TerMinal" ### TODO: what are useful uses for it
4066 {
4067 c += j;
4068 getArguments(c, args);
4069 if ( args.count() )
4070 qCDebug(KIO_MAN_LOG) << ".tm " << args[0];
4071 break;
4072 }
4073 case REQ_B: // man(7) "Bold"
4074 mode = true;
4075 Q_FALLTHROUGH();
4076 case REQ_I: // man(7) "Italic"
4077 {
4078 /* parse one line in a certain font */
4079 c += j;
4080 getArguments(c, args);
4081
4082 out_html(set_font(mode ? "B" : "I"));
4083
4084 for (int i = 0; i < args.count(); i++)
4085 {
4086 scan_troff(args[i].data(), 1, nullptr);
4087 out_html(" ");
4088 }
4089
4090 out_html(set_font("R"));
4091
4092 if (fillout)
4093 curpos++;
4094 else
4095 {
4096 out_html(NEWLINE);
4097 curpos = 0;
4098 }
4099 break;
4100 }
4101 case REQ_Fd: // mdoc(7) "Function Definition"
4102 {
4103 // Normal text must be printed in bold, punctuation in regular font
4104 c += j;
4105 if (*c == '\n') c++;
4106 getArguments(c, args);
4107
4108 for (i = 0; i < args.count(); i++)
4109 {
4110 // ### FIXME In theory, only a single punctuation character is recognized as punctuation
4111 if ( is_mdoc_punctuation(args[i][0]) )
4112 out_html(set_font("R"));
4113 else
4114 out_html(set_font("B"));
4115 scan_troff(args[i].data(), 1, nullptr);
4116 out_html(" ");
4117 }
4118 // In the mdoc synopsis, there are automatical line breaks (### TODO: before or after?)
4119 if (mandoc_synopsis)
4120 out_html("<br>");
4121
4122 out_html(set_font("R"));
4123 out_html(NEWLINE);
4124 if (!fillout)
4125 curpos = 0;
4126 else
4127 curpos++;
4128 break;
4129 }
4130 case REQ_Fn: // mdoc(7) for "Function calls"
4131 {
4132 // brackets and commas have to be inserted automatically
4133 c += j;
4134 if (*c == '\n') c++;
4135 getArguments(c, args);
4136 if ( args.count() )
4137 {
4138 for (i = 0; i < args.count(); i++)
4139 {
4140 if (i)
4141 out_html(set_font("I"));
4142 else
4143 out_html(set_font("B"));
4144 scan_troff(args[i].data(), 1, nullptr);
4145 out_html(set_font("R"));
4146 if (i == 0)
4147 {
4148 out_html(" (");
4149 }
4150 else if (i < args.count() - 1)
4151 out_html(", ");
4152 }
4153 out_html(")");
4154 }
4155 out_html(set_font("R"));
4156 if (mandoc_synopsis)
4157 out_html("<br>");
4158 out_html(NEWLINE);
4159 if (!fillout)
4160 curpos = 0;
4161 else
4162 curpos++;
4163 break;
4164 }
4165 case REQ_Fo: // mdoc(7) "Function definition Opening"
4166 {
4167 char* font[2] = {(char*)"B", (char*)"R" };
4168 c += j;
4169 if (*c == '\n') c++;
4170 char *eol = strchr(c, '\n');
4171 char *semicolon = strchr(c, ';');
4172 if ((semicolon != nullptr) && (semicolon < eol)) *semicolon = ' ';
4173
4174 getArguments(c, args);
4175 // Normally a .Fo has only one parameter
4176 for (i = 0; i < args.count(); i++)
4177 {
4178 out_html(set_font(font[i&1]));
4179 scan_troff(args[i].data(), 1, nullptr);
4180 if (i == 0)
4181 {
4182 out_html(" (");
4183 }
4184 // ### TODO What should happen if there is more than one argument
4185 // else if (i<args.count()-1) out_html(", ");
4186 }
4187 function_argument = 1; // Must be > 0
4188 out_html(set_font("R"));
4189 out_html(NEWLINE);
4190 if (!fillout)
4191 curpos = 0;
4192 else
4193 curpos++;
4194 break;
4195 }
4196 case REQ_Fc:// mdoc(7) "Function definition Close"
4197 {
4198 // .Fc has no parameter
4199 c += j;
4200 c = skip_till_newline(c);
4201 char* font[2] = {(char*)"B", (char*)"R" };
4202 out_html(set_font(font[i&1]));
4203 out_html(")");
4204 out_html(set_font("R"));
4205 if (mandoc_synopsis)
4206 out_html("<br>");
4207 out_html(NEWLINE);
4208 if (!fillout)
4209 curpos = 0;
4210 else
4211 curpos++;
4212 function_argument = 0; // Reset the count variable
4213 break;
4214 }
4215 case REQ_Fa: // mdoc(7) "Function definition argument"
4216 {
4217 char* font[2] = {(char*)"B", (char*)"R" };
4218 c += j;
4219 if (*c == '\n') c++;
4220 getArguments(c, args);
4221 out_html(set_font(font[i&1]));
4222 // function_argument==0 means that we had no .Fo before, e.g. in mdoc.samples(7)
4223 if (function_argument > 1)
4224 {
4225 out_html(", ");
4226 curpos += 2;
4227 function_argument++;
4228 }
4229 else if (function_argument == 1)
4230 {
4231 // We are only at the first parameter
4232 function_argument++;
4233 }
4234 for (i = 0; i < args.count(); i++)
4235 scan_troff(args[i].data(), 1, nullptr);
4236
4237 out_html(set_font("R"));
4238 if (!fillout)
4239 curpos = 0;
4240 else
4241 curpos++;
4242 break;
4243 }
4244
4245 case REQ_OP: /* groff manpages use this construction */
4246 {
4247 /* .OP a b : [ <B>a</B> <I>b</I> ] */
4248 out_html(set_font("R"));
4249 out_html("[");
4250 curpos++;
4251 request_mixed_fonts(c, j, "B", "I", true, false);
4252 break;
4253 }
4254 case REQ_Ft: //perhaps "Function return type"
4255 {
4256 request_mixed_fonts(c, j, "B", "I", false, true);
4257 break;
4258 }
4259 case REQ_BR:
4260 {
4261 request_mixed_fonts(c, j, "B", "R", false, false);
4262 break;
4263 }
4264 case REQ_BI:
4265 {
4266 request_mixed_fonts(c, j, "B", "I", false, false);
4267 break;
4268 }
4269 case REQ_IB:
4270 {
4271 request_mixed_fonts(c, j, "I", "B", false, false);
4272 break;
4273 }
4274 case REQ_IR:
4275 {
4276 request_mixed_fonts(c, j, "I", "R", false, false);
4277 break;
4278 }
4279 case REQ_RB:
4280 {
4281 request_mixed_fonts(c, j, "R", "B", false, false);
4282 break;
4283 }
4284 case REQ_RI:
4285 {
4286 request_mixed_fonts(c, j, "R", "I", false, false);
4287 break;
4288 }
4289 case REQ_DT: // man(7) "Default Tabulators"
4290 {
4291 for (j = 0; j < 20; j++) tabstops[j] = (j + 1) * 8;
4292 maxtstop = 20;
4293 c = skip_till_newline(c);
4294 break;
4295 }
4296 case REQ_IP: // man(7) "Ident Paragraph"
4297 {
4298 c += j;
4299 getArguments(c, args);
4300
4301 if (!dl_set[itemdepth])
4302 {
4303 out_html("<DL>\n");
4304 dl_set[itemdepth] = 1;
4305 }
4306 out_html("<DT>");
4307
4308 if ( args.count() )
4309 scan_troff(args[0].data(), 1, nullptr);
4310
4311 out_html("</DT>\n<DD>");
4312 listItemStack.push("DD");
4313 curpos = 0;
4314 break;
4315 }
4316 case REQ_TP: // man(7) "hanging Tag Paragraph"
4317 {
4318 if (!dl_set[itemdepth])
4319 {
4320 out_html("<DL>\n");
4321 dl_set[itemdepth] = 1;
4322 }
4323 out_html(set_font("R"));
4324 out_html("<DT>");
4325 c = skip_till_newline(c);
4326 /* somewhere a definition ends with '.TP' */
4327 if (!*c)
4328 still_dd = true;
4329 else
4330 {
4331 // HACK for proc(5)
4332 while (c[0] == '.' && c[1] == '\\' && c[2] == '\"')
4333 {
4334 // We have a comment, so skip the line
4335 c = skip_till_newline(c);
4336 }
4337 c = scan_troff(c, 1, nullptr);
4338 out_html("<DD>");
4339 listItemStack.push("DD");
4340 }
4341 curpos = 0;
4342 break;
4343 }
4344 case REQ_IX: // Indexing term (printed on standard error)
4345 {
4346 c = skip_till_newline(c); // ignore
4347 break;
4348 }
4349 case REQ_P: // man(7) "Paragraph"
4350 case REQ_LP:// man(7) "Paragraph"
4351 case REQ_PP:// man(7) "Paragraph; reset Prevailing indent"
4352 {
4353 if (dl_set[itemdepth])
4354 {
4355 out_html("</DL>\n");
4356 dl_set[itemdepth] = 0;
4357 }
4358 else if (fillout) out_html("<br>");
4359
4360 if (fillout)
4361 out_html("<br>\n");
4362 else
4363 out_html(NEWLINE);
4364
4365 curpos = 0;
4366 c = skip_till_newline(c);
4367 break;
4368 }
4369 case REQ_HP: // man(7) "Hanging indent Paragraph"
4370 {
4371 if (!dl_set[itemdepth])
4372 {
4373 out_html("<DL>");
4374 dl_set[itemdepth] = 1;
4375 }
4376 out_html("<DT>\n");
4377 still_dd = true;
4378 c = skip_till_newline(c);
4379 curpos = 0;
4380 break;
4381 }
4382 case REQ_PD: // man(7) "Paragraph Distance"
4383 {
4384 c = skip_till_newline(c);
4385 break;
4386 }
4387 case REQ_Rs: // mdoc(7) "Relative margin Start"
4388 case REQ_RS: // man(7) "Relative margin Start"
4389 {
4390 c += j;
4391 getArguments(c, args);
4392 j = 1;
4393 if (args.count() > 0) scan_expression(args[0].data(), &j);
4394 if (j >= 0)
4395 {
4396 itemdepth++;
4397 dl_set[itemdepth] = 0;
4398 out_html("<DL><DT></DT><DD>");
4399 listItemStack.push("DD");
4400 curpos = 0;
4401 }
4402 break;
4403 }
4404 case REQ_Re: // mdoc(7) "Relative margin End"
4405 case REQ_RE: // man(7) "Relative margin End"
4406 {
4407 if (itemdepth > 0)
4408 {
4409 if (dl_set[itemdepth]) out_html("</DL>");
4410 out_html("</DL>\n");
4411 itemdepth--;
4412 }
4413 c = skip_till_newline(c);
4414 curpos = 0;
4415 break;
4416 }
4417 case REQ_SB: // man(7) "Small; Bold"
4418 {
4419 out_html(set_font("B"));
4420 out_html("<small>");
4421 c = scan_troff(c + j, 1, nullptr);
4422 out_html("</small>");
4423 out_html(set_font("R"));
4424 break;
4425 }
4426 case REQ_SM: // man(7) "SMall"
4427 {
4428 c = c + j;
4429 if (*c == '\n') c++;
4430 out_html("<small>");
4431 c = scan_troff(c, 1, nullptr);
4432 out_html("</small>");
4433 break;
4434 }
4435 case REQ_Ss: // mdoc(7) "Sub Section"
4436 mandoc_command = 1;
4437 Q_FALLTHROUGH();
4438 case REQ_SS: // mdoc(7) "Sub Section"
4439 mode = true;
4440 Q_FALLTHROUGH();
4441 case REQ_Sh: // mdoc(7) "Sub Header"
4442 /* hack for fallthru from above */
4443 mandoc_command = !mode || mandoc_command;
4444 Q_FALLTHROUGH();
4445 case REQ_SH: // man(7) "Sub Header"
4446 {
4447 c = c + j;
4448 if (*c == '\n') c++;
4449 while (itemdepth || dl_set[itemdepth])
4450 {
4451 out_html("</DL>\n");
4452 if (dl_set[itemdepth])
4453 dl_set[itemdepth] = 0;
4454 else if (itemdepth > 0)
4455 itemdepth--;
4456 }
4457 out_html(set_font("R"));
4458 out_html(change_to_size(0));
4459 if (!fillout)
4460 {
4461 fillout = 1;
4462 out_html("</PRE>");
4463 }
4464 trans_char(c, '"', '\a');
4465 if (in_div)
4466 {
4467 out_html("</div>\n");
4468 in_div = 0;
4469 }
4470 if (mode)
4471 out_html("\n<H3>");
4472 else
4473 out_html("\n<H2>");
4474 mandoc_synopsis = qstrncmp(c, "SYNOPSIS", 8) == 0;
4475 c = mandoc_command ? scan_troff_mandoc(c, 1, nullptr) : scan_troff(c, 1, nullptr);
4476 if (mode)
4477 out_html("</H3>\n");
4478 else
4479 out_html("</H2>\n");
4480
4481 out_html("<div>\n");
4482 in_div = 1;
4483 curpos = 0;
4484 break;
4485 }
4486 case REQ_Sx: // mdoc(7)
4487 {
4488 // reference to a section header
4489 out_html(set_font("B"));
4490 trans_char(c, '"', '\a');
4491 c = c + j;
4492 if (*c == '\n') c++;
4493 c = scan_troff(c, 1, nullptr);
4494 out_html(set_font("R"));
4495 out_html(NEWLINE);
4496 if (fillout)
4497 curpos++;
4498 else
4499 curpos = 0;
4500 break;
4501 }
4502 case REQ_St: // groff_mdoc
4503 {
4504 c += j;
4505 getArguments(c, args);
4506 if ( args.count() )
4507 {
4508 bool found = false;
4509 for (const StandardName &standardName : STANDARD_NAMES)
4510 {
4511 if ( args[0] == standardName.abbrev )
4512 {
4513 found = true;
4514 out_html(standardName.formalName);
4515 break;
4516 }
4517 }
4518 if ( !found ) // an unknown standard - print the abbreviation
4519 out_html(args[0]);
4520 }
4521 break;
4522 }
4523 case REQ_TS: // Table Start tbl(1)
4524 {
4525 c = scan_table(c);
4526 break;
4527 }
4528 case REQ_Dt: /* mdoc(7) */
4529 mandoc_command = true;
4530 Q_FALLTHROUGH();
4531 case REQ_TH: // man(7) "Title Header"
4532 {
4533 if (!output_possible)
4534 {
4535 c += j;
4536 getArguments(c, args);
4537 output_possible = true;
4538 out_html(DOCTYPE"<HTML>\n<HEAD>\n");
4539 out_html("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n");
4540 out_html("<TITLE>");
4541 if ( args.count() )
4542 {
4543 // work around the problem that in a title no HTML tags are allowed
4544 // but args[0] can have formatting escapes, e.g. to switch a font
4545 // which results in a HTML tag added to the output
4546 char *result = nullptr;
4547 scan_troff(args[0].data(), 0, &result);
4548 char *p = result;
4549 QByteArray title;
4550 while ( *p )
4551 {
4552 if ( *p == '<' ) // tag begin -> skip whole tag
4553 {
4554 for (p++; *p && (*p != '>'); p++) ;
4555 if ( *p ) p++;
4556 }
4557 if ( *p )
4558 title += *p++;
4559 }
4560 ignore_links = true;
4561 title += '\n'; // needed so that out_html flushes buffer and ignore_links works
4562 out_html(title);
4563 ignore_links = false;
4564 delete [] result;
4565 }
4566 out_html(" Manpage</TITLE>\n");
4567
4568 // KDE defaults.
4569 out_html("<link rel=\"stylesheet\" href=\"help:/kdoctools5-common/kde-default.css\"");
4570 out_html(" type=\"text/css\">\n");
4571
4572 // Output our custom stylesheet.
4573 out_html("<link rel=\"stylesheet\" href=\"");
4574 out_html(cssFile);
4575 out_html("\" type=\"text/css\">\n");
4576
4577 // Some elements need background images, but this
4578 // could not be included in the stylesheet,
4579 // include it now.
4580 out_html("<style type=\"text/css\">\n#header_top { "
4581 "background-image: url(\"help:/kdoctools5-common/top.jpg\"); }\n\n"
4582 "#header_top div { "
4583 "background-image: url(\"help:/kdoctools5-common/top-left.jpg\"); }\n\n"
4584 "#header_top div div { "
4585 "background-image: url(\"help:/kdoctools5-common/top-right.jpg\"); }\n\n"
4586 "</style>\n\n"
4587 );
4588
4589 out_html("<meta name=\"ROFF_Type\" content=\"");
4590 if (mandoc_command)
4591 out_html("mdoc");
4592 else
4593 out_html("man");
4594 out_html("\">\n");
4595
4596 out_html("</HEAD>\n\n");
4597 out_html("<BODY>\n\n");
4598
4599 out_html("<div id=\"header\"><div id=\"header_top\">\n");
4600 out_html("<div><div>\n");
4601 out_html("<img src=\"help:/kdoctools5-common/top-kde.jpg\" alt=\"top-kde\"> ");
4602 if ( args.count() )
4603 scan_troff(args[0].data(), 0, nullptr);
4604 out_html(" Manual Page");
4605 out_html("</div></div></div></div>\n");
4606
4607 out_html("<div style=\"margin-left: 5em; margin-right: 5em;\">\n");
4608 out_html("<h1>");
4609 if ( args.count() )
4610 scan_troff(args[0].data(), 0, nullptr);
4611 out_html("</h1>\n");
4612 if (args.count() > 1)
4613 {
4614 out_html("Section: ");
4615 if ( !mandoc_command && (args.count() > 4) )
4616 scan_troff(args[4].data(), 0, nullptr);
4617 else
4618 out_html(section_name(args[1].data()));
4619 out_html(" (");
4620 scan_troff(args[1].data(), 0, nullptr);
4621 out_html(")\n");
4622 }
4623 else
4624 {
4625 out_html("Section not specified");
4626 }
4627 }
4628 else
4629 {
4630 qCWarning(KIO_MAN_LOG) << ".TH found but output not possible" ;
4631 c = skip_till_newline(c);
4632 }
4633 curpos = 0;
4634 break;
4635 }
4636 case REQ_TX: // mdoc(7)
4637 {
4638 c += j;
4639 getArguments(c, args);
4640 out_html(set_font("I"));
4641 const char *c2 = lookup_abbrev(args[0]);
4642 curpos += qstrlen(c2);
4643 out_html(c2);
4644 out_html(set_font("R"));
4645 if (args.count() > 1)
4646 out_html(args[1]);
4647 break;
4648 }
4649 case REQ_rm: // groff(7) "ReMove"
4650 /* .rm xx : Remove request, macro or string */
4651 mode = true;
4652 Q_FALLTHROUGH();
4653 case REQ_rn: // groff(7) "ReName"
4654 /* .rn xx yy : Rename request, macro or string xx to yy */
4655 {
4656 qCDebug(KIO_MAN_LOG) << "start .rm/.rn";
4657 c += j;
4658 const QByteArray name(scan_identifier(c));
4659 if (name.isEmpty())
4660 {
4661 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin string to remove/rename";
4662 break;
4663 }
4664 QByteArray name2;
4665 if (!mode)
4666 {
4667 while (*c && isspace(*c) && *c != '\n') ++c;
4668 name2 = scan_identifier(c);
4669 if (name2.isEmpty())
4670 {
4671 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination string to rename";
4672 break;
4673 }
4674 }
4675 c = skip_till_newline(c);
4676 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name);
4677 if (it == s_stringDefinitionMap.end())
4678 {
4679 qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string to rename or remove: " << BYTEARRAY(name);
4680 }
4681 else
4682 {
4683 if (mode)
4684 {
4685 // .rm ReMove
4686 s_stringDefinitionMap.remove(name); // ### QT4: removeAll
4687 }
4688 else
4689 {
4690 // .rn ReName
4691 StringDefinition def = (*it);
4692 s_stringDefinitionMap.remove(name); // ### QT4: removeAll
4693 s_stringDefinitionMap.insert(name2, def);
4694 }
4695 }
4696 qCDebug(KIO_MAN_LOG) << "end .rm/.rn";
4697 break;
4698 }
4699 case REQ_nx:
4700 case REQ_in: // groff(7) "INdent"
4701 {
4702 /* .in +-N : Indent */
4703 c = skip_till_newline(c);
4704 break;
4705 }
4706 case REQ_nr: // groff(7) "Number Register"
4707 {
4708 qCDebug(KIO_MAN_LOG) << "start .nr";
4709 c += j;
4710 const QByteArray name(scan_identifier(c));
4711 if (name.isEmpty())
4712 {
4713 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty name for register variable";
4714 break;
4715 }
4716 while (*c && (*c == ' ' || *c == '\t')) c++;
4717 int sign = 0;
4718 if (*c && (*c == '+' || *c == '-'))
4719 {
4720 if (*c == '+')
4721 sign = 1;
4722 else if (*c == '-')
4723 sign = -1;
4724 }
4725 int value = 0;
4726 int increment = 0;
4727 c = scan_expression(c, &value);
4728 if (*c && *c != '\n')
4729 {
4730 while (*c && (*c == ' ' || *c == '\t')) c++;
4731 c = scan_expression(c, &increment);
4732 }
4733 c = skip_till_newline(c);
4734 QMap <QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name);
4735 if (it == s_numberDefinitionMap.end())
4736 {
4737 if (sign < 1)
4738 value = -value;
4739 NumberDefinition def(value, increment);
4740 s_numberDefinitionMap.insert(name, def);
4741 }
4742 else
4743 {
4744 if (sign > 0)
4745 (*it).m_value += value;
4746 else if (sign < 0)
4747 (*it).m_value += - value;
4748 else
4749 (*it).m_value = value;
4750 (*it).m_increment = increment;
4751 }
4752 qCDebug(KIO_MAN_LOG) << "end .nr";
4753 break;
4754 }
4755 case REQ_am: // groff(7) "Append Macro"
4756 /* .am xx yy : append to a macro. */
4757 /* define or handle as .ig yy */
4758 mode = true;
4759 Q_FALLTHROUGH();
4760 case REQ_de: // groff(7) "DEfine macro"
4761 case REQ_de1: // groff(7) "DEfine macro"
4762 {
4763 /* .de xx yy : define or redefine macro xx; end at .yy (..) */
4764 /* define or handle as .ig yy */
4765 qCDebug(KIO_MAN_LOG) << "Start .am/.de";
4766 c += j;
4767 getArguments(c, args);
4768 if ( args.count() == 0 )
4769 break;
4770
4771 const QByteArray name(args[0]);
4772
4773 QByteArray endmacro;
4774 if (args.count() == 1)
4775 endmacro = "..";
4776 else
4777 endmacro = "." + args[1]; // krazy:exclude=doublequote_chars
4778
4779 sl = c;
4780 while (*c && qstrncmp(c, endmacro, endmacro.length()))
4781 c = skip_till_newline(c);
4782
4783 QByteArray macro;
4784 while (sl != c)
4785 {
4786 if (sl[0] == '\\' && sl[1] == '\\')
4787 {
4788 macro += '\\';
4789 sl++;
4790 }
4791 else
4792 macro += *sl;
4793 sl++;
4794 }
4795
4796 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name);
4797 if (it == s_stringDefinitionMap.end())
4798 {
4799 StringDefinition def;
4800 def.m_length = 0;
4801 def.m_output = macro;
4802 s_stringDefinitionMap.insert(name, def);
4803 }
4804 else if (mode)
4805 {
4806 // .am Append Macro
4807 (*it).m_length = 0; // It could be formerly a string
4808 if (!(*it).m_output.endsWith('\n'))
4809 (*it).m_output += '\n';
4810 (*it).m_output += macro;
4811 }
4812 else
4813 {
4814 // .de DEfine macro
4815 (*it).m_length = 0; // It could be formerly a string
4816 (*it).m_output = macro;
4817 }
4818 c = skip_till_newline(c);
4819 qCDebug(KIO_MAN_LOG) << "End .am/.de";
4820 break;
4821 }
4822 case REQ_Bl: // mdoc(7) "Begin List"
4823 {
4824 QByteArray list_options;
4825 char *nl = strchr(c, '\n');
4826 c = c + j;
4827 if (dl_set[itemdepth])
4828 {
4829 /* These things can nest. */
4830 itemdepth++;
4831 }
4832 if (nl)
4833 {
4834 /* Parse list options */
4835 list_options = QByteArray(c, nl - c);
4836 }
4837 if ( list_options.contains("-bullet") )
4838 {
4839 /* HTML Unnumbered List */
4840 dl_set[itemdepth] = BL_BULLET_LIST;
4841 out_html("<UL>\n");
4842 }
4843 else if ( list_options.contains("-enum") )
4844 {
4845 /* HTML Ordered List */
4846 dl_set[itemdepth] = BL_ENUM_LIST;
4847 out_html("<OL>\n");
4848 }
4849 else
4850 {
4851 /* HTML Descriptive List */
4852 dl_set[itemdepth] = BL_DESC_LIST;
4853 out_html("<DL>\n");
4854 }
4855 curpos = 0;
4856 c = skip_till_newline(c);
4857 break;
4858 }
4859 case REQ_El: // mdoc(7) "End List"
4860 {
4861 checkListStack();
4862 c = c + j;
4863 if (dl_set[itemdepth] & BL_DESC_LIST)
4864 out_html("</DL>\n");
4865 else if (dl_set[itemdepth] & BL_BULLET_LIST)
4866 out_html("</UL>\n");
4867 else if (dl_set[itemdepth] & BL_ENUM_LIST)
4868 out_html("</OL>\n");
4869 dl_set[itemdepth] = 0;
4870 if (itemdepth > 0) itemdepth--;
4871 if ( !fillout )
4872 out_html(NEWLINE);
4873
4874 curpos = 0;
4875 c = skip_till_newline(c);
4876 break;
4877 }
4878 case REQ_It: // mdoc(7) "list ITem"
4879 {
4880 checkListStack();
4881 c = c + j;
4882 //if (qstrncmp(c, "Xo", 2) == 0 && isspace(*(c + 2)))
4883 //c = skip_till_newline(c);
4884 if (dl_set[itemdepth] & BL_DESC_LIST)
4885 {
4886 out_html("<DT>");
4887 out_html(set_font("B"));
4888 if (*c == '\n')
4889 {
4890 /* Don't allow embedded comms after a newline */
4891 c++;
4892 c = scan_troff(c, 1, nullptr);
4893 }
4894 else
4895 {
4896 /* Do allow embedded comms on the same line. */
4897 c = scan_troff_mandoc(c, 1, nullptr);
4898 }
4899 out_html(set_font("R"));
4900 out_html("</DT>");
4901 out_html(NEWLINE);
4902 out_html("<DD>");
4903 listItemStack.push("DD");
4904 }
4905 else if (dl_set[itemdepth] & (BL_BULLET_LIST | BL_ENUM_LIST))
4906 {
4907 out_html("<LI>");
4908 listItemStack.push("LI");
4909 c = scan_troff_mandoc(c, 1, nullptr);
4910 out_html(NEWLINE);
4911 }
4912 if (fillout)
4913 curpos++;
4914 else
4915 curpos = 0;
4916 break;
4917 }
4918 case REQ_Bk: /* mdoc(7) */
4919 case REQ_Ek: /* mdoc(7) */
4920 case REQ_Dd: /* mdoc(7) */
4921 case REQ_Os: // mdoc(7) "Operating System"
4922 case REQ_Sm: // mdoc(7) space mode
4923 c = skip_till_newline(c); // TODO
4924 break;
4925 case REQ_Bt: // mdoc(7) "Beta Test"
4926 {
4927 //trans_char(c, '"', '\a');
4928 //c = c + j;
4929 out_html(" is currently in beta test.");
4930 if (fillout)
4931 curpos++;
4932 else
4933 curpos = 0;
4934 break;
4935 }
4936 case REQ_At: /* mdoc(7) */
4937 case REQ_Fx: /* mdoc(7) */
4938 case REQ_Nx: /* mdoc(7) */
4939 case REQ_Ox: /* mdoc(7) */
4940 case REQ_Bx: /* mdoc(7) */
4941 case REQ_Ux: /* mdoc(7) */
4942 case REQ_Dx: /* mdoc(7) */
4943 {
4944 bool parsable = true;
4945 trans_char(c, '"', '\a');
4946 c = c + j;
4947 if (*c == '\n') c++;
4948 if (request == REQ_At)
4949 {
4950 out_html("AT&T UNIX ");
4951 parsable = false;
4952 }
4953 else if (request == REQ_Fx)
4954 {
4955 out_html("FreeBSD ");
4956 parsable = false;
4957 }
4958 else if (request == REQ_Nx)
4959 out_html("NetBSD ");
4960 else if (request == REQ_Ox)
4961 out_html("OpenBSD ");
4962 else if (request == REQ_Bx)
4963 out_html("BSD ");
4964 else if (request == REQ_Ux)
4965 out_html("UNIX ");
4966 else if (request == REQ_Dx)
4967 out_html("DragonFly ");
4968 if (parsable)
4969 c = scan_troff_mandoc(c, 1, nullptr);
4970 else
4971 c = scan_troff(c, 1, nullptr);
4972 if (fillout)
4973 curpos++;
4974 else
4975 curpos = 0;
4976 break;
4977 }
4978 case REQ_Dl: /* mdoc(7) */
4979 {
4980 c = c + j;
4981 out_html(NEWLINE);
4982 out_html("<BLOCKQUOTE>");
4983 if (*c == '\n') c++;
4984 c = scan_troff_mandoc(c, 1, nullptr);
4985 out_html("</BLOCKQUOTE>");
4986 if (fillout)
4987 curpos++;
4988 else
4989 curpos = 0;
4990 break;
4991 }
4992 case REQ_Bd: /* mdoc(7) */
4993 { /* Seems like a kind of example/literal mode */
4994 QByteArray bd_options;
4995 char *nl = strchr(c, '\n');
4996 c = c + j;
4997 if (nl)
4998 bd_options = QByteArray(c, nl - c);
4999 out_html(NEWLINE);
5000 mandoc_bd_options = 0; /* Remember options for terminating Bl */
5001 if ( bd_options.contains("-offset indent") )
5002 {
5003 mandoc_bd_options |= BD_INDENT;
5004 out_html("<BLOCKQUOTE>\n");
5005 }
5006 if ( bd_options.contains("-literal") || bd_options.contains("-unfilled") )
5007 {
5008 if (fillout)
5009 {
5010 mandoc_bd_options |= BD_LITERAL;
5011 out_html(set_font("R"));
5012 out_html(change_to_size('0'));
5013 out_html("<PRE>\n");
5014 }
5015 curpos = 0;
5016 fillout = 0;
5017 }
5018 c = skip_till_newline(c);
5019 break;
5020 }
5021 case REQ_Ed: /* mdoc(7) */
5022 {
5023 if (mandoc_bd_options & BD_LITERAL)
5024 {
5025 if (!fillout)
5026 {
5027 out_html(set_font("R"));
5028 out_html(change_to_size('0'));
5029 out_html("</PRE>\n");
5030 }
5031 }
5032 if (mandoc_bd_options & BD_INDENT)
5033 out_html("</BLOCKQUOTE>\n");
5034 curpos = 0;
5035 fillout = 1;
5036 c = skip_till_newline(c);
5037 break;
5038 }
5039 case REQ_Be: /* mdoc(7) */
5040 {
5041 c = c + j;
5042 if (fillout)
5043 out_html("<br><br>");
5044 else
5045 {
5046 out_html(NEWLINE);
5047 }
5048 curpos = 0;
5049 c = skip_till_newline(c);
5050 break;
5051 }
5052 case REQ_Xr: /* mdoc(7) */ // ### FIXME: it should issue a <a href="man:somewhere(x)"> directly
5053 {
5054 /* Translate xyz 1 to xyz(1)
5055 * Allow for multiple spaces. Allow the section to be missing.
5056 */
5057 char buff[NULL_TERMINATED(MED_STR_MAX)];
5058 char *bufptr;
5059 trans_char(c, '"', '\a');
5060 bufptr = buff;
5061 c = c + j;
5062 if (*c == '\n') c++; /* Skip spaces */
5063 while (isspace(*c) && *c != '\n') c++;
5064 while (isalnum(*c) || *c == '.' || *c == ':' || *c == '_' || *c == '-')
5065 {
5066 /* Copy the xyz part */
5067 *bufptr = *c;
5068 bufptr++;
5069 if (bufptr >= buff + MED_STR_MAX) break;
5070 c++;
5071 }
5072 while (isspace(*c) && *c != '\n') c++; /* Skip spaces */
5073 if (isdigit(*c))
5074 {
5075 /* Convert the number if there is one */
5076 *bufptr = '(';
5077 bufptr++;
5078 if (bufptr < buff + MED_STR_MAX)
5079 {
5080 while (isalnum(*c))
5081 {
5082 *bufptr = *c;
5083 bufptr++;
5084 if (bufptr >= buff + MED_STR_MAX) break;
5085 c++;
5086 }
5087 if (bufptr < buff + MED_STR_MAX)
5088 {
5089 *bufptr = ')';
5090 bufptr++;
5091 }
5092 }
5093 }
5094 while (*c != '\n')
5095 {
5096 /* Copy the remainder */
5097 if (!isspace(*c))
5098 {
5099 *bufptr = *c;
5100 bufptr++;
5101 if (bufptr >= buff + MED_STR_MAX) break;
5102 }
5103 c++;
5104 }
5105 *bufptr = '\n';
5106 bufptr[1] = 0;
5107 scan_troff_mandoc(buff, 1, nullptr);
5108 out_html(NEWLINE);
5109 if (fillout)
5110 curpos++;
5111 else
5112 curpos = 0;
5113 break;
5114 }
5115 case REQ_Fl: // mdoc(7) "FLags"
5116 {
5117 //trans_char(c, '"', '\a');
5118 c += j;
5119 QList<char*> argPointers;
5120 getArguments(c, args, &argPointers);
5121 out_html(set_font("B"));
5122 out_html("-");
5123 if ( args.count() == 0 )
5124 {
5125 /*out_html("-");*/ // stdin or stdout
5126 }
5127 else
5128 {
5129 if ( argPointers.count() )
5130 scan_troff_mandoc(argPointers[0], 1, nullptr);
5131 /*
5132 for (i = 0; i < args.count(); ++i)
5133 {
5134 if (ispunct(args[i][0]) && args[i][0] != '-')
5135 {
5136 scan_troff_mandoc(argPointers[i], 1, NULL);
5137 }
5138 else
5139 {
5140 if (i > 0)
5141 out_html(" "); // Put a space between flags
5142 out_html("-");
5143 scan_troff_mandoc(argPointers[i], 1, NULL);
5144 }
5145 }
5146 */
5147 }
5148 out_html(set_font("R"));
5149 out_html(NEWLINE);
5150 if (fillout)
5151 curpos++;
5152 else
5153 curpos = 0;
5154 break;
5155 }
5156 case REQ_Pa: /* mdoc(7) */
5157 case REQ_Pf: /* mdoc(7) */
5158 {
5159 trans_char(c, '"', '\a');
5160 c = c + j;
5161 if (*c == '\n') c++;
5162 c = scan_troff_mandoc(c, 1, nullptr);
5163 out_html(NEWLINE);
5164 if (fillout)
5165 curpos++;
5166 else
5167 curpos = 0;
5168 break;
5169 }
5170 case REQ_Pp: /* mdoc(7) */
5171 {
5172 if (fillout)
5173 out_html("<br><br>\n");
5174 else
5175 {
5176 out_html(NEWLINE);
5177 }
5178 curpos = 0;
5179 c = skip_till_newline(c);
5180 break;
5181 }
5182 case REQ_Aq: // mdoc(7) "Angle bracket Quote"
5183 c = process_quote(c, j, "<", ">");
5184 break;
5185 case REQ_Bq: // mdoc(7) "Bracket Quote"
5186 c = process_quote(c, j, "[", "]");
5187 break;
5188 case REQ_Dq: // mdoc(7) "Double Quote"
5189 c = process_quote(c, j, "“", "”");
5190 break;
5191 case REQ_Pq: // mdoc(7) "Parenthese Quote"
5192 c = process_quote(c, j, "(", ")");
5193 break;
5194 case REQ_Qq: // mdoc(7) "straight double Quote"
5195 c = process_quote(c, j, """, """);
5196 break;
5197 case REQ_Sq: // mdoc(7) "Single Quote"
5198 c = process_quote(c, j, "‘", "’");
5199 break;
5200 case REQ_Op: /* mdoc(7) */
5201 {
5202 trans_char(c, '"', '\a');
5203 c += j;
5204 if (*c == '\n') c++;
5205 out_html(set_font("R"));
5206 out_html("[");
5207 c = scan_troff_mandoc(c, 1, nullptr);
5208 out_html(set_font("R"));
5209 out_html("]");
5210 out_html(NEWLINE);
5211 if (fillout)
5212 curpos++;
5213 else
5214 curpos = 0;
5215 break;
5216 }
5217 case REQ_Oo: /* mdoc(7) */
5218 {
5219 trans_char(c, '"', '\a');
5220 c += j;
5221 if (*c == '\n') c++;
5222 out_html(set_font("R"));
5223 out_html("[");
5224 c = scan_troff_mandoc(c, 1, nullptr);
5225 if (fillout)
5226 curpos++;
5227 else
5228 curpos = 0;
5229 break;
5230 }
5231 case REQ_Oc: /* mdoc(7) */
5232 {
5233 trans_char(c, '"', '\a');
5234 c += j;
5235 out_html(set_font("R"));
5236 out_html("]");
5237 c = scan_troff_mandoc(c, 1, nullptr);
5238 if (fillout)
5239 curpos++;
5240 else
5241 curpos = 0;
5242 break;
5243 }
5244 case REQ_Ql: /* mdoc(7) */
5245 {
5246 /* Single quote first word in the line */
5247 char *sp;
5248 trans_char(c, '"', '\a');
5249 c = c + j;
5250 if (*c == '\n') c++;
5251 sp = c;
5252 do
5253 {
5254 /* Find first whitespace after the
5255 * first word that isn't a mandoc macro
5256 */
5257 while (*sp && isspace(*sp)) sp++;
5258 while (*sp && !isspace(*sp)) sp++;
5259 }
5260 while (*sp && isupper(*(sp - 2)) && islower(*(sp - 1)));
5261
5262 /* Use a newline to mark the end of text to
5263 * be quoted
5264 */
5265 if (*sp) *sp = '\n';
5266 out_html("`"); /* Quote the text */
5267 c = scan_troff_mandoc(c, 1, nullptr);
5268 out_html("'");
5269 out_html(NEWLINE);
5270 if (fillout)
5271 curpos++;
5272 else
5273 curpos = 0;
5274 break;
5275 }
5276 case REQ_Ar: /* mdoc(7) */
5277 {
5278 /* parse one line in italics */
5279 out_html(set_font("I"));
5280 c += j;
5281 QList<char*> argPointers;
5282 getArguments(c, args, &argPointers);
5283 if ( args.count() == 0 )
5284 {
5285 // An empty Ar means "file ..."
5286 out_html("file ...");
5287 }
5288 else
5289 {
5290 if ( argPointers.count() )
5291 c = scan_troff_mandoc(argPointers[0], 1, nullptr);
5292 }
5293
5294 out_html(set_font("R"));
5295 out_html(NEWLINE);
5296 if (fillout)
5297 curpos++;
5298 else
5299 curpos = 0;
5300 break;
5301 }
5302 case REQ_Em: /* mdoc(7) */
5303 {
5304 out_html("<em>");
5305 trans_char(c, '"', '\a');
5306 c += j;
5307 if (*c == '\n') c++;
5308 c = scan_troff_mandoc(c, 1, nullptr);
5309 out_html("</em>");
5310 out_html(NEWLINE);
5311 if (fillout)
5312 curpos++;
5313 else
5314 curpos = 0;
5315 break;
5316 }
5317 case REQ_Ad: /* mdoc(7) */
5318 case REQ_Va: /* mdoc(7) */
5319 case REQ_Xo: /* mdoc(7) */
5320 case REQ_Xc: /* mdoc(7) */
5321 {
5322 /* parse one line in italics */
5323 out_html(set_font("I"));
5324 trans_char(c, '"', '\a');
5325 c = c + j;
5326 if (*c == '\n') c++;
5327 c = scan_troff_mandoc(c, 1, nullptr);
5328 out_html(set_font("R"));
5329 out_html(NEWLINE);
5330 if (fillout)
5331 curpos++;
5332 else
5333 curpos = 0;
5334 break;
5335 }
5336 case REQ_Nd: /* mdoc(7) */
5337 {
5338 trans_char(c, '"', '\a');
5339 c = c + j;
5340 if (*c == '\n') c++;
5341 out_html(" - ");
5342 c = scan_troff_mandoc(c, 1, nullptr);
5343 out_html(NEWLINE);
5344 if (fillout)
5345 curpos++;
5346 else
5347 curpos = 0;
5348 break;
5349 }
5350 case REQ_Nm: // mdoc(7) "Name Macro"
5351 {
5352 c += j;
5353 QList<char*> argPointers;
5354 getArguments(c, args, &argPointers);
5355
5356 if ( mandoc_name.isEmpty() && args.count() )
5357 mandoc_name = args[0];
5358
5359 if ( mandoc_synopsis )
5360 {
5361 /* Break lines only in the Synopsis.
5362 * The Synopsis section seems to be treated
5363 * as a special case - Bummer!
5364 * Do not insert a break before the very first Nm in this section
5365 */
5366
5367 if ( mandoc_name_count )
5368 out_html("<BR>");
5369
5370 mandoc_name_count++;
5371 }
5372
5373 out_html(set_font("B"));
5374
5375 // only show name if
5376 // .Nm (first not-null-length defined name)
5377 // .Nm name
5378 // do not show
5379 // .Nm ""
5380 if ( args.count() == 0 )
5381 scan_troff(mandoc_name.data(), 0, nullptr);
5382 else
5383 {
5384 if ( argPointers.count() )
5385 c = scan_troff_mandoc(argPointers[0], 1, nullptr);
5386 }
5387
5388 out_html(set_font("R"));
5389
5390 if (fillout)
5391 curpos++;
5392 else
5393 curpos = 0;
5394 break;
5395 }
5396 case REQ_Cd: /* mdoc(7) */
5397 case REQ_Cm: /* mdoc(7) */
5398 case REQ_Ic: /* mdoc(7) */
5399 case REQ_Ms: /* mdoc(7) */
5400 case REQ_Or: /* mdoc(7) */
5401 case REQ_Sy: /* mdoc(7) */
5402 {
5403 /* parse one line in bold */
5404 out_html(set_font("B"));
5405 trans_char(c, '"', '\a');
5406 c = c + j;
5407 if (*c == '\n') c++;
5408 c = scan_troff_mandoc(c, 1, nullptr);
5409 out_html(set_font("R"));
5410 out_html(NEWLINE);
5411 if (fillout)
5412 curpos++;
5413 else
5414 curpos = 0;
5415 break;
5416 }
5417 case REQ_Ta: /* mdoc(7) */
5418 {
5419 // ### FIXME: this is a simplification
5420 // for a list item element in a ".Bl -tag -width indent" type list
5421 // man:mdoc says: "indent == Six constant width spaces"
5422 out_html(" ");
5423 c = c + j;
5424 if (*c == '\n') c++;
5425 break;
5426 }
5427 // ### FIXME: punctuation is handled badly!
5428 case REQ_Dv: /* mdoc(7) */
5429 case REQ_Ev: /* mdoc(7) */
5430 case REQ_Fr: /* mdoc(7) */
5431 case REQ_Li: /* mdoc(7) */
5432 case REQ_nN: /* mdoc(7) */
5433 {
5434 trans_char(c, '"', '\a');
5435 c += j;
5436 if (*c == '\n') c++;
5437 out_html(set_font("B"));
5438 c = scan_troff_mandoc(c, 1, nullptr);
5439 out_html(set_font("R"));
5440 out_html(NEWLINE);
5441 if (fillout)
5442 curpos++;
5443 else
5444 curpos = 0;
5445 break;
5446 }
5447 case REQ_Tn: /* mdoc(7) Trade Names ... prints its arguments in a smaller font */
5448 {
5449 trans_char(c, '"', '\a');
5450 c += j;
5451 if (*c == '\n') c++;
5452 out_html("<small>");
5453 c = scan_troff_mandoc(c, 1, NULL);
5454 out_html("</small>");
5455 if (fillout)
5456 curpos++;
5457 else
5458 curpos = 0;
5459 break;
5460 }
5461 case REQ_Ns: /* mdoc(7) No-Space Macro */
5462 {
5463 c += j;
5464 while (*c && isspace(*c) && (*c != '\n')) c++;
5465 Q_FALLTHROUGH(); // (The '.Ns' macro always invokes the '.No' macro...)
5466 }
5467 case REQ_No: /* mdoc(7) Normal Text Macro */
5468 {
5469 if ( request == REQ_No ) // not fallen through from REQ_Ns
5470 {
5471 trans_char(c, '"', '\a');
5472 c += j;
5473 if (*c == '\n') c++;
5474 }
5475 out_html("<span style=\"font-style:normal\">");
5476 c = scan_troff_mandoc(c, 1, NULL);
5477 out_html("</span>");
5478 out_html(NEWLINE);
5479 if (fillout)
5480 curpos++;
5481 else
5482 curpos = 0;
5483 break;
5484 }
5485 case REQ_perc_A: /* mdoc(7) biblio stuff */
5486 case REQ_perc_D:
5487 case REQ_perc_N:
5488 case REQ_perc_O:
5489 case REQ_perc_P:
5490 case REQ_perc_Q:
5491 case REQ_perc_V:
5492 {
5493 c = c + j;
5494 if (*c == '\n') c++;
5495 c = scan_troff(c, 1, nullptr); /* Don't allow embedded mandoc coms */
5496 if (fillout)
5497 curpos++;
5498 else
5499 curpos = 0;
5500 break;
5501 }
5502 case REQ_perc_B:
5503 case REQ_perc_J:
5504 case REQ_perc_R:
5505 case REQ_perc_T:
5506 {
5507 c = c + j;
5508 out_html(set_font("I"));
5509 if (*c == '\n') c++;
5510 c = scan_troff(c, 1, nullptr); /* Don't allow embedded mandoc coms */
5511 out_html(set_font("R"));
5512 if (fillout)
5513 curpos++;
5514 else
5515 curpos = 0;
5516 break;
5517 }
5518 case REQ_URL: // man(7) ".URL url link trailer"
5519 {
5520 c += j;
5521
5522 getArguments(c, args);
5523 ignore_links = true;
5524 out_html("<a href=\"");
5525
5526 if ( args.count() > 0 )
5527 scan_troff(args[0].data(), 0, nullptr);
5528
5529 out_html("\">");
5530 if ( args.count() > 1 )
5531 scan_troff(args[1].data(), 0, nullptr);
5532
5533 out_html("</a>\n"); // trailing newline important to make ignore_links work
5534 ignore_links = false;
5535
5536 if ( args.count() > 2 )
5537 scan_troff(args[2].data(), 1, nullptr);
5538
5539 break;
5540 }
5541 case REQ_tr: // translate TODO
5542 {
5543 c = skip_till_newline(c);
5544 break;
5545 }
5546 case REQ_nroff: // groff(7) "NROFF mode"
5547 mode = true;
5548 Q_FALLTHROUGH();
5549 case REQ_troff: // groff(7) "TROFF mode"
5550 {
5551 s_nroff = mode;
5552 c += j;
5553 c = skip_till_newline(c);
5554 break;
5555 }
5556 case REQ_als: // groff(7) "ALias String"
5557 {
5558 /*
5559 * Note an alias is supposed to be something like a hard link
5560 * However to make it simplier, we only copy the string.
5561 */
5562 // Be careful: unlike .rn, the destination is first, origin is second
5563 qCDebug(KIO_MAN_LOG) << "start .als";
5564 c += j;
5565 const QByteArray name(scan_identifier(c));
5566 if (name.isEmpty())
5567 {
5568 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination string to alias";
5569 break;
5570 }
5571 while (*c && isspace(*c) && *c != '\n') ++c;
5572 const QByteArray name2(scan_identifier(c));
5573 if (name2.isEmpty())
5574 {
5575 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin string to alias";
5576 break;
5577 }
5578 qCDebug(KIO_MAN_LOG) << "Alias " << BYTEARRAY(name2) << " to " << BYTEARRAY(name);
5579 c = skip_till_newline(c);
5580 if (name == name2)
5581 {
5582 qCDebug(KIO_MAN_LOG) << "EXCEPTION: same origin and destination string to alias: " << BYTEARRAY(name);
5583 break;
5584 }
5585 // Second parameter is origin (unlike in .rn)
5586 QMap<QByteArray, StringDefinition>::iterator it = s_stringDefinitionMap.find(name2);
5587 if (it == s_stringDefinitionMap.end())
5588 {
5589 qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string to make alias of " << BYTEARRAY(name2);
5590 }
5591 else
5592 {
5593 StringDefinition def = (*it);
5594 s_stringDefinitionMap.insert(name, def);
5595 }
5596 qCDebug(KIO_MAN_LOG) << "end .als";
5597 break;
5598 }
5599 case REQ_rr: // groff(7) "Remove number Register"
5600 {
5601 qCDebug(KIO_MAN_LOG) << "start .rr";
5602 c += j;
5603 const QByteArray name(scan_identifier(c));
5604 if (name.isEmpty())
5605 {
5606 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin string to remove/rename: ";
5607 break;
5608 }
5609 c = skip_till_newline(c);
5610 QMap <QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name);
5611 if (it == s_numberDefinitionMap.end())
5612 {
5613 qCDebug(KIO_MAN_LOG) << "EXCEPTION: trying to remove inexistant number register: ";
5614 }
5615 else
5616 {
5617 s_numberDefinitionMap.remove(name);
5618 }
5619 qCDebug(KIO_MAN_LOG) << "end .rr";
5620 break;
5621 }
5622 case REQ_rnn: // groff(7) "ReName Number register"
5623 {
5624 qCDebug(KIO_MAN_LOG) << "start .rnn";
5625 c += j;
5626 const QByteArray name(scan_identifier(c));
5627 if (name.isEmpty())
5628 {
5629 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin to remove/rename number register";
5630 break;
5631 }
5632 while (*c && isspace(*c) && *c != '\n') ++c;
5633 const QByteArray name2(scan_identifier(c));
5634 if (name2.isEmpty())
5635 {
5636 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination to rename number register";
5637 break;
5638 }
5639 c = skip_till_newline(c);
5640 QMap<QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name);
5641 if (it == s_numberDefinitionMap.end())
5642 {
5643 qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find number register to rename" << BYTEARRAY(name);
5644 }
5645 else
5646 {
5647 NumberDefinition def = (*it);
5648 s_numberDefinitionMap.remove(name); // ### QT4: removeAll
5649 s_numberDefinitionMap.insert(name2, def);
5650 }
5651 qCDebug(KIO_MAN_LOG) << "end .rnn";
5652 break;
5653 }
5654 case REQ_aln: // groff(7) "ALias Number Register"
5655 {
5656 /*
5657 * Note an alias is supposed to be something like a hard link
5658 * However to make it simplier, we only copy the string.
5659 */
5660 // Be careful: unlike .rnn, the destination is first, origin is second
5661 qCDebug(KIO_MAN_LOG) << "start .aln";
5662 c += j;
5663 const QByteArray name(scan_identifier(c));
5664 if (name.isEmpty())
5665 {
5666 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty destination number register to alias";
5667 break;
5668 }
5669 while (*c && isspace(*c) && *c != '\n') ++c;
5670 const QByteArray name2(scan_identifier(c));
5671 if (name2.isEmpty())
5672 {
5673 qCDebug(KIO_MAN_LOG) << "EXCEPTION: empty origin number register to alias";
5674 break;
5675 }
5676 qCDebug(KIO_MAN_LOG) << "Alias " << BYTEARRAY(name2) << " to " << BYTEARRAY(name);
5677 c = skip_till_newline(c);
5678 if (name == name2)
5679 {
5680 qCDebug(KIO_MAN_LOG) << "EXCEPTION: same origin and destination number register to alias: " << BYTEARRAY(name);
5681 break;
5682 }
5683 // Second parameter is origin (unlike in .rnn)
5684 QMap<QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find(name2);
5685 if (it == s_numberDefinitionMap.end())
5686 {
5687 qCDebug(KIO_MAN_LOG) << "EXCEPTION: cannot find string to make alias: " << BYTEARRAY(name2);
5688 }
5689 else
5690 {
5691 NumberDefinition def = (*it);
5692 s_numberDefinitionMap.insert(name, def);
5693 }
5694 qCDebug(KIO_MAN_LOG) << "end .aln";
5695 break;
5696 }
5697 case REQ_shift: // groff(7) "SHIFT parameter"
5698 {
5699 c += j;
5700 h = c;
5701 while (*h && *h != '\n' && isdigit(*h)) ++h;
5702 const char tempchar = *h;
5703 *h = 0;
5704 const QByteArray number(c);
5705 *h = tempchar;
5706 c = skip_till_newline(h);
5707 unsigned int result = 1; // Numbers of shifts to do
5708 if (!number.isEmpty())
5709 {
5710 bool ok = false;
5711 result = number.toUInt(&ok);
5712 if (!ok || result < 1)
5713 result = 1;
5714 }
5715 for (unsigned int num = 0; num < result; ++num)
5716 {
5717 if (!s_argumentList.isEmpty())
5718 s_argumentList.pop_front();
5719 }
5720 break;
5721 }
5722 case REQ_while: // groff(7) "WHILE loop"
5723 {
5724 request_while(c, j, mandoc_command);
5725 break;
5726 }
5727 case REQ_break: // groff(7) Break out of a while loop.
5728 {
5729 c += j;
5730 break_the_while_loop = true;
5731 break;
5732 }
5733 case REQ_do: // groff(7) "DO command"
5734 {
5735 // ### HACK: we just replace do by a \n and a .
5736 *c = '\n';
5737 c++;
5738 *c = '.';
5739 // The . will be treated as next character
5740 break;
5741 }
5742 case REQ_nop: // groff(7) nop
5743 {
5744 c += j;
5745 break;
5746 }
5747 default:
5748 {
5749 if (mandoc_command &&
5750 ((isupper(*c) && islower(*(c + 1)))
5751 || (islower(*c) && isupper(*(c + 1)))))
5752 {
5753 /* Let through any mdoc(7) commands that haven't
5754 * been delt with.
5755 * I don't want to miss anything out of the text.
5756 */
5757 char buf[4] = { c[0], c[1], ' ', 0 };
5758 out_html(buf); /* Print the command (it might just be text). */
5759 c = c + j;
5760 trans_char(c, '"', '\a');
5761 if (*c == '\n') c++;
5762 out_html(set_font("R"));
5763 c = scan_troff(c, 1, nullptr);
5764 out_html(NEWLINE);
5765 if (fillout)
5766 curpos++;
5767 else
5768 curpos = 0;
5769 }
5770 else
5771 c = skip_till_newline(c);
5772 break;
5773 }
5774 }
5775 }
5776 }
5777 if (fillout)
5778 {
5779 out_html(NEWLINE);
5780 curpos++;
5781 }
5782 return c;
5783 }
5784
5785 //---------------------------------------------------------------------
5786
// Set to 1 by scan_troff() when it meets a tab character. At the next newline
// scan_troff() emits an extra "<br>" if this is set and fill mode is on, and
// then clears it again.
static int contained_tab = 0;
static bool mandoc_line = false; // Signals whether to look for embedded mandoc commands.
5789
scan_troff(char * c,bool san,char ** result)5790 static char *scan_troff(char *c, bool san, char **result)
5791 { /* san : stop at newline */
5792 QByteArray intbuff;
5793 intbuff.reserve(MED_STR_MAX);
5794 #define FLUSHIBP { out_html(intbuff); intbuff.clear(); }
5795 char *exbuffer;
5796 int exbuffpos, exbuffmax, exnewline_for_fun;
5797 bool exscaninbuff;
5798 int usenbsp = 0;
5799
5800 exbuffer = buffer;
5801 exbuffpos = buffpos;
5802 exbuffmax = buffmax;
5803 exnewline_for_fun = newline_for_fun;
5804 exscaninbuff = scaninbuff;
5805 newline_for_fun = 0;
5806 if (result)
5807 {
5808 if (*result)
5809 {
5810 buffer = *result;
5811 buffpos = qstrlen(buffer);
5812 buffmax = buffpos;
5813 }
5814 else
5815 {
5816 buffer = new char[LARGE_STR_MAX + 1];
5817 buffpos = 0;
5818 buffmax = LARGE_STR_MAX;
5819 }
5820 scaninbuff = true;
5821 }
5822 char *h = c; // ### FIXME below are too many tests that may go before the position of c
5823 /* start scanning */
5824
5825 while (h && *h && (!san || newline_for_fun || (*h != '\n')) && !break_the_while_loop)
5826 {
5827 if (*h == escapesym)
5828 {
5829 h++;
5830 FLUSHIBP;
5831 // ###HACK: I think after escape expansion, the line should be reparsed
5832 // (this seems to be what troff does), but it would double-escape
5833 // HTML chars, e.g. the first escape produces "<span...", the second
5834 // would change that to <span...
5835 // Therefore work around some man pages (e.g. nmap, smb.conf),
5836 // which have \." at beginning of
5837 // line (probably just typos), but troff would skip these
5838 if ( (h[-2] == '\n') && (*h == '.') ) // when line starts with \. ignore line
5839 {
5840 while (*h && (*h != '\n')) h++;
5841 continue; // avoid h++ at the end
5842 }
5843 else
5844 {
5845 h = scan_escape(h);
5846 }
5847 }
5848 else if (*h == controlsym && h[-1] == '\n')
5849 {
5850 h++;
5851 FLUSHIBP;
5852 h = scan_request(h);
5853 if (h && san && h[-1] == '\n') h--;
5854 }
5855 else if (mandoc_line
5856 && ((*(h - 1)) && (isspace(*(h - 1)) || (*(h - 1)) == '\n'))
5857 && *(h) && isupper(*(h))
5858 && *(h + 1) && islower(*(h + 1))
5859 && *(h + 2) && isspace(*(h + 2)))
5860 {
5861 // mdoc(7) embedded command eg ".It Fl Ar arg1 Fl Ar arg2"
5862 FLUSHIBP;
5863 h = scan_request(h);
5864 if (san && h[-1] == '\n')
5865 {
5866 h--;
5867 break;
5868 }
5869 }
5870 else if (*h == nobreaksym && h[-1] == '\n')
5871 {
5872 h++;
5873 FLUSHIBP;
5874 h = scan_request(h);
5875 if (san && h[-1] == '\n') h--;
5876 }
5877 else
5878 {
5879 if (still_dd && isalnum(*h) && h[-1] == '\n')
5880 {
5881 /* sometimes a .HP request is not followed by a .br request */
5882 FLUSHIBP;
5883 out_html("<DD>");
5884 curpos = 0;
5885 still_dd = false;
5886 }
5887 switch (*h)
5888 {
5889 case '&':
5890 {
5891 intbuff += "&";
5892 curpos++;
5893 break;
5894 }
5895 case '<':
5896 {
5897 intbuff += "<";
5898 curpos++;
5899 break;
5900 }
5901 case '>':
5902 {
5903 intbuff += ">";
5904 curpos++;
5905 break;
5906 }
5907 case '"':
5908 {
5909 intbuff += """;
5910 curpos++;
5911 break;
5912 }
5913 case '\n':
5914 {
5915 if (h != c && h[-1] == '\n' && fillout)
5916 {
5917 intbuff += "<p>";
5918 }
5919 if (contained_tab && fillout)
5920 {
5921 intbuff += "<br>";
5922 }
5923 contained_tab = 0;
5924 curpos = 0;
5925 usenbsp = 0;
5926 intbuff += '\n';
5927 FLUSHIBP;
5928 break;
5929 }
5930 case '\t':
5931 {
5932 int curtab = 0;
5933 contained_tab = 1;
5934 FLUSHIBP;
5935 /* like a typewriter, not like TeX */
5936 tabstops[19] = curpos + 1;
5937 while (curtab < maxtstop && tabstops[curtab] <= curpos)
5938 curtab++;
5939 if (curtab < maxtstop)
5940 {
5941 if (!fillout)
5942 {
5943 while (curpos < tabstops[curtab])
5944 {
5945 intbuff += ' ';
5946 if (intbuff.length() > MED_STR_MAX)
5947 {
5948 FLUSHIBP;
5949 }
5950 curpos++;
5951 }
5952 }
5953 else
5954 {
5955 out_html("<TT>");
5956 while (curpos < tabstops[curtab])
5957 {
5958 out_html(" ");
5959 curpos++;
5960 }
5961 out_html("</TT>");
5962 }
5963 }
5964 break;
5965 }
5966 default:
5967 {
5968 if (*h == ' ' && (h[-1] == '\n' || usenbsp))
5969 {
5970 FLUSHIBP;
5971 if (!usenbsp && fillout)
5972 {
5973 out_html("<BR>");
5974 curpos = 0;
5975 }
5976 usenbsp = fillout;
5977 if (usenbsp)
5978 out_html(" ");
5979 else
5980 intbuff += ' ';
5981 }
5982 else if (*h > 31 && *h < 127) intbuff += *h;
5983 else if (((unsigned char)(*h)) > 127)
5984 {
5985 intbuff += *h;
5986 }
5987 curpos++;
5988 break;
5989 }
5990 }
5991 if ( intbuff.length() > MED_STR_MAX ) FLUSHIBP;
5992 h++;
5993 }
5994 }
5995 FLUSHIBP;
5996 if (buffer) buffer[buffpos] = '\0';
5997 if (san && h && *h) h++;
5998 newline_for_fun = exnewline_for_fun;
5999 if (result)
6000 {
6001 *result = buffer;
6002 buffer = exbuffer;
6003 buffpos = exbuffpos;
6004 buffmax = exbuffmax;
6005 scaninbuff = exscaninbuff;
6006 }
6007
6008 return h;
6009 }
6010
6011 //---------------------------------------------------------------------
6012
scan_troff_mandoc(char * c,bool san,char ** result)6013 static char *scan_troff_mandoc(char *c, bool san, char **result)
6014 {
6015 char *ret;
6016 char *end = c;
6017 bool oldval = mandoc_line;
6018 mandoc_line = true;
6019 while (*end && *end != '\n')
6020 {
6021 end++;
6022 }
6023
6024 if (end > c + 2
6025 && ispunct(*(end - 1))
6026 && isspace(*(end - 2)) && *(end - 2) != '\n')
6027 {
6028 /* Don't format lonely punctuation E.g. in "xyz ," format
6029 * the xyz and then append the comma removing the space.
6030 */
6031 *(end - 2) = '\n';
6032 ret = scan_troff(c, san, result);
6033 *end = 0;
6034 out_html(end - 1); // output the punct char
6035 *end = '\n';
6036 ret = end;
6037 }
6038 else
6039 {
6040 ret = scan_troff(c, san, result);
6041 }
6042 mandoc_line = oldval;
6043 return ret;
6044 }
6045
6046 //---------------------------------------------------------------------
6047 // Entry point
6048
scan_man_page(const char * man_page)6049 void scan_man_page(const char *man_page)
6050 {
6051 if (!man_page)
6052 return;
6053
6054 qCDebug(KIO_MAN_LOG) << "Start scanning man page";
6055
6056 // ### Do more init
6057 // Unlike man2html, we actually call this several times, hence the need to
6058 // properly cleanup all those static vars
6059 s_ifelseval.clear();
6060
6061 s_characterDefinitionMap.clear();
6062 InitCharacterDefinitions();
6063
6064 s_stringDefinitionMap.clear();
6065 InitStringDefinitions();
6066
6067 s_numberDefinitionMap.clear();
6068 InitNumberDefinitions();
6069
6070 s_argumentList.clear();
6071 listItemStack.clear();
6072
6073 in_div = 0;
6074
6075 s_dollarZero = ""; // No macro called yet!
6076 mandoc_name = "";
6077
6078 output_possible = false;
6079 int strLength = qstrlen(man_page);
6080 char *buf = new char[strLength + 2];
6081 qstrcpy(buf + 1, man_page);
6082 buf[0] = '\n';
6083
6084 qCDebug(KIO_MAN_LOG) << "Parse man page";
6085
6086 scan_troff(buf + 1, 0, nullptr);
6087
6088 qCDebug(KIO_MAN_LOG) << "Man page parsed!";
6089
6090 while (itemdepth || dl_set[itemdepth])
6091 {
6092 checkListStack();
6093 out_html("</DL>\n");
6094 if (dl_set[itemdepth]) dl_set[itemdepth] = 0;
6095 else if (itemdepth > 0) itemdepth--;
6096 }
6097
6098 out_html(set_font("R"));
6099 out_html(change_to_size(0));
6100 if (!fillout)
6101 {
6102 fillout = 1;
6103 out_html("</PRE>");
6104 }
6105 out_html(NEWLINE);
6106
6107 if (in_div)
6108 {
6109 output_real("</div><div style=\"margin-left: 2cm\">\n");
6110 in_div = 0;
6111 }
6112
6113 if (output_possible)
6114 {
6115 // The output is buggy wrt to how divs are handled. Fixing it would
6116 // require closing divs before other block-level elements are output,
6117 // and I do not feel like going to find them all.
6118 output_real("</div></div></div></div>\n");
6119
6120 output_real("<div id=\"footer\"><div id=\"footer_text\">\n");
6121 #ifdef SIMPLE_MAN2HTML
6122 output_real("Generated by kio_man");
6123 #else
6124 output_real("Generated by kio_man version ");
6125 output_real(QString(KDE_VERSION_STRING).toHtmlEscaped().toLocal8Bit());
6126 #endif
6127 output_real("</div></div>\n\n");
6128
6129 output_real("</BODY>\n</HTML>\n");
6130 }
6131 delete [] buf;
6132
6133 // Release memory
6134 s_characterDefinitionMap.clear();
6135 s_stringDefinitionMap.clear();
6136 s_numberDefinitionMap.clear();
6137 s_argumentList.clear();
6138
6139 // reinit static variables for reuse
6140 delete [] buffer;
6141 buffer = nullptr;
6142
6143 escapesym = '\\';
6144 nobreaksym = '\'';
6145 controlsym = '.';
6146 fieldsym = 0;
6147 padsym = 0;
6148
6149 buffpos = 0;
6150 buffmax = 0;
6151 scaninbuff = false;
6152 itemdepth = 0;
6153 for (int i = 0; i < 20; i++)
6154 dl_set[i] = 0;
6155 still_dd = false;
6156 for (int i = 0; i < 12; i++)
6157 tabstops[i] = (i + 1) * 8;
6158 maxtstop = 12;
6159 curpos = 0;
6160
6161 mandoc_name_count = 0;
6162 }
6163
6164 //---------------------------------------------------------------------
6165
manPageToUtf8(const QByteArray & input,const QByteArray & dirName)6166 char *manPageToUtf8(const QByteArray &input, const QByteArray &dirName)
6167 {
6168 // as we do not know in which encoding the man source is, try to automatically
6169 // detect it and always return it as UTF-8
6170
6171 QByteArray encoding;
6172
6173 // some pages contain "coding:" information. See "man manconv"
6174 // (but I find pages which do not exactly obey the format described in manconv, e.g.
6175 // the control char is either "." or "'")
6176 // Therefore use a QRegularExpression
6177 const QRegularExpression regex("[\\.']\\\\\"[^$]*coding:\\s*(\\S*)\\s", QRegularExpression::CaseInsensitiveOption);
6178 QRegularExpressionMatch rmatch;
6179 if (QString::fromLatin1(input).indexOf(regex, 0, &rmatch) == 0)
6180 {
6181 encoding = rmatch.captured(1).toLatin1();
6182
6183 qCDebug(KIO_MAN_LOG) << "found embedded encoding" << encoding;
6184 }
6185 else
6186 {
6187 // check according to the dirName the man page is in
6188
6189 // if the dirName contains a ".", the encoding follows, e.g. "de.UTF-8"
6190 int dot = dirName.indexOf('.');
6191 if ( dot != -1 )
6192 {
6193 encoding = dirName.mid(dot + 1);
6194 }
6195 else
6196 {
6197 /* wanted to use KEncodingProber ... however it fails and gives very unreliable
6198 results ... telling me often UTF-8 encoded pages are EUC-JP or gb18030 ...
6199 In fact all man pages here on openSuse are encoded in UTF-8
6200
6201 KEncodingProber encodingProber;
6202 encodingProber.feed(input);
6203
6204 qCDebug(KIO_MAN_LOG) << "auto-detect encoding; guess=" << encodingProber.encoding()
6205 << "confidence=" << encodingProber.confidence();
6206
6207 encoding = encodingProber.encoding();
6208 */
6209
6210 // the original bug report #141340
6211 // mentioned the env var MAN_ICONV_INPUT_CHARSET ... let's check if it is set
6212 // This seems not be a std. man-db env var, but I find several traces of it on the web
6213 encoding = qgetenv("MAN_ICONV_INPUT_CHARSET");
6214
6215 if ( encoding.isEmpty() )
6216 encoding = "UTF-8";
6217 }
6218 }
6219
6220 QTextCodec *codec = 0;
6221
6222 if ( !encoding.isEmpty() )
6223 codec = QTextCodec::codecForName(encoding);
6224
6225 if ( !codec ) // fallback encoding
6226 codec = QTextCodec::codecForName("ISO-8859-1");
6227
6228 qCDebug(KIO_MAN_LOG) << "using the encoding" << codec->name() << "for file in dir" << dirName;
6229
6230 QString out = codec->toUnicode(input);
6231 QByteArray array = out.toUtf8();
6232
6233 // TODO get rid of this double allocation and scan a QByteArray
6234 const int len = array.size();
6235 char *buf = new char[len + 4];
6236 memmove(buf + 1, array.data(), len);
6237 buf[0] = buf[len+1] = '\n'; // Start and end with an end of line
6238 buf[len+2] = buf[len+3] = '\0'; // Two NUL characters at end
6239
6240 return buf;
6241 }
6242
6243 //---------------------------------------------------------------------
6244
6245 #ifdef SIMPLE_MAN2HTML
// Output sink of the stand-alone converter: writes the NUL-terminated string
// 'insert' to standard output.
//
// A null pointer is silently ignored. Streaming a null 'const char *' into
// std::cout is undefined behaviour and, depending on the standard library,
// either crashes or puts the stream into a failed state so that everything
// written afterwards gets lost.
void output_real(const char *insert)
{
    if (!insert)
        return;

    std::cout << insert;
}
6250
read_man_page(const char * filename)6251 char *read_man_page(const char *filename)
6252 {
6253 #if KARCHIVE_VERSION >= QT_VERSION_CHECK(5, 85, 0)
6254 KCompressionDevice fd(QFile::decodeName(filename));
6255 #else
6256 KFilterDev fd(QFile::decodeName(filename));
6257 #endif
6258 if ( !fd.open(QIODevice::ReadOnly) )
6259 {
6260 std::cerr << "read_man_page: can not open " << filename << std::endl;
6261 return nullptr;
6262 }
6263
6264 QDir dir(QFileInfo(QFile::decodeName(filename)).dir());
6265 dir.cdUp();
6266 char *data = manPageToUtf8(fd.readAll(), QFile::encodeName(dir.dirName()));
6267
6268 return data;
6269 }
6270
6271 //--------------------------------------------------------------------------------
6272
6273 #ifndef KIO_MAN_TEST
main(int argc,char ** argv)6274 int main(int argc, char **argv)
6275 {
6276 if (argc < 2)
6277 {
6278 std::cerr << "call: " << argv[0] << " <filename>\n";
6279 return 1;
6280 }
6281 if (chdir(argv[1]))
6282 {
6283 char *buf = read_man_page(argv[1]);
6284 if (buf)
6285 {
6286 scan_man_page(buf);
6287 delete [] buf;
6288 }
6289 }
6290 else
6291 {
6292 DIR *dir = opendir(".");
6293 struct dirent *ent;
6294 while ((ent = readdir(dir)) != nullptr)
6295 {
6296 std::cerr << "converting " << ent->d_name << std::endl;
6297 char *buf = read_man_page(ent->d_name);
6298 if (buf)
6299 {
6300 scan_man_page(buf);
6301 delete [] buf;
6302 }
6303 }
6304 closedir(dir);
6305 }
6306 return 0;
6307 }
6308 #endif
6309
6310
6311 #endif
6312
6313 // kate: indent-mode cstyle; space-indent on; indent-width 2; replace-tabs on;
6314