1 /*
2 khdoc.cc Hypertext Document Classes
3 Copyright (c) 1996-9,2000-2,2003,2007,2009 Kriang Lerdsuwanakij
4 email: lerdsuwa@users.sourceforge.net
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21 /*
22 Current supported HTML tags:
23 <!--...-->
24 <A HREF="...">...</A>
25 <A HREF="#...">...</A>
26 <A HREF="...#...">...</A>
27 <A NAME="...">...</A> (</A> optional but recommended)
28 <B>...</B>
29 <BLOCKQUOTE>...</BLOCKQUOTE>
30 <BR>
31 <CITE>...</CITE>
32 <DL><DT><DD>...</DL>
33 <EM>...</EM>
34 <HR>
35 <I>...</I>
36 <LI>
37 <OL>...</OL>
38 <P>...</P>
39 <PRE>...</PRE>
40 <STRONG>...</STRONG>
41 <TITLE>...</TITLE> (Max 50 chars for 80 columns,
42 escapes not allowed)
43 <UL>...</UL>
	&lt; &gt; &amp; &quot; and all ISO 8859-1 character set
45
46 &#...;
47
48 Current partially supported tags:
49 <H?>...</H?>
50
51 Current supported non-HTML tags:
52 <A HREF=`...`>...</A>
53 <A CALL="...">...</A>
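	&boxur; (└)  &boxdr; (┌)  &boxdl; (┐)  &boxul; (┘)
	&boxvl; (┤)  &boxvr; (├)  &boxhu; (┴)  &boxhd; (┬)
	&boxh; (─)   &boxv; (│)   &boxvh; (┼)
	&rarr; (→)   &larr; (←)   &darr; (↓)   &uarr; (↑)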
58 */
59
60 /*
61 To do:
62 - Various control chars (LF, CR, tab) in title not allowed.
63 - Handle format & escape in title.
64 - Does not support nested <A ...> tags.
65 - Space part of link left at the end of line after wrap.
	- Lines with over 1000 columns will not be wrapped properly.  Make
	  some arrays dynamically allocated.
	- Cannot handle large files > 1M; they take too much RAM.
69 - Make classes more suitable for inheritance.
70 - Remove hardcoded NUM_COLUMN limit.
71 - Replace NUM_INDENT limit with STL stack.
72 */
73
74 #include "khdoc.h"
75 #include "cstrlib.h"
76 #include "gzfileio.h"
77 #include "seek.h"
78 #include "strmisc.h"
79 #include "gtstream.h"
80
81 #if HAVE_UNISTD_H
82 # include <unistd.h>
83 #endif
84 #include <sys/stat.h>
85
86 #ifdef USE_UTF8_MODE
87 # ifdef HAVE_WCHAR_H
88 # ifndef __USE_XOPEN
89 # define __USE_XOPEN
90 # endif
91 # include CXX__HEADER_cwchar
92 # include CXX__HEADER_cwctype
93 # endif
94 #endif
95
96 #include CXX__HEADER_fstream
97
98 // #define DEBUG_FORMAT_TEXT
99
100 /*************************************************************************
101 Hypertext functions
102 *************************************************************************/
103
104 #ifdef TRIM_NO_CHECK
105
// TRIM_NO_CHECK build: syntax checks are compiled out, so reporting a
// tag found inside another tag does nothing.
// NOTE(review): from/length are int here but size_t in the checking
// variant of this function -- confirm this is intended.
inline void ThrowErrorTagInside(const string & /*tag1*/, const string & /*tag2*/,
				char * /*buffer*/, int /*from*/, int /*length*/)
{
	// Intentionally empty
}
109
// TRIM_NO_CHECK build: a nested tag is silently accepted.
inline void ThrowErrorNestedTag(const string & /*tag*/, char * /*buffer*/,
				int /*from*/, int /*length*/)
{
	// Intentionally empty
}
113
// TRIM_NO_CHECK build: a closing tag without its opening tag is
// silently accepted.
inline void ThrowErrorTagWithout(const string & /*tag1*/, const string & /*tag2*/,
				 char * /*buffer*/, int /*from*/, int /*length*/)
{
	// Intentionally empty
}
117
// TRIM_NO_CHECK build: a missing symbol is silently accepted.
inline void ThrowErrorSymbolNeeded(const string & /*tag1*/, const string & /*tag2*/,
				   char * /*buffer*/, int /*from*/, int /*length*/)
{
	// Intentionally empty
}
121
// TRIM_NO_CHECK build: an unexpected end of file is silently accepted.
inline void ThrowErrorEndOfFile(const string & /*tag*/, char * /*buffer*/,
				int /*from*/, int /*length*/)
{
	// Intentionally empty
}
125
// TRIM_NO_CHECK build: a missing terminator is silently accepted.
inline void ThrowErrorCannotFind(const string & /*tag*/, char * /*buffer*/,
				 int /*from*/, int /*length*/)
{
	// Intentionally empty
}
129
// TRIM_NO_CHECK build: a general syntax error message is discarded.
inline void ThrowErrorMessage(const string & /*msg*/, char * /*buffer*/,
			      int /*from*/, int /*length*/)
{
	// Intentionally empty
}
133
134 #else
135
// Find the 1-based line number of the character at buffer[index].
//
// Only the newlines that occur strictly before index are counted, so a
// '\n' located at index itself is reported as part of the line it
// terminates, and buffer[index] is never read.

size_t	GetSourceLine(char *buffer, size_t index)
{
	size_t	lineNo = 1;
	for (size_t i = 0; i < index; i++)
		if (buffer[i] == '\n')		// End of line found
			lineNo++;
	return lineNo;
}
146
ThrowErrorMessage(const string & msg,char * buffer,size_t from,size_t length)147 void ThrowErrorMessage(const string &msg, char *buffer, size_t from, size_t length)
148 {
149 gtstream bufstr;
150 gtout(bufstr, msg) << GetSourceLine(buffer, from);
151 bufstr << _("Content:\n")) << dump_bytes(buffer, from, length, 70);
152 throw ErrorGenericSyntax(bufstr);
153 }
154
ThrowErrorTag1(const string & tag,const string & msg,char * buffer,size_t from,size_t length)155 void ThrowErrorTag1(const string &tag, const string &msg, char *buffer, size_t from, size_t length)
156 {
157 gtstream bufstr;
158 gtout(bufstr, msg) << tag << GetSourceLine(buffer, from);
159 bufstr << _("Content:\n")) << dump_bytes(buffer, from, length, 70);
160 throw ErrorGenericSyntax(bufstr);
161 }
162
ThrowErrorTag2(const string & tag1,const string & tag2,const string & msg,char * buffer,size_t from,size_t length)163 void ThrowErrorTag2(const string &tag1, const string &tag2, const string &msg, char *buffer, size_t from, size_t length)
164 {
165 gtstream bufstr;
166 gtout(bufstr, msg) << tag1 << tag2 << GetSourceLine(buffer, from);
167 bufstr << _("Content:\n")) << dump_bytes(buffer, from, length, 70);
168 throw ErrorGenericSyntax(bufstr);
169 }
170
ThrowErrorTagInside(const string & tag1,const string & tag2,char * buffer,size_t from,size_t length)171 void ThrowErrorTagInside(const string &tag1, const string &tag2, char *buffer, size_t from, size_t length)
172 {
173 ThrowErrorTag2(tag1, tag2, _("%$ tag inside %$ tag at line %$\n"), buffer, from, length);
174 }
175
ThrowErrorNestedTag(const string & tag,char * buffer,size_t from,size_t length)176 void ThrowErrorNestedTag(const string &tag, char *buffer, size_t from, size_t length)
177 {
178 ThrowErrorTag1(tag, _("nested %$ tag at line %$\n"), buffer, from, length);
179 }
180
ThrowErrorTagWithout(const string & tag1,const string & tag2,char * buffer,size_t from,size_t length)181 void ThrowErrorTagWithout(const string &tag1, const string &tag2, char *buffer, size_t from, size_t length)
182 {
183 ThrowErrorTag2(tag1, tag2, _("%$ without %$ at line %$\n"), buffer, from, length);
184 }
185
ThrowErrorSymbolNeeded(const string & tag1,const string & tag2,char * buffer,size_t from,size_t length)186 void ThrowErrorSymbolNeeded(const string &tag1, const string &tag2, char *buffer, size_t from, size_t length)
187 {
188 ThrowErrorTag2(tag1, tag2, _("%$ needed %$ at line %$\n"), buffer, from, length);
189 }
190
ThrowErrorEndOfFile(const string & tag,char * buffer,size_t from,size_t length)191 void ThrowErrorEndOfFile(const string &tag, char *buffer, size_t from, size_t length)
192 {
193 ThrowErrorTag1(tag, _("unexpected end of file inside %$ at line %$\n"), buffer, from, length);
194 }
195
ThrowErrorCannotFind(const string & tag,char * buffer,size_t from,size_t length)196 void ThrowErrorCannotFind(const string &tag, char *buffer, size_t from, size_t length)
197 {
198 ThrowErrorTag1(tag, _("cannot find %$ at line %$\n"), buffer, from, length);
199 }
200
201 #endif /* TRIM_NO_CHECK */
202
203 /*************************************************************************
204 HyperDraw class
205 *************************************************************************/
206
~HyperDraw()207 HyperDraw::~HyperDraw()
208 {
209 }
210
211 /*************************************************************************
212 Hypertext class
213 *************************************************************************/
214
// Static members: out-of-class definitions of the constants that
// ReadFile uses as its initial buffer size (ALLOCSIZE) and as the amount
// by which it grows the buffer on each further read (ALLOCINC)
const size_t HyperDocument::ALLOCSIZE;
const size_t HyperDocument::ALLOCINC;
218
Init()219 void HyperDocument::Init() // Used by constructors
220 {
221 numChar = NULL;
222 numIndent = 0;
223 }
224
ReInit()225 void HyperDocument::ReInit() // Used by LoadDocument's
226 {
227 if (isMyBuffer) // Make sure buffer is allocated by
228 // HyperDocument
229 if (buffer) // Make sure buffer is not NULL
230 free(buffer);
231 buffer = NULL;
232 isMyBuffer = false; // No longer own the buffer
233
234 ReInitFormat();
235 }
236
ReInitFormat()237 void HyperDocument::ReInitFormat()
238 {
239 delete [] numChar; // Deleting NULL is OK
240 numChar = NULL;
241
242 anchorList.erase(anchorList.begin(), anchorList.end());
243 sectionList.erase(sectionList.begin(), sectionList.end());
244 titleText.erase();
245
246 // Document-dependent variables
247
248 isInA = 0;
249 isInPre = 0;
250 procA = NULL;
251
252 curRow = 0;
253 curColumn = 0;
254 }
255
256 //------------------------------------------------------------------------
257 // Load document from buffer
258 //------------------------------------------------------------------------
259
HyperDocument(char * buffer_,size_t bufferSize_)260 HyperDocument::HyperDocument(char *buffer_, size_t bufferSize_)
261 {
262 Init();
263 InternalLoadDocument(buffer_, bufferSize_);
264 }
265
LoadDocument(char * buffer_,size_t bufferSize_)266 void HyperDocument::LoadDocument(char *buffer_, size_t bufferSize_)
267 {
268 ReInit();
269 InternalLoadDocument(buffer_, bufferSize_);
270 }
271
InternalLoadDocument(char * buffer_,size_t bufferSize_)272 void HyperDocument::InternalLoadDocument(char *buffer_, size_t bufferSize_)
273 {
274 buffer = buffer_;
275 bufferSize = bufferSize_;
276 isMyBuffer = false;
277 drawObj = 0;
278 }
279
280 //------------------------------------------------------------------------
281 // Empty document for run-time generation
282 //------------------------------------------------------------------------
283
HyperDocument(HyperDraw * drawObj_)284 HyperDocument::HyperDocument(HyperDraw *drawObj_)
285 {
286 Init();
287
288 buffer = 0;
289 bufferSize = 0;
290 isMyBuffer = true;
291 drawObj = drawObj_;
292 }
293
294 //------------------------------------------------------------------------
295 // Load document from the file given
296 //------------------------------------------------------------------------
297
298 #ifndef TRIM_NO_DOC_FILE
HyperDocument(const string & fileName)299 HyperDocument::HyperDocument(const string &fileName)
300 {
301 Init();
302 InternalLoadDocument(fileName);
303 }
304
LoadDocument(const string & fileName)305 void HyperDocument::LoadDocument(const string &fileName)
306 {
307 ReInit();
308 InternalLoadDocument(fileName);
309 }
310
InternalLoadDocument(const string & fileName)311 void HyperDocument::InternalLoadDocument(const string &fileName)
312 {
313 isMyBuffer = true;
314 buffer = NULL; // Not allocated yet
315 drawObj = 0;
316
317 gzFile fileGZ1 = NULL, fileGZ2 = NULL; // Avoid warning
318 bool isDir1 = true, isDir2 = true; // Default to true when
319 // file not found
320 struct stat fileStat;
321
322 fileGZ1 = gzopen(fileName, "rb");
323 try {
324 if (fileGZ1) {
325 k_stat(fileName, &fileStat);
326 isDir1 = S_ISDIR(fileStat.st_mode);
327 }
328
329 // File not found or is a directory
330 if (fileGZ1 == NULL || isDir1) {
331
332 string newFileName = fileName;
333
334 // Try a name with `.gz' added or removed
335 string::size_type pos = StrLast(fileName, ".gz");
336 if (pos != string::npos)
337 // Remove `.gz'
338 #ifdef _CXX_STRING_ERASE
339 newFileName.erase(pos, 3);
340 #else
341 // Same as newFileName.erase(pos, 3);
342 newFileName.replace(pos, 3,
343 static_cast<string::size_type>(0),
344 0);
345 #endif
346 else
347 // Add `.gz'
348 newFileName += ".gz";
349
350 fileGZ2 = gzopen(newFileName, "rb");
351 if (fileGZ2) {
352 k_stat(newFileName, &fileStat);
353 isDir2 = S_ISDIR(fileStat.st_mode);
354 }
355
356 // Both are not found or are directories
357 if (isDir1 && isDir2) {
358 throw ErrorGenericFile(_("cannot read file %$ or %$"),
359 fileName, newFileName);
360 }
361
362 ReadFile(fileGZ2);
363 }
364 else
365 ReadFile(fileGZ1);
366 }
367 catch (...) {
368 if (fileGZ1)
369 gzclose(fileGZ1);
370 if (fileGZ2)
371 gzclose(fileGZ2);
372 }
373 if (fileGZ1)
374 gzclose(fileGZ1);
375 if (fileGZ2)
376 gzclose(fileGZ2);
377 }
378 #endif /* TRIM_NO_DOC_FILE */
379
380 //------------------------------------------------------------------------
381 // Load document from opened file stream
382 //------------------------------------------------------------------------
383
384 #ifndef TRIM_NO_DOC_STREAM
HyperDocument(FILE * file)385 HyperDocument::HyperDocument(FILE *file)
386 {
387 Init();
388 InternalLoadDocument(file);
389 }
390
LoadDocument(FILE * file)391 void HyperDocument::LoadDocument(FILE *file)
392 {
393 ReInit();
394 InternalLoadDocument(file);
395 }
396
InternalLoadDocument(FILE * file)397 void HyperDocument::InternalLoadDocument(FILE *file)
398 {
399 isMyBuffer = true;
400 buffer = NULL; // Not allocated yet
401 drawObj = 0;
402
403 gzFile fileGZ;
404
405 fileGZ = gzdopen(fileno(file), "rb");
406 if (fileGZ == NULL)
407 throw ErrorGenericFile(_("cannot read file stream"));
408 try {
409 ReadFile(fileGZ);
410 }
411 catch (...) {
412 gzclose(fileGZ);
413 }
414 gzclose(fileGZ);
415 }
416 #endif /* TRIM_NO_DOC_STREAM */
417
418 //------------------------------------------------------------------------
419 // File I/O
420 //------------------------------------------------------------------------
421
422 #if ! (defined(TRIM_NO_DOC_STREAM) && defined(TRIM_NO_DOC_FILE))
ReadFile(gzFile file)423 void HyperDocument::ReadFile(gzFile file)
424 {
425 size_t bufferPtr = 0;
426 int byteRead; // Must be int, gzread returns -1
427 // upon failure
428 size_t readSize = ALLOCSIZE;
429
430 if (buffer) {
431 delete buffer;
432 }
433
434 buffer = static_cast<char *>(malloc(ALLOCSIZE));// Allocate buffer
435 // Need malloc
436 // because of realloc
437 // below
438 bufferSize = ALLOCSIZE;
439 if (buffer == NULL)
440 throw bad_alloc();
441
442 while((byteRead = gzread(file, buffer+bufferPtr, readSize)) ==
443 static_cast<int>(readSize)) {
444 char *newBuffer;
445 bufferPtr += byteRead; // Next read position
446 bufferSize += ALLOCINC; // New buffer size
447 readSize = ALLOCINC; // Number of bytes read
448 // Expand memory block
449 newBuffer = static_cast<char *>(realloc(buffer, bufferSize));
450 if (newBuffer == NULL)
451 throw bad_alloc();
452
453 buffer = newBuffer;
454 }
455 if (byteRead == -1)
456 throw ErrorGenericFile(_("error occurs while reading file"));
457
458 bufferSize = bufferSize-readSize+byteRead;
459 }
460 #endif
461
462 //------------------------------------------------------------------------
463 // Destructor
464 //------------------------------------------------------------------------
465
~HyperDocument()466 HyperDocument::~HyperDocument()
467 {
468 delete [] numChar; // Deleting NULL is OK
469
470 if (isMyBuffer)
471 if (buffer)
472 free(buffer);
473 }
474
475 //------------------------------------------------------------------------
476 // Format document
477 //------------------------------------------------------------------------
478
GetNestedAttr()479 attr_t HyperDocument::GetNestedAttr()
480 {
481 if (numNestA) {
482 if (numNestB)
483 return linkBoldA;
484 if (numNestI)
485 return linkItalicA;
486 return linkA;
487 }
488 else {
489 if (numNestB)
490 return boldA;
491 if (numNestI)
492 return italicA;
493 return normalA;
494 }
495 }
496
IndentLine(WINDOW * pad)497 void HyperDocument::IndentLine(WINDOW *pad)
498 {
499 #ifdef TRIM_NO_INDENT
500 if (numIndent > 0 && numIndent <= NUM_INDENT) {
501 if (pad) {
502 attr_t oldAttr;
503 short oldPair;
504 my_wattr_get(pad, &oldAttr, &oldPair);
505 wattrset(pad, normalA);
506 wmove(pad, curRow, curColumn);
507 for (int i = 0; i < indentPos[numIndent-1]; i++) {
508 waddch(pad, ' ');
509 }
510 my_wattr_set(pad, oldAttr, oldPair);
511 curColumn = indentPos[numIndent-1];
512 }
513 else
514 curColumn = indentPos[numIndent-1];
515 }
516 else if (numIndent == 0)
517 #endif
518 curColumn = 0;
519
520 if (pad) // Move cursor
521 wmove(pad, curRow, curColumn);
522 }
523
IndentColumn()524 int HyperDocument::IndentColumn()
525 {
526 #ifdef TRIM_NO_INDENT
527 if (numIndent > 0)
528 return indentPos[numIndent-1];
529 #endif
530 return 0; // No indent
531 }
532
533 #ifdef TRIM_NO_INDENT
ProcessLI(char *,size_t,WINDOW *,size_t)534 void HyperDocument::ProcessLI(char * /*buffer*/, size_t /*length*/, WINDOW * /*pad*/, size_t /*from*/)
535 {
536 }
537 #else /* TRIM_NO_INDENT */
ProcessLI(char * buffer,size_t length,WINDOW * pad,size_t from)538 void HyperDocument::ProcessLI(char *buffer, size_t length, WINDOW *pad, size_t from)
539 {
540 if (numIndent > 0 && numIndent <= NUM_INDENT) {
541 if (!pad) {
542 if (GetNestedAttr() != normalA) {
543 ThrowErrorTagInside("<LI>", _("<A ...>, <B> or <I>"),
544 buffer, from-1, length);
545 }
546 }
547 else {
548 attr_t oldAttr;
549 short oldPair;
550 my_wattr_get(pad, &oldAttr, &oldPair);
551 wattrset(pad, normalA);
552 wmove(pad, curRow, curColumn);
553 for (int i = 0; i < indentPos[numIndent-1]; i++) {
554 waddch(pad, ' ');
555 }
556 // Unordered list
557 switch (indentType[numIndent-1]) {
558 case IND_UL:
559 case IND_DL:
560 curColumn = indentPos[numIndent-1]-2;
561 wmove(pad, curRow, curColumn);
562 draw_entity(pad, EID_BULLET);
563 break;
564 case IND_OL:
565 { // Ordered list
566 int val = indentVal[numIndent-1];
567 curColumn = indentPos[numIndent-1]-4;
568 wmove(pad, curRow, curColumn);
569 if (val > 999) {
570 // Beyond program limit
571 // Must have width 3
572 my_waddstr(pad, _("***"));
573 }
574 else {
575 int printed = 0;
576 if (val/100) {
577 waddch(pad, val/100+'0');
578 printed = 1;
579 }
580 else
581 waddch(pad, ' ');
582 val = val%100;
583 if (val/10 || printed) {
584 waddch(pad, val/10+'0');
585 printed = 1;
586 }
587 else
588 waddch(pad, ' ');
589 val = val%10;
590 if (val || printed) {
591 waddch(pad, val+'0');
592 printed = 1;
593 }
594 else
595 waddch(pad, ' ');
596 }
597 indentVal[numIndent-1]++;
598 }
599 break;
600 case IND_BLOCK:
601 ThrowErrorTagInside("<LI>", "<BLOCKQUOTE>",
602 buffer, from-1, length);
603 }
604 my_wattr_set(pad, oldAttr, oldPair);
605 }
606 curColumn = indentPos[numIndent-1];
607 if (pad)
608 wmove(pad, curRow, curColumn);
609 }
610 }
611 #endif /* TRIM_NO_INDENT */
612
613 /* pad == NULL indicates that it is called from FindLink(...),
614 curRow & curColumn are ignored in this case */
615
ProcessA(char * buffer,size_t length,WINDOW * pad,size_t from,int curRow,int curColumn)616 void HyperDocument::ProcessA(char *buffer, size_t length, WINDOW *pad, size_t from,
617 int curRow, int curColumn)
618 {
619 size_t i = SeekNonSpace(buffer, from, length);
620 size_t j = SeekTokenEnd(buffer, i, length);
621
622 if (isInA == 1 && procA) { // Must have </A> for <A HREF=...>
623 // before another <A ...> tag
624 ThrowErrorNestedTag(_("<A ...>"),
625 buffer, from-2, length);
626 }
627 else if (i == j)
628 return;
629
630 int type = TYPE_NULL;
631 string name;
632 if (CompareStringCase(buffer+i, "name", j-i) == 0) {
633 type = TYPE_SECTION;
634 name = _("<A NAME=\"...\">");
635 }
636 else if (CompareStringCase(buffer+i, "href", j-i) == 0) {
637 type = TYPE_GOTO_OR_EXEC;
638 name = _("<A HREF=...>");
639 }
640 else if (CompareStringCase(buffer+i, "call", j-i) == 0) {
641 type = TYPE_CALL;
642 name = _("<A CALL=\"...\">");
643 }
644 else { // Unknown <A ????>
645 isInA = 1; // Wait for </A>
646 return;
647 }
648
649
650 if (buffer[j] == '>') {
651 ThrowErrorSymbolNeeded(_("`=\'"), name,
652 buffer, from-2, length);
653 }
654
655 j = SeekNonSpace(buffer, j, length); // Skip spaces
656 if (j == length) {
657 ThrowErrorEndOfFile(name,
658 buffer, from-2, length);
659 }
660 if (buffer[j] != '=') { // Check "="
661 ThrowErrorSymbolNeeded(_("`=\'"), name,
662 buffer, from-2, length);
663 }
664 j++; // Skip "="
665
666 j = SeekNonSpace(buffer, j, length); // Skip spaces
667 if (j == length) {
668 ThrowErrorEndOfFile(name,
669 buffer, from-2, length);
670 }
671
672 if (type == TYPE_GOTO_OR_EXEC) {
673 if (buffer[j] != '\"' && buffer[j] != '`') { /* Check " */
674 ThrowErrorSymbolNeeded(_("`\"\' or ``\'"), name,
675 buffer, from-2, length);
676 }
677 type = (buffer[j] == '\"' ? TYPE_GOTO : TYPE_EXEC);
678 }
679 else {
680 if (buffer[j] != '\"') { // Check "
681 ThrowErrorSymbolNeeded(_("`\"\'"), name,
682 buffer, from-2, length);
683 }
684 }
685
686 isInA = 1;
687
688 i = j+1;
689
690 try {
691 if (type == TYPE_EXEC)
692 // Search closing `
693 // Shell command, can be quoted
694 j = SeekChar('`', buffer, i, length, 2); // nest support
695 else
696 j = SeekChar('\"', buffer, i, length, 2); // Search closing "
697 // Quote support for kcd
698 // since dir name may
699 // contain special chars
700 }
701 catch (...) {
702 ThrowErrorEndOfFile(name,
703 buffer, from-2, length);
704 }
705
706 string quoted(buffer+i, j-i);
707 procA = new Anchor(UnquoteURLChars(quoted));
708
709 procA->type = type;
710 procA->rowFrom = curRow;
711 procA->colFrom = curColumn;
712
713 if (type == TYPE_SECTION) {
714 procA->rowTo = curRow; // Don't care where </A> is
715 procA->colTo = curColumn;
716
717 sectionList.push_back(sptr<Anchor>(procA));
718
719 procA = NULL;
720 }
721 else {
722 procA->rowTo = -1; // Still invalid
723 procA->colTo = -1;
724
725 anchorList.push_back(sptr<Anchor>(procA));
726 numNestA++;
727 if (pad)
728 wattrset(pad, GetNestedAttr());
729 }
730 }
731
NextLine(WINDOW * pad)732 void HyperDocument::NextLine(WINDOW *pad)
733 {
734 if (pad == NULL) { // Update document width
735 if (curColumn > numColumn)
736 numColumn = curColumn;
737 }
738 else // Record column width
739 numChar[curRow] = curColumn;
740
741 curRow++; // Go to new line
742 curColumn = 0;
743 IndentLine(pad); // Indent if necessary
744 }
745
ProcessHypertext(char * buffer,size_t length,WINDOW * pad)746 void HyperDocument::ProcessHypertext(char *buffer, size_t length, WINDOW *pad)
747 {
748 init_entity_table();
749
750 size_t i = 0, j = 0;
751 size_t k = 0;
752 int lastCharColumn = 0, lastCharRow = 0; /* Last char pos */
753 int lastPreColumn = 0, lastPreRow = 0; /* Last preformatted pos */
754 int lastChar = 0; /* Value ignored */
755
756 numIndent = 0; // Clear indent if this function is
757 // run the second time
758 numNestA = 0;
759 numNestB = 0;
760 numNestI = 0;
761
762 numNestBTag = 0;
763 numNestITag = 0;
764 numNestStrongTag = 0;
765 numNestEmTag = 0;
766 numNestCiteTag = 0;
767
768 if (pad) {
769 numChar = new short[numRow];
770
771 wattrset(pad, normalA); /* Normal attribute */
772 }
773
774 //#define DEBUG_FORMAT_TEXT
775 #ifdef DEBUG_FORMAT_TEXT
776 FILE *f;
777 if (pad) {
778 f = k_fopen("debug.pad", "wb");
779 if (f == NULL)
780 cerr << "ERROR1";
781 }
782 else {
783 f = k_fopen("debug.nopad", "wb");
784 if (f == NULL)
785 cerr << "ERROR2";
786 }
787 #endif
788
789 while (i < length) {
790
791 #ifdef DEBUG_FORMAT_TEXT
792 fprintf(f, "[%d] %c (%d,%d)\n", i, buffer[i], curRow, curColumn);
793 #endif
794 #ifdef DEBUG_WRAP
795 if (pad) {
796 pnoutrefresh(pad, 0, 0, 0, 0, scrRow-1, scrColumn-1);
797 wrefresh(stdscr);
798 getch();
799 }
800 #endif
801
802 if (buffer[i] == '<') { /* New tag found */
803 i++; /* Advance pointer past '<' */
804 j = SeekTokenEnd(buffer, i, length);
805
806 /* Look for tags */
807 if (j-i == 2 && CompareStringCase(buffer+i, "BR", j-i) == 0) {
808 NextLine(pad);
809 lastPreColumn = curColumn;
810 lastPreRow = curRow;
811 }
812
813 #ifndef TRIM_NO_HEADER
814 else if ((j-i == 2 && CompareStringCase(buffer+i, "H1", j-i) == 0) ||
815 (j-i == 2 && CompareStringCase(buffer+i, "H2", j-i) == 0) ||
816 (j-i == 2 && CompareStringCase(buffer+i, "H3", j-i) == 0) ||
817 (j-i == 2 && CompareStringCase(buffer+i, "H4", j-i) == 0) ||
818 (j-i == 2 && CompareStringCase(buffer+i, "H5", j-i) == 0) ||
819 (j-i == 2 && CompareStringCase(buffer+i, "H6", j-i) == 0)) {
820
821 if (curColumn != IndentColumn()) {
822 if (pad == NULL) {
823 if (curColumn > numColumn)
824 numColumn = curColumn;
825 }
826 else
827 numChar[curRow] = curColumn;
828 curRow++;
829 curColumn = 0;
830 }
831
832 NextLine(pad);
833 lastPreColumn = curColumn;
834 lastPreRow = curRow;
835 if (pad) {
836 wattrset(pad, headerA);
837 }
838 }
839 else if ((j-i == 3 && CompareStringCase(buffer+i, "/H1", j-i) == 0) ||
840 (j-i == 3 && CompareStringCase(buffer+i, "/H2", j-i) == 0) ||
841 (j-i == 3 && CompareStringCase(buffer+i, "/H3", j-i) == 0) ||
842 (j-i == 3 && CompareStringCase(buffer+i, "/H4", j-i) == 0) ||
843 (j-i == 3 && CompareStringCase(buffer+i, "/H5", j-i) == 0) ||
844 (j-i == 3 && CompareStringCase(buffer+i, "/H6", j-i) == 0) ||
845 (j-i == 2 && CompareStringCase(buffer+i, "DD", j-i) == 0) ||
846 (j-i == 1 && CompareStringCase(buffer+i, "P", j-i) == 0)) {
847
848 if (curColumn != IndentColumn()) {
849 if (pad == NULL) {
850 if (curColumn > numColumn)
851 numColumn = curColumn;
852 }
853 else
854 numChar[curRow] = curColumn;
855 curRow++;
856 curColumn = 0;
857 }
858
859 NextLine(pad);
860 lastPreColumn = curColumn;
861 lastPreRow = curRow;
862 if (pad) {
863 wattrset(pad, normalA);
864 }
865 }
866 #endif /* TRIM_NO_HEADER */
867
868 else if (j-i == 1 && CompareStringCase(buffer+i, "A", j-i) == 0 && pad) {
869 i++; /* Skip this "A" */
870 if (SeekNonSpace(buffer, i, length) > i) {
871 /* More after "<A" */
872 /* Process "HREF", "NAME" */
873 ProcessA(buffer, length, pad, i,
874 curRow, curColumn);
875 }
876 /* else just empty <A> tag, ignore it ? */
877 }
878 else if (j-i == 2 && CompareStringCase(buffer+i, "/A", j-i) == 0 && pad) {
879 if (isInA == 0) {
880 ThrowErrorTagWithout("</A>", _("<A HREF=...>"),
881 buffer, i-1, length);
882 }
883 else if (procA) { /* Inside <A HREF=...> */
884 procA->rowTo = curRow;
885 procA->colTo = curColumn-1; /* Prev column */
886 procA = NULL;
887
888 numNestA--;
889 if (pad)
890 wattrset(pad, GetNestedAttr());
891 }
892 isInA = 0;
893 }
894 else if (j-i == 3 && CompareStringCase(buffer+i, "PRE", j-i) == 0) {
895 if (isInPre) {
896 ThrowErrorNestedTag("<PRE>",
897 buffer, i-1, length);
898 }
899 isInPre++;
900 }
901 else if (j-i == 4 && CompareStringCase(buffer+i, "/PRE", j-i) == 0) {
902 if (!isInPre) {
903 ThrowErrorTagWithout("</PRE>", "<PRE>",
904 buffer, i-1, length);
905 }
906 isInPre--;
907 }
908
909 #ifndef TRIM_NO_STD_ATTRIB
910 else if (j-i == 1 && CompareStringCase(buffer+i, "B", j-i) == 0) {
911 numNestB++;
912 numNestBTag++;
913 if (pad)
914 wattrset(pad, GetNestedAttr());
915 }
916 else if (j-i == 2 && CompareStringCase(buffer+i, "/B", j-i) == 0) {
917 if (numNestBTag == 0) {
918 ThrowErrorTagWithout("</B>", "<B>",
919 buffer, i-1, length);
920 }
921 numNestB--;
922 numNestBTag--;
923 if (pad)
924 wattrset(pad, GetNestedAttr());
925 }
926 else if (j-i == 1 && CompareStringCase(buffer+i, "I", j-i) == 0) {
927 numNestI++;
928 numNestITag++;
929 if (pad)
930 wattrset(pad, GetNestedAttr());
931 }
932 else if (j-i == 2 && CompareStringCase(buffer+i, "/I", j-i) == 0) {
933 if (numNestITag == 0) {
934 ThrowErrorTagWithout("</I>", "<I>",
935 buffer, i-1, length);
936 }
937 numNestI--;
938 numNestITag--;
939 if (pad)
940 wattrset(pad, GetNestedAttr());
941 }
942 #endif /* TRIM_NO_STD_ATTRIB */
943
944 #ifndef TRIM_NO_EXTRA_ATTRIB
945 else if (j-i == 6 && CompareStringCase(buffer+i, "STRONG", j-i) == 0) {
946 numNestB++;
947 numNestStrongTag++;
948 if (pad)
949 wattrset(pad, GetNestedAttr());
950 }
951 else if (j-i == 7 && CompareStringCase(buffer+i, "/STRONG", j-i) == 0) {
952 if (numNestStrongTag == 0) {
953 ThrowErrorTagWithout("</STRONG>", "<STRONG>",
954 buffer, i-1, length);
955 }
956 numNestB--;
957 numNestStrongTag--;
958 if (pad)
959 wattrset(pad, GetNestedAttr());
960 }
961 else if (j-i == 2 && CompareStringCase(buffer+i, "EM", j-i) == 0) {
962 numNestI++;
963 numNestEmTag++;
964 if (pad)
965 wattrset(pad, GetNestedAttr());
966 }
967 else if (j-i == 3 && CompareStringCase(buffer+i, "/EM", j-i) == 0) {
968 if (numNestEmTag == 0) {
969 ThrowErrorTagWithout("</EM>", "<EM>",
970 buffer, i-1, length);
971 }
972 numNestI--;
973 numNestEmTag--;
974 if (pad)
975 wattrset(pad, GetNestedAttr());
976 }
977 else if (j-i == 4 && CompareStringCase(buffer+i, "CITE", j-i) == 0) {
978 numNestI++;
979 numNestCiteTag++;
980 if (pad)
981 wattrset(pad, GetNestedAttr());
982 }
983 else if (j-i == 5 && CompareStringCase(buffer+i, "/CITE", j-i) == 0) {
984 if (numNestCiteTag == 0) {
985 ThrowErrorTagWithout("</CITE>", "<CITE>",
986 buffer, i-1, length);
987 }
988 numNestI--;
989 numNestCiteTag--;
990 if (pad)
991 wattrset(pad, GetNestedAttr());
992 }
993 #endif /* TRIM_EXTRA_ATTRIB */
994
995 #ifndef TRIM_NO_INDENT
996 else if (j-i == 2 && CompareStringCase(buffer+i, "UL", j-i) == 0) {
997 if (numIndent < NUM_INDENT) {
998 indentType[numIndent] = IND_UL;
999 if (numIndent)
1000 indentPos[numIndent] =
1001 indentPos[numIndent-1]+4;
1002 else
1003 indentPos[numIndent] = 4;
1004 }
1005 numIndent++;
1006 }
1007 else if (j-i == 2 && CompareStringCase(buffer+i, "OL", j-i) == 0) {
1008 if (numIndent < NUM_INDENT) {
1009 indentType[numIndent] = IND_OL;
1010 if (numIndent)
1011 indentPos[numIndent] =
1012 indentPos[numIndent-1]+4;
1013 else
1014 indentPos[numIndent] = 4;
1015 indentVal[numIndent] = 1;
1016 }
1017 numIndent++;
1018 }
1019 else if (j-i == 2 && CompareStringCase(buffer+i, "DL", j-i) == 0) {
1020 if (numIndent < NUM_INDENT) {
1021 indentType[numIndent] = IND_DL;
1022 if (numIndent)
1023 indentPos[numIndent] =
1024 indentPos[numIndent-1]+4;
1025 else
1026 indentPos[numIndent] = 4;
1027 }
1028 numIndent++;
1029 }
1030 else if (j-i == 10 && CompareStringCase(buffer+i, "BLOCKQUOTE", j-i) == 0) {
1031 /* New line */
1032 if (curColumn != IndentColumn())
1033 NextLine(pad);
1034
1035 if (numIndent < NUM_INDENT) {
1036 indentType[numIndent] = IND_BLOCK;
1037 if (numIndent)
1038 indentPos[numIndent] =
1039 indentPos[numIndent-1]+4;
1040 else
1041 indentPos[numIndent] = 4;
1042 }
1043 numIndent++;
1044
1045 IndentLine(pad);
1046 lastPreColumn = curColumn;
1047 lastPreRow = curRow;
1048 }
1049 else if (j-i == 3 && CompareStringCase(buffer+i, "/UL", j-i) == 0) {
1050 if (curColumn != IndentColumn()) {
1051 NextLine();
1052 }
1053
1054 if (numIndent > 0)
1055 numIndent--;
1056
1057 NextLine(pad);
1058 lastPreColumn = curColumn;
1059 lastPreRow = curRow;
1060 }
1061 else if (j-i == 3 && CompareStringCase(buffer+i, "/OL", j-i) == 0) {
1062 if (curColumn != IndentColumn())
1063 NextLine(pad);
1064
1065 if (numIndent > 0)
1066 numIndent--;
1067
1068 NextLine(pad);
1069 lastPreColumn = curColumn;
1070 lastPreRow = curRow;
1071 }
1072 else if (j-i == 3 && CompareStringCase(buffer+i, "/DL", j-i) == 0) {
1073 if (curColumn != IndentColumn())
1074 NextLine(pad);
1075
1076 if (numIndent > 0)
1077 numIndent--;
1078
1079 NextLine(pad);
1080 lastPreColumn = curColumn;
1081 lastPreRow = curRow;
1082 }
1083 else if (j-i == 11 && CompareStringCase(buffer+i, "/BLOCKQUOTE", j-i) == 0) {
1084 if (curColumn != IndentColumn())
1085 NextLine(pad);
1086
1087 if (numIndent > 0)
1088 numIndent--;
1089
1090 NextLine(pad);
1091 lastPreColumn = curColumn;
1092 lastPreRow = curRow;
1093 }
1094 else if (j-i == 2 && CompareStringCase(buffer+i, "LI", j-i) == 0) {
1095 if (curColumn != IndentColumn())
1096 NextLine(pad);
1097
1098 ProcessLI(buffer, length, pad, i);
1099 lastPreColumn = curColumn;
1100 lastPreRow = curRow;
1101 }
1102 else if (j-i == 2 && CompareStringCase(buffer+i, "DT", j-i) == 0) {
1103 if (curColumn != IndentColumn())
1104 NextLine(pad);
1105
1106 NextLine(pad);
1107
1108 ProcessLI(buffer, length, pad, i);
1109 lastPreColumn = curColumn;
1110 lastPreRow = curRow;
1111 }
1112 #endif /* TRIM_NO_INDENT */
1113
1114 #ifndef TRIM_NO_RULE
1115 else if (j-i == 2 && CompareStringCase(buffer+i, "HR", j-i) == 0) {
1116 if (curColumn != IndentColumn())
1117 NextLine(pad);
1118
1119 NextLine(pad);
1120
1121 if (pad) {
1122 for (int curColumn = numColumn/4; curColumn < numColumn*3/4;
1123 curColumn++) {
1124 wmove(pad, curRow, curColumn);
1125 draw_entity(pad, EID_HLINE);
1126 }
1127 }
1128 NextLine(pad);
1129 NextLine(pad);
1130 lastPreColumn = curColumn;
1131 lastPreRow = curRow;
1132 }
1133 #endif /* TRIM_NO_RULE */
1134
1135 else if (j-i == 5 && CompareStringCase(buffer+i, "TITLE", j-i) == 0) {
1136 try {
1137 j = SeekChar('>', buffer, i+5, length);
1138 }
1139 catch (...) {
1140 ThrowErrorCannotFind(_("`>\'"),
1141 buffer, i-1, length);
1142 }
1143 j++; /* Skip '>' */
1144 k = j;
1145 try {
1146 j = SeekCaseString("</TITLE>", buffer, j, length);
1147 }
1148 catch (...) {
1149 ThrowErrorCannotFind("</TITLE>",
1150 buffer, i-1, length);
1151 }
1152 if (titleText.size()) {
1153 ThrowErrorMessage(_("cannot "
1154 "specify title more "
1155 "than once"),
1156 buffer, i-1, length);
1157 }
1158 /* Empty <TITLE></TITLE> allowed */
1159 if (j-k)
1160 titleText = string(buffer+k, j-k);
1161 }
1162 else if (CompareStringCase(buffer+i, "!--", 3) == 0 && pad) {
1163 /* Seek for "-->" */
1164 try {
1165 j = SeekString("-->", buffer, i, length);
1166 }
1167 catch (...) {
1168 ThrowErrorCannotFind(_("`-->\'"),
1169 buffer, i-1, length);
1170 }
1171 }
1172 /* else - ignore unregcognized tags */
1173
1174 /* Seek for '>' */
1175 /* Can be quoted */
1176 try {
1177 i = SeekChar('>', buffer, j, length, 2);
1178 /* nest support */
1179 }
1180 catch (...) {
1181 ThrowErrorCannotFind(_("`>\'"),
1182 buffer, i-1, length);
1183 }
1184 i++;
1185 }
1186 else {
1187 chtype bufChar = '?'; // Enough to cover 0..255
1188 #ifdef USE_UTF8_MODE
1189 bool use_wide = false;
1190 wchar_t bufWChar;
1191 int charWidth = 1;
1192 #endif
1193
1194 switch(buffer[i]) {
1195 case '\t':
1196 case '\n':
1197 case '\r':
1198 case '\b':
1199 case ' ':
1200 case '\0':
1201 bufChar = ' ';
1202 break;
1203 case '&':
1204 i++;
1205 /* Seek for ';' */
1206 try {
1207 j = SeekChar(';', buffer, i, length);
1208 }
1209 catch (...) {
1210 ThrowErrorCannotFind(_("`;\'"),
1211 buffer, i-1, length);
1212 }
1213
1214 if (buffer[i] == '#') {
1215 unsigned bufChar_ = 0;
1216 try {
1217 bufChar_ = StringToUnsigned(buffer+i+1, ';');
1218 }
1219 catch (ErrorRange) {
1220 ThrowErrorMessage(_("error in escape"),
1221 buffer, i-1, length);
1222 }
1223 if (bufChar_ > 255) {
1224 ThrowErrorMessage(_("character code too large"),
1225 buffer, i-1, length);
1226 }
1227 bufChar = static_cast<chtype>(bufChar_);
1228 }
1229 else {
1230 // For unknown escape
1231 bufChar = '?';
1232
1233 for (size_t k = 0; entity_table[k].name; ++k) {
1234 if (static_cast<size_t>(j-i) == entity_table[k].length
1235 && strncmp(buffer+i, entity_table[k].name, j-i) == 0) {
1236 // FIXME: Handle wide/combining entity
1237 #ifdef USE_UTF8_MODE
1238 if (IsUTF8Mode()) {
1239 use_wide = true;
1240 bufWChar = entity_table[k].wc;
1241 charWidth = wcwidth(bufWChar);
1242 }
1243 else
1244 #endif
1245 bufChar = entity_table[k].code;
1246 break;
1247 }
1248 }
1249
1250 }
1251 i = j; /* i++ appears later... */
1252 break;
1253 default: // ncurses only works with unsigned char
1254 // Assume no combining char
1255 #ifdef USE_UTF8_MODE
1256 if (IsUTF8Mode()) {
1257 static mbstate_t state;
1258 memset(&state, 0, sizeof(mbstate_t));
1259 int ret = mbrtowc(&bufWChar, buffer+i, length-i, &state);
1260 if (ret <= 0)
1261 throw ErrorBadSequence();
1262
1263 // FIXME: Check if printable logic correct?
1264 if (iswprint(bufWChar) || bufWChar == ' ') {
1265 charWidth = wcwidth(bufWChar);
1266 use_wide = true;
1267 i += ret-1;
1268 }
1269 else {
1270 bufChar = '?';
1271 }
1272 }
1273 else
1274 #endif
1275 if (isprint(buffer[i]) || buffer[i] == ' ') {
1276 bufChar = static_cast<unsigned char>(buffer[i]);
1277 }
1278 else {
1279 bufChar = static_cast<unsigned char>('?');
1280 }
1281 }
1282
1283 #ifdef USE_UTF8_MODE
1284 if (charWidth == 0) {
1285 if (pad)
1286 DrawWCharSub(pad, bufWChar);
1287 i++;
1288 continue;
1289 }
1290 else if (charWidth > 1) {
1291 if (curColumn > scrColumn-charWidth && isInPre == 0)
1292 NextLine(pad);
1293
1294 if (pad)
1295 DrawWCharSub(pad, bufWChar);
1296 else // Keep track of char for wrap
1297 for (int i = 0; i < charWidth; ++i)
1298 lineBuffer[curColumn+i] = '*';
1299 curColumn += charWidth;
1300 if (!isInPre) { // this char pos
1301 lastCharRow = curRow;
1302 lastCharColumn = curColumn-1;
1303 lastChar = bufChar;
1304 }
1305 else { // this preformatted char pos
1306 lastPreColumn = curColumn-1;
1307 lastPreRow = curRow;
1308 }
1309 i++;
1310 continue;
1311 }
1312 #endif
1313
1314 // Handle whitespaces
1315 // or line too long outside <PRE> tag
1316
1317 if (bufChar == ' ' ||
1318 (curColumn > scrColumn-1 && isInPre == 0)) {
1319 if (isInPre == 0) {
1320
1321 // Skip space at the beginning of line
1322 if (curColumn == IndentColumn() &&
1323 bufChar == ' ') {
1324 i++; // Skip
1325 }
1326
1327 // Ignore double spaces
1328 else if (curColumn > IndentColumn() &&
1329 lastCharRow == curRow &&
1330 lastCharColumn == curColumn-1 &&
1331 lastChar == ' ' && bufChar == ' ') {
1332
1333 lastCharColumn = curColumn;
1334 i++; // Skip
1335 }
1336
1337 // Do wrap
1338 // FIXME: Update for Unicode
1339 else if (curColumn > scrColumn-1) {
1340 chtype ch;
1341 int x = curColumn-1;
1342 if (pad)
1343 wmove(pad, curRow, x);
1344
1345 // Search for the last space in the line
1346 while ((pad ? (ch = winch(pad) &
1347 (A_CHARTEXT|A_ALTCHARSET)):
1348 lineBuffer[x])
1349 != ' ') {
1350 x--;
1351 if (x >= 0) { // Move cursor back
1352 if (pad)
1353 wmove(pad, curRow, x);
1354 if (curRow == lastPreRow &&
1355 x == lastPreColumn)
1356 // Can no longer wrapped
1357 // since it's preformatted
1358 break;
1359 }
1360 else // x < 0, wrap fails
1361 break;
1362 }
1363
1364 // Cannot break line
1365 if (x < 0 || (curRow == lastPreRow
1366 && x == lastPreColumn)) {
1367
1368 // We are printing a space
1369 // so break the line here
1370 if (bufChar == ' ') {
1371 NextLine(pad);
1372 lastPreColumn = curColumn;
1373 lastPreRow = curRow;
1374 }
1375
1376 // Print character beyond screen width
1377 else {
1378 if (pad) {
1379 wmove(pad, curRow, curColumn);
1380 waddch(pad, bufChar);
1381 }
1382 else { // This is OK
1383 lineBuffer[curColumn] = '*';
1384 }
1385 curColumn++;
1386 }
1387 i++; // Skip current char
1388 }
1389
1390 // Can break line
1391 else {
1392 x = curColumn; /* Leave the space as-is
1393 as it may be inside
1394 some link text */
1395
1396 int prevRow = curRow;
1397 int prevCol = curColumn-1;
1398 /* Init it to avoid
1399 compiler warning */
1400 attr_t oldAttr = normalA;
1401 short oldPair = 0;
1402
1403 NextLine(pad);
1404 lastPreColumn = curColumn;
1405 lastPreRow = curRow;
1406
1407 if (pad) {
1408 /* Update links position */
1409 for (sptr_list<Anchor>::reverse_iterator iter = anchorList.rbegin();
1410 iter != anchorList.rend()
1411 && ((*iter)->rowFrom == prevRow || (*iter)->rowTo == prevRow);
1412 ++iter) {
1413 if ((*iter)->rowFrom == prevRow && (*iter)->colFrom >= x) {
1414 (*iter)->rowFrom = curRow;
1415 (*iter)->colFrom -= x;
1416 }
1417 if ((*iter)->rowTo == prevRow && (*iter)->colTo >= x) {
1418 (*iter)->rowTo = curRow;
1419 (*iter)->colTo -= x;
1420 }
1421 }
1422
1423 /* Update section position */
1424 for (sptr_list<Anchor>::reverse_iterator iter = sectionList.rbegin();
1425 iter != sectionList.rend()
1426 && ((*iter)->rowFrom == prevRow || (*iter)->rowTo == prevRow);
1427 ++iter) {
1428 if ((*iter)->rowFrom == prevRow && (*iter)->colFrom >= x) {
1429 (*iter)->rowFrom = curRow;
1430 (*iter)->colFrom -= x;
1431 }
1432 if ((*iter)->rowTo == prevRow && (*iter)->colTo >= x) {
1433 (*iter)->rowTo = curRow;
1434 (*iter)->colTo -= x;
1435 }
1436 }
1437 my_wattr_get(pad, &oldAttr, &oldPair);
1438 wattrset(pad, A_NORMAL);
1439 }
1440 for (k = x; k <= static_cast<size_t>(prevCol); k++) {
1441 if (pad) {
1442 wmove(pad, prevRow, k);
1443 ch = winch(pad);
1444 waddch(pad, ' ');
1445 wmove(pad, curRow, curColumn);
1446 waddch(pad, ch);
1447 #ifdef DEBUG_WRAP
1448 pnoutrefresh(pad, 0, 0, 0, 0, scrRow-1, scrColumn-1);
1449 wrefresh(stdscr);
1450 getch();
1451 #endif
1452 }
1453 else {
1454 lineBuffer[curColumn] = lineBuffer[k];
1455 }
1456 curColumn++;
1457 }
1458 /* Display another space */
1459 if (pad) {
1460 my_wattr_set(pad, oldAttr, oldPair);
1461 wmove(pad, curRow, curColumn);
1462 waddch(pad, bufChar);
1463 }
1464 else
1465 lineBuffer[curColumn] = bufChar;
1466
1467 /* Track & remove contiguous space */
1468 lastCharRow = curRow;
1469 lastCharColumn = curColumn;
1470 lastChar = bufChar;
1471 curColumn++;
1472 if (curColumn == NUM_COLUMN) {
1473 NextLine(pad);
1474 lastPreColumn = curColumn;
1475 lastPreRow = curRow;
1476 }
1477 i++;
1478 }
1479 }
1480
1481 // Not reach screen width yet
1482 else {
1483 // Display it
1484 if (pad) {
1485 // No need to deal with Unicode
1486 // for space char.
1487 waddch(pad, bufChar);
1488 }
1489 else
1490 lineBuffer[curColumn] = bufChar;
1491
1492 // Track & remove contiguous space
1493 lastCharRow = curRow;
1494 lastCharColumn = curColumn;
1495 lastChar = bufChar;
1496 curColumn++;
1497 if (curColumn == NUM_COLUMN) {
1498 NextLine(pad);
1499 lastPreColumn = curColumn;
1500 lastPreRow = curRow;
1501 }
1502 i++;
1503 }
1504 }
1505 else { // For whitespaces
1506 // inside <PRE> tag
1507
1508 lastPreColumn = curColumn;
1509 lastPreRow = curRow;
1510
1511 switch(buffer[i]) {
1512 case ' ':
1513 case '\0':
1514 if (pad)
1515 waddch(pad, ' ');
1516 else // No wrap char
1517 lineBuffer[curColumn] = '*';
1518 curColumn++;
1519 break;
1520 case '\t':
1521 do { // Add space to pad
1522 if (pad)
1523 waddch(pad, ' ');
1524 else // No wrap char
1525 lineBuffer[curColumn] = '*';
1526 curColumn++;
1527 } while(curColumn % 8);
1528 break;
1529 case '\n':
1530 NextLine(pad);
1531 lastPreColumn = curColumn;
1532 lastPreRow = curRow;
1533 break;
1534 }
1535 if (curColumn == NUM_COLUMN) {
1536 NextLine(pad);
1537 lastPreColumn = curColumn;
1538 lastPreRow = curRow;
1539 }
1540 i++;
1541 }
1542 }
1543
1544 else { // Can be displayed directly
1545
1546 if (pad) { // Add char. to pad
1547 #ifdef USE_UTF8_MODE
1548 if (use_wide)
1549 DrawWCharSub(pad, bufWChar);
1550 else
1551 #endif
1552 waddch(pad, bufChar);
1553 }
1554 else // Keep track of char for wrap
1555 lineBuffer[curColumn] = '*';
1556 if (!isInPre) { // this char pos
1557 lastCharRow = curRow;
1558 lastCharColumn = curColumn;
1559 lastChar = bufChar;
1560 }
1561 else { // this preformatted char pos
1562 lastPreColumn = curColumn;
1563 lastPreRow = curRow;
1564 }
1565 if (curColumn == NUM_COLUMN) {
1566 NextLine(pad);
1567 lastPreColumn = curColumn;
1568 lastPreRow = curRow;
1569 }
1570 curColumn++;
1571 i++;
1572 }
1573 }
1574 }
1575 if (isInA == 1) {
1576 throw ErrorGenericSyntax(_("unexpected end of file, missing </A>"));
1577 }
1578
1579 if (isInPre == 1) {
1580 throw ErrorGenericSyntax(_("unexpected end of file, missing </PRE>"));
1581 }
1582
1583 #ifdef DEBUG_FORMAT_TEXT
1584 fclose(f);
1585 #endif
1586
1587 if (pad == NULL) { // Update document size
1588 if (curColumn > numColumn)
1589 numColumn = curColumn;
1590 numRow = curRow+1; // Off-by-1 different
1591 }
1592 else { // *** Move check to loop above ??
1593 if (curRow != numRow-1 && curRow > scrRow) {
1594 throw ErrorGenericSyntax(_("hypertext size calculation not match actual hypertext parse\n"
1595 "Number of row: %$ (calculatation) %$ (parse)"),
1596 numRow, curRow+1);
1597 }
1598
1599 for (i = curRow; i < static_cast<size_t>(numRow); i++) {
1600 numChar[i] = curColumn;
1601 curColumn = 0;
1602 }
1603
1604 // #define DEBUG_NUM_CHAR
1605 #ifdef DEBUG_NUM_CHAR
1606 FILE *f2;
1607 f2 = k_fopen("debug.num", "wb");
1608 if (f2 == NULL)
1609 cerr << "ERROR1";
1610 else {
1611 for (i = 0; i < numRow; i++)
1612 fprintf(f2, "[%d] %d\n", i, static_cast<int>(numChar[i]));
1613 fclose(f2);
1614 }
1615 #endif
1616
1617
1618 }
1619 }
1620
1621 // pad == NULL when we want to find document dimension
1622 // row, col = Screen size for hypertext display. Minimum pad size used here
FormatDocument(WINDOW * pad,int row,int col)1623 void HyperDocument::FormatDocument(WINDOW *pad, int row, int col)
1624 {
1625 ReInitFormat(); // Initialize document dependent variables
1626 // without freeing buffer
1627
1628 scrRow = row;
1629 scrColumn = col;
1630
1631 if (! drawObj) {
1632 if (pad) { // We only need following when we output to pad
1633 ;
1634 }
1635 else { // We are going to find document dimension
1636 numRow = 1; // Minimum limit - must contain an empty line
1637 numColumn = 0;
1638 }
1639 ProcessHypertext(buffer, bufferSize, pad);
1640 }
1641 else {
1642 drawObj->Draw(*this, pad);
1643 }
1644 }
1645
SetDocumentSize(int docRow,int docCol,int row,int col)1646 void HyperDocument::SetDocumentSize(int docRow, int docCol, int row, int col)
1647 {
1648 scrRow = row; // Screen size for hypertext display
1649 scrColumn = col;
1650
1651 numColumn = docCol; // Document size
1652 numRow = docRow;
1653 }
1654
1655 //------------------------------------------------------------------------
1656 // Find link
1657 //------------------------------------------------------------------------
1658
ReadATag()1659 int HyperDocument::ReadATag()
1660 {
1661 int currentID = 0; // ID used to replace rowFrom member of Anchor
1662 // It is used to sync between anchorList and
1663 // sectionList
1664 size_t i = 0, j = 0;
1665
1666 ReInitFormat(); // Initialize document dependent variables
1667 // without freeing buffer
1668
1669 while (i < bufferSize) {
1670
1671 if (buffer[i] == '<') { /* New tag found */
1672 i++; /* Advance pointer past '<' */
1673 j = SeekTokenEnd(buffer, i, bufferSize);
1674
1675 if (j-i == 1 && CompareStringCase(buffer+i, "A", j-i) == 0) {
1676 i++; /* Skip this "A" */
1677 if (SeekNonSpace(buffer, i, bufferSize) > i) {
1678 /* More after "<A" */
1679 /* Process "HREF", "NAME", "CALL" */
1680 ProcessA(buffer, bufferSize,
1681 NULL, i,
1682 currentID, -1);
1683 currentID++;
1684 }
1685 /* else just empty <A> tag, ignore it ? */
1686 }
1687 else if (j-i == 2 && CompareStringCase(buffer+i, "/A", j-i) == 0) {
1688 if (isInA == 0) {
1689 ThrowErrorTagWithout("</A>", _("<A HREF=...>"),
1690 buffer, i-1, bufferSize);
1691 }
1692 if (procA) {
1693 procA = NULL;
1694 numNestA--;
1695 }
1696 isInA = 0;
1697 }
1698
1699 /* else - ignore unregcognized tags */
1700
1701 /* Seek for '>' */
1702 /* Can be quoted */
1703 try {
1704 i = SeekChar('>', buffer, j, bufferSize, 2);
1705 /* nest support */
1706 }
1707 catch (...) {
1708 ThrowErrorCannotFind(_("`>\'"),
1709 buffer, i-1, bufferSize);
1710 }
1711 i++;
1712 }
1713 else
1714 i++; // Check next char
1715 }
1716 return RET_OK;
1717 }
1718
// Test whether substr matches the last path component of s.
//
// Returns 1 when the last occurrence of substr in s starts after the last
// `/' of s (or when s contains no `/' at all), 0 otherwise, including when
// substr does not occur in s.
int CheckString(const string &s, const string &substr)
{
	const string::size_type matchPos = s.rfind(substr);
	if (matchPos == string::npos)
		return 0;		// Substring not found in the anchor text

	// The match only counts when it lies beyond the last `/'
	const string::size_type slashPos = s.rfind('/');
	const bool inLastPart = (slashPos == string::npos) || (slashPos < matchPos);
	return inLastPart ? 1 : 0;
}
1736
FindLink(const string & startSection,const string & findLink)1737 Anchor *HyperDocument::FindLink(const string &startSection, const string &findLink)
1738 {
1739 if (ReadATag() != RET_OK) // Read all <A HREF=...>,
1740 // <A CALL="..."> and
1741 // <A NAME="..."> tags from buffer
1742 return NULL; // Error detected
1743 return FindNextLink(startSection, findLink);
1744 }
1745
FindNextLink(const string & startSection,const string & findLink)1746 Anchor *HyperDocument::FindNextLink(const string &startSection, const string &findLink)
1747 {
1748 if (anchorList.size() == 0) { // No <A HREF=...> or <A CALL="...">
1749 // found
1750
1751 // Report error
1752 throw ErrorGenericSyntax(_("no <A HREF=...> or <A CALL=\"...\"> tags present\n"
1753 "Do not use the `-l\' option for this file"));
1754 }
1755
1756 Anchor *curSection = 0;
1757 sptr_list<Anchor>::iterator curSectionIter;
1758
1759 if (startSection.size()) {
1760 // Seek to the given startSection
1761
1762 for (curSectionIter = sectionList.begin();
1763 curSectionIter != sectionList.end(); ++curSectionIter) {
1764
1765 // Found <A NAME="..."> tag that
1766 // matches startSection
1767 if ((*curSectionIter)->name == startSection) {
1768 curSection = (*curSectionIter)();
1769 break;
1770 }
1771 }
1772
1773 // Cannot find the desired tag
1774 if (!curSection) {
1775 throw ErrorGenericSyntax(_("cannot find <A NAME=\"%$\">"),
1776 startSection);
1777 }
1778 }
1779
1780 // Need to find the link after
1781 // startSection if startSection
1782 // given
1783 sptr_list<Anchor>::iterator curAnchorIter;
1784
1785 if (curSection) {
1786 // Find anchor directly after curSection
1787
1788 for (curAnchorIter = anchorList.begin();
1789 curAnchorIter != anchorList.end(); ++curAnchorIter) {
1790
1791 // Skip a <A HREF=...> or <A CALL="...">
1792 // after curSection.
1793 // This is used to ensure that
1794 // kcd `dir' cycles through every matched
1795 // `dir'
1796
1797 // Found
1798 if ((*curAnchorIter)->rowFrom > (*curSectionIter)->rowFrom+1) {
1799 break;
1800 }
1801 }
1802
1803 // Zero or one <A HREF=...> or <A CALL="...">
1804 // after curSection
1805 if (curAnchorIter == anchorList.end()) {
1806
1807 // Let's use the first anchor of the document
1808 // instead
1809 curAnchorIter = anchorList.begin();
1810 }
1811 }
1812
1813 // Find the first anchor after curAnchor
1814 // that matches findLink
1815 Anchor *anchorSelected = 0;
1816 sptr_list<Anchor>::iterator anchorSelectedIter = curAnchorIter;
1817
1818 do {
1819 if (CheckString((*anchorSelectedIter)->name, findLink)) {
1820
1821 // Found !
1822 anchorSelected = (*anchorSelectedIter)(); // Set return value
1823 break;
1824 }
1825 next_loop(anchorList, anchorSelectedIter);
1826 } while (curAnchorIter != anchorSelectedIter); // Make sure loop not repeated
1827
1828 return anchorSelected;
1829 }
1830
1831 //------------------------------------------------------------------------
1832 // Draw functions
1833 //------------------------------------------------------------------------
1834
DrawSetSize(int row,int col)1835 void HyperDocument::DrawSetSize(int row, int col)
1836 {
1837 numRow = row;
1838 numColumn = col;
1839 }
1840
DrawBegin(WINDOW * pad)1841 void HyperDocument::DrawBegin(WINDOW *pad)
1842 {
1843 init_entity_table();
1844
1845 curRow = 0;
1846 curColumn = 0;
1847 numChar = new short[numRow];
1848 for (size_t i = 0; i < static_cast<size_t>(numRow); ++i)
1849 numChar[i] = numColumn;
1850
1851 numNestA = 0;
1852 numNestB = 0;
1853 numNestI = 0;
1854 numIndent = 0;
1855
1856 wmove(pad, 0, 0);
1857 }
1858
DrawSetTitle(const char * s)1859 void HyperDocument::DrawSetTitle(const char *s)
1860 {
1861 titleText = s;
1862 }
1863
DrawSetTitle(const string & s)1864 void HyperDocument::DrawSetTitle(const string &s)
1865 {
1866 titleText = s;
1867 }
1868
DrawSetItalic(WINDOW * pad)1869 void HyperDocument::DrawSetItalic(WINDOW *pad)
1870 {
1871 if (pad) {
1872 numNestI++;
1873 wattrset(pad, GetNestedAttr());
1874 }
1875 }
1876
DrawClearItalic(WINDOW * pad)1877 void HyperDocument::DrawClearItalic(WINDOW *pad)
1878 {
1879 if (pad) {
1880 if (numNestI > 0)
1881 numNestI--;
1882 wattrset(pad, GetNestedAttr());
1883 }
1884 }
1885
DrawSetBold(WINDOW * pad)1886 void HyperDocument::DrawSetBold(WINDOW *pad)
1887 {
1888 if (pad) {
1889 numNestB++;
1890 wattrset(pad, GetNestedAttr());
1891 }
1892 }
1893
DrawClearBold(WINDOW * pad)1894 void HyperDocument::DrawClearBold(WINDOW *pad)
1895 {
1896 if (pad) {
1897 if (numNestB > 0)
1898 numNestB--;
1899 wattrset(pad, GetNestedAttr());
1900 }
1901 }
1902
DrawCharSub(WINDOW * pad,chtype c)1903 void HyperDocument::DrawCharSub(WINDOW *pad, chtype c)
1904 {
1905 if (c == '\n') {
1906 // Record column width
1907 NextLine(pad);
1908 }
1909 else if (c == '\t' || c == '\r' || c == '\b' || c == '\0')
1910 ;
1911 else {
1912 if (isprint(c))
1913 waddch(pad, static_cast<unsigned char>(c));
1914 else
1915 waddch(pad, '?');
1916 curColumn++;
1917 }
1918 }
1919
DrawChar(WINDOW * pad,chtype c)1920 void HyperDocument::DrawChar(WINDOW *pad, chtype c)
1921 {
1922 if (pad) {
1923 if (procA)
1924 procA->linkText += static_cast<char>(c & 0xFF);
1925
1926 DrawCharSub(pad, c);
1927 }
1928 }
1929
1930 #ifdef USE_UTF8_MODE
DrawWCharSub(WINDOW * pad,wchar_t wc)1931 void HyperDocument::DrawWCharSub(WINDOW *pad, wchar_t wc)
1932 {
1933 if (wc == '\n') {
1934 // Record column width
1935 NextLine(pad);
1936 }
1937 else if (wc == '\t' || wc == '\r' || wc == '\b' || wc == '\0')
1938 ;
1939 else {
1940 if (iswprint(wc)) {
1941 int w = wcwidth(wc);
1942 if (w == 0 && curColumn) {
1943 cchar_t cc;
1944 wmove(pad, curRow, curColumn-1);
1945 win_wch(pad, &cc);
1946
1947 bool done = false;
1948 for (size_t i = 0; i < CCHARW_MAX; ++i) {
1949 if (!cc.chars[i]) {
1950 cc.chars[i] = wc;
1951 done = true;
1952 break;
1953 }
1954 }
1955
1956 if (done) {
1957 wmove(pad, curRow, curColumn-1);
1958 wadd_wch(pad, &cc);
1959 }
1960 else {
1961 wmove(pad, curRow, curColumn);
1962 waddnwstr(pad, &wc, 1);
1963 }
1964 }
1965 else {
1966 waddnwstr(pad, &wc, 1);
1967 }
1968 curColumn += wcwidth(wc);
1969 }
1970 else {
1971 waddch(pad, '?');
1972 curColumn++;
1973 }
1974 }
1975 }
1976 #endif /* USE_UTF8_MODE */
1977
DrawEntityChar(WINDOW * pad,entity_id id)1978 void HyperDocument::DrawEntityChar(WINDOW *pad, entity_id id)
1979 {
1980 if (pad) {
1981 curColumn++;
1982 draw_entity(pad, id);
1983 }
1984 }
1985
DrawString(WINDOW * pad,const char * s)1986 void HyperDocument::DrawString(WINDOW *pad, const char *s)
1987 {
1988 if (pad) {
1989 if (procA)
1990 procA->linkText += s;
1991
1992 //ofstream ofs("xx", ios::app);
1993 //ofs << curRow << ' ' << curColumn << ' ' << s << '\n';
1994 #ifdef USE_UTF8_MODE
1995 // FIXME: Handle wide/combining chars, UTF-8 encoding
1996 // if (0) {
1997 if (IsUTF8Mode()) {
1998 size_t length = strlen(s);
1999
2000 for (size_t i = 0; s[i] != '\0'; ) {
2001 wchar_t wc;
2002 static mbstate_t state;
2003 memset(&state, 0, sizeof(mbstate_t));
2004 size_t ret = mbrtowc(&wc, s+i, length-i, &state);
2005 if (ret == 0 || ret == static_cast<size_t>(-1)
2006 || ret == static_cast<size_t>(-2))
2007 throw ErrorBadSequence();
2008
2009 // FIXME: Check if printable logic correct?
2010 if (iswprint(wc) || wc == ' ') {
2011 DrawWCharSub(pad, wc);
2012 }
2013 else {
2014 DrawCharSub(pad, '?');
2015 }
2016 i += ret;
2017 }
2018 }
2019 else
2020 #endif
2021 for (size_t i = 0; s[i] != '\0'; ++i)
2022 DrawCharSub(pad, s[i]);
2023 }
2024 }
2025
DrawString(WINDOW * pad,const string & s)2026 void HyperDocument::DrawString(WINDOW *pad, const string &s)
2027 {
2028 DrawString(pad, s.c_str());
2029 }
2030
DrawAName(WINDOW * pad,const char * s)2031 void HyperDocument::DrawAName(WINDOW *pad, const char *s)
2032 {
2033 if (pad) {
2034 Anchor *proc = new Anchor(s);
2035
2036 proc->type = TYPE_SECTION;
2037 proc->rowFrom = curRow;
2038 proc->colFrom = curColumn;
2039
2040 proc->rowTo = curRow; // Don't care where </A> is
2041 proc->colTo = curColumn;
2042
2043 sectionList.push_back(sptr<Anchor>(proc));
2044 }
2045 }
2046
DrawAName(WINDOW * pad,const string & s)2047 void HyperDocument::DrawAName(WINDOW *pad, const string &s)
2048 {
2049 if (pad) {
2050 Anchor *proc = new Anchor(s);
2051
2052 proc->type = TYPE_SECTION;
2053 proc->rowFrom = curRow;
2054 proc->colFrom = curColumn;
2055
2056 proc->rowTo = curRow; // Don't care where </A> is
2057 proc->colTo = curColumn;
2058
2059 sectionList.push_back(sptr<Anchor>(proc));
2060 }
2061 }
2062
DrawACallBegin(WINDOW * pad,const char * s)2063 void HyperDocument::DrawACallBegin(WINDOW *pad, const char *s)
2064 {
2065 if (pad) {
2066 numNestA++;
2067 wattrset(pad, GetNestedAttr());
2068
2069 procA = new Anchor(s);
2070
2071 procA->type = TYPE_CALL;
2072 procA->rowFrom = curRow;
2073 procA->colFrom = curColumn;
2074
2075 procA->rowTo = -1;
2076 procA->colTo = -1;
2077
2078 anchorList.push_back(sptr<Anchor>(procA));
2079 }
2080 }
2081
DrawACallBegin(WINDOW * pad,const string & s)2082 void HyperDocument::DrawACallBegin(WINDOW *pad, const string &s)
2083 {
2084 if (pad) {
2085 numNestA++;
2086 wattrset(pad, GetNestedAttr());
2087
2088 procA = new Anchor(s);
2089
2090 procA->type = TYPE_CALL;
2091 procA->rowFrom = curRow;
2092 procA->colFrom = curColumn;
2093
2094 procA->rowTo = -1;
2095 procA->colTo = -1;
2096
2097 anchorList.push_back(sptr<Anchor>(procA));
2098 }
2099 }
2100
DrawAHrefBegin(WINDOW * pad,const char * s)2101 void HyperDocument::DrawAHrefBegin(WINDOW *pad, const char *s)
2102 {
2103 if (pad) {
2104 numNestA++;
2105 wattrset(pad, GetNestedAttr());
2106
2107 procA = new Anchor(s);
2108
2109 procA->type = TYPE_GOTO;
2110 procA->rowFrom = curRow;
2111 procA->colFrom = curColumn;
2112
2113 procA->rowTo = -1;
2114 procA->colTo = -1;
2115
2116 anchorList.push_back(sptr<Anchor>(procA));
2117 }
2118 }
2119
DrawAHrefBegin(WINDOW * pad,const string & s)2120 void HyperDocument::DrawAHrefBegin(WINDOW *pad, const string &s)
2121 {
2122 if (pad) {
2123 numNestA++;
2124 wattrset(pad, GetNestedAttr());
2125
2126 procA = new Anchor(s);
2127
2128 procA->type = TYPE_GOTO;
2129 procA->rowFrom = curRow;
2130 procA->colFrom = curColumn;
2131
2132 procA->rowTo = -1;
2133 procA->colTo = -1;
2134
2135 anchorList.push_back(sptr<Anchor>(procA));
2136 }
2137 }
2138
DrawAEnd(WINDOW * pad)2139 void HyperDocument::DrawAEnd(WINDOW *pad)
2140 {
2141 if (pad) {
2142 if (numNestA > 0) {
2143 procA->rowTo = curRow;
2144 procA->colTo = curColumn-1; /* Prev column */
2145
2146 numNestA--;
2147 wattrset(pad, GetNestedAttr());
2148
2149 procA = 0;
2150 }
2151 }
2152 }
2153