1 /*******************************************************
2 
3    CoolReader Engine
4 
5    lvtinydom.cpp: fast and compact XML DOM tree
6 
7    (c) Vadim Lopatin, 2000-2011
8    This source code is distributed under the terms of
9    GNU General Public License
10    See LICENSE file for details
11 
12 *******************************************************/
13 
14 
15 /// Change this in case of incompatible changes in XML parsing or DOM
16 // building that could result in XPATHs being different than previously
// (this could make saved bookmarks and highlights, made with a previous
// version, not found in the DOM built with a newer version).
19 // Users of this library can request the old behaviour by setting
20 // gDOMVersionRequested to an older version to request the old (possibly
21 // buggy) behaviour.
22 #define DOM_VERSION_CURRENT 20200824
23 
24 // Also defined in include/lvtinydom.h
25 #define DOM_VERSION_WITH_NORMALIZED_XPOINTERS 20200223
26 
27 // Changes:
28 // 20100101 to 20180502: historical version
29 //
30 // 20180503: fixed <BR> that were previously converted to <P> because
31 // of a fix for "bad LIB.RU books" being applied in any case. This
32 // could prevent the style of the container to be applied on HTML
33 // sub-parts after a <BR>, or the style of <P> (with text-indent) to
34 // be applied after a <BR>.
35 //
36 // 20180524: changed default rendering of:
37 //   <li> (and css 'display:list-item') from css_d_list_item_legacy to css_d_list_item_block
38 //   <cite> from css_d_block to css_d_inline (inline in HTML, block in FB2, ensured by fb2.css)
39 //   <style> from css_d_inline to css_d_none (invisible in HTML)
40 // Changed also the default display: value for base elements (and so
41 // for unknown elements) from css_d_inherit to css_d_inline, and disable
42 // inheritance for the display: property, as per specs.
43 // See https://developer.mozilla.org/en-US/docs/Web/CSS/display
44 // (Initial value: inline; Inherited: no)
45 //
46 // 20180528: clean epub.css from class name based declarations
47 //   added support for style property -cr-ignore-if-dom-version-greater-or-equal: 20180528;
48 //   to ignore the whole declaration with newer gDOMVersionRequested.
49 //   Use it to keep class name based declarations that involve display:
50 //   so to not break previous DOM
51 // Also: fb2def.h updates
52 //   Changed some elements from XS_TAG1 to XS_TAG1T (<hr>, <ul>, <ol>,
53 //   <dl>, <output>, <section>, <svg>), so any text node direct child is
54 //   now displayed instead of just being dropped (all browsers do display
55 //   such child text nodes).
56 //   Also no more hide the <form> element content, as it may contain
57 //   textual information.
58 //   Also change <code> from 'white-space: pre' to 'normal', like browsers do
59 //   Added missing block elements from HTML specs so they are correctly
60 //   displayed as 'block' instead of the new default of 'inline'.
61 //
62 // (20190703: added support for CSS float: and clear: which may
// insert <floatBox> elements in the DOM tree. But as this is
// togglable, and legacy rendering is available, no need to limit
65 // their support to some DOM_VERSION. So no bump needed.)
66 //
67 // (20200110: added support for CSS display: inline-block and inline-table,
// which may insert <inlineBox> elements in the DOM tree. But as this is
// togglable, and legacy rendering is available, no need to limit
70 // their support to some DOM_VERSION. So no bump needed.)
71 //
72 // 20200223: normalized XPointers/XPATHs, by using createXPointerV2()
73 // and toStringV2(), that should ensure XPointers survive changes
74 // in style->display and the insertion or removal of autoBoxing,
75 // floatBox and inlineBox.
76 // (Older gDOMVersionRequested will keep using createXPointerV1()
77 // and toStringV1() to have non-normalized XPointers still working.)
// (20200223: added togglable auto completion of incomplete tables by
79 // wrapping some elements in a new <tabularBox>.)
80 //
81 // 20200824: added more HTML5 elements, and HTML parser changes
82 // to be (only a little bit) more HTML5 conformant
83 
84 extern const int gDOMVersionCurrent = DOM_VERSION_CURRENT;
85 
86 
87 /// change in case of incompatible changes in swap/cache file format to avoid using incompatible swap file
88 #define CACHE_FILE_FORMAT_VERSION "3.12.75"
89 
90 /// increment following value to force re-formatting of old book after load
91 #define FORMATTING_VERSION_ID 0x0026
92 
93 #ifndef DOC_DATA_COMPRESSION_LEVEL
/// data compression level (0=no compression, 1=fast compression, 3=normal compression)
95 // Note: keep this above 1, toggling between compression and no-compression
96 // can be done at run time by calling compressCachedData(false)
97 #define DOC_DATA_COMPRESSION_LEVEL 1 // 0, 1, 3 (0=no compression)
98 #endif
99 
100 #ifndef STREAM_AUTO_SYNC_SIZE
101 #define STREAM_AUTO_SYNC_SIZE 300000
102 #endif //STREAM_AUTO_SYNC_SIZE
103 
104 //=====================================================
105 // Document data caching parameters
106 //=====================================================
107 
108 #ifndef DOC_BUFFER_SIZE
109 #define DOC_BUFFER_SIZE 0x00A00000UL // default buffer size
110 #endif
111 
112 #if DOC_BUFFER_SIZE >= 0x7FFFFFFFUL
113 #error DOC_BUFFER_SIZE value is too large. This results in integer overflow.
114 #endif
115 
116 //--------------------------------------------------------
117 // cache memory sizes
118 //--------------------------------------------------------
119 #ifndef ENABLED_BLOCK_WRITE_CACHE
120 #define ENABLED_BLOCK_WRITE_CACHE 1
121 #endif
122 
// Each *_UNPACKED_SPACE value below is a percentage of DOC_BUFFER_SIZE;
// each *_CHUNK_SIZE value is a size in bytes.
#define WRITE_CACHE_TOTAL_SIZE    (10*DOC_BUFFER_SIZE/100)

#define TEXT_CACHE_UNPACKED_SPACE (25*DOC_BUFFER_SIZE/100)
#define TEXT_CACHE_CHUNK_SIZE     0x008000 // 32K
#define ELEM_CACHE_UNPACKED_SPACE (45*DOC_BUFFER_SIZE/100)
#define ELEM_CACHE_CHUNK_SIZE     0x004000 // 16K
#define RECT_CACHE_UNPACKED_SPACE (45*DOC_BUFFER_SIZE/100)
#define RECT_CACHE_CHUNK_SIZE     0x00F000 // 60K (0xF000 = 61440 bytes)
#define STYLE_CACHE_UNPACKED_SPACE (10*DOC_BUFFER_SIZE/100)
#define STYLE_CACHE_CHUNK_SIZE    0x00C000 // 48K
133 //--------------------------------------------------------
134 
135 #define COMPRESS_NODE_DATA          true
136 #define COMPRESS_NODE_STORAGE_DATA  true
137 #define COMPRESS_MISC_DATA          true
138 #define COMPRESS_PAGES_DATA         true
139 #define COMPRESS_TOC_DATA           true
140 #define COMPRESS_PAGEMAP_DATA       true
141 #define COMPRESS_STYLE_DATA         true
142 
143 //#define CACHE_FILE_SECTOR_SIZE 4096
144 #define CACHE_FILE_SECTOR_SIZE 1024
145 #define CACHE_FILE_WRITE_BLOCK_PADDING 1
146 
/// set to 1 to log storage reads/writes
148 #define DEBUG_DOM_STORAGE 0
149 //#define TRACE_AUTOBOX
150 /// set to 1 to enable crc check of all blocks of cache file on open
151 #ifndef ENABLE_CACHE_FILE_CONTENTS_VALIDATION
152 #define ENABLE_CACHE_FILE_CONTENTS_VALIDATION 1
153 #endif
154 
155 #define RECT_DATA_CHUNK_ITEMS_SHIFT 11
156 #define STYLE_DATA_CHUNK_ITEMS_SHIFT 12
157 
158 // calculated parameters
159 #define WRITE_CACHE_BLOCK_SIZE 0x4000
160 #define WRITE_CACHE_BLOCK_COUNT (WRITE_CACHE_TOTAL_SIZE/WRITE_CACHE_BLOCK_SIZE)
161 #define TEST_BLOCK_STREAM 0
162 
163 #define PACK_BUF_SIZE 0x10000
164 #define UNPACK_BUF_SIZE 0x40000
165 
166 #define RECT_DATA_CHUNK_ITEMS (1<<RECT_DATA_CHUNK_ITEMS_SHIFT)
167 #define RECT_DATA_CHUNK_SIZE (RECT_DATA_CHUNK_ITEMS*sizeof(lvdomElementFormatRec))
168 #define RECT_DATA_CHUNK_MASK (RECT_DATA_CHUNK_ITEMS-1)
169 
170 #define STYLE_DATA_CHUNK_ITEMS (1<<STYLE_DATA_CHUNK_ITEMS_SHIFT)
171 #define STYLE_DATA_CHUNK_SIZE (STYLE_DATA_CHUNK_ITEMS*sizeof(ldomNodeStyleInfo))
172 #define STYLE_DATA_CHUNK_MASK (STYLE_DATA_CHUNK_ITEMS-1)
173 
174 
175 #define STYLE_HASH_TABLE_SIZE     512
176 #define FONT_HASH_TABLE_SIZE      256
177 
178 
179 static const char COMPRESSED_CACHE_FILE_MAGIC[] = "CoolReader 3 Cache"
180                                        " File v" CACHE_FILE_FORMAT_VERSION ": "
181                                        "c0"
182                                        "m1"
183                                         "\n";
184 
185 static const char UNCOMPRESSED_CACHE_FILE_MAGIC[] = "CoolReader 3 Cache"
186                                        " File v" CACHE_FILE_FORMAT_VERSION ": "
187                                        "c0"
188                                        "m0"
189                                         "\n";
190 
191 #define CACHE_FILE_MAGIC_SIZE 40
192 
/// Type tags of the blocks stored in a cache file.
/// The numeric values are spelled out because they are persisted in the
/// file's block index: they must never be renumbered.
enum CacheFileBlockType {
    CBT_FREE            = 0,
    CBT_INDEX           = 1,
    CBT_TEXT_DATA       = 2,
    CBT_ELEM_DATA       = 3,
    CBT_RECT_DATA       = 4,
    CBT_ELEM_STYLE_DATA = 5,
    CBT_MAPS_DATA       = 6,
    CBT_PAGE_DATA       = 7,
    CBT_PROP_DATA       = 8,
    CBT_NODE_INDEX      = 9,
    CBT_ELEM_NODE       = 10,
    CBT_TEXT_NODE       = 11,
    CBT_REND_PARAMS     = 12,
    CBT_TOC_DATA        = 13,
    CBT_PAGEMAP_DATA    = 14,
    CBT_STYLE_DATA      = 15,
    CBT_BLOB_INDEX      = 16,
    CBT_BLOB_DATA       = 17,
    CBT_FONT_DATA       = 18
};
214 
215 
216 #include <stdlib.h>
217 #include <string.h>
218 #include "../include/crsetup.h"
219 #include "../include/lvstring.h"
220 #include "../include/lvtinydom.h"
221 #include "../include/fb2def.h"
222 #if BUILD_LITE!=1
223 #include "../include/lvrend.h"
224 #include "../include/chmfmt.h"
225 #endif
226 #include "../include/crtest.h"
227 #include "../include/crlog.h"
228 #include <stddef.h>
229 #include <math.h>
230 #include <zlib.h>
231 #include <xxhash.h>
232 #include <lvtextfm.h>
233 
234 // define to store new text nodes as persistent text, instead of mutable
235 #define USE_PERSISTENT_TEXT 1
236 
237 
// Cached data is compressed by default: this gives smaller cache files, at
// the cost of slower rendering and page turns with big documents.
static bool _compressCachedData = true;

/// Turns compression of cached data on or off at run time.
void compressCachedData(bool enable)
{
    _compressCachedData = enable;
}
244 
// A factor of 1 (the default) means the TEXT_CACHE_UNPACKED_SPACE & co
// values defined above are used as is.
static float _storageMaxUncompressedSizeFactor = 1.0f;

/// Replaces the factor stored in _storageMaxUncompressedSizeFactor.
void setStorageMaxUncompressedSizeFactor(float factor)
{
    _storageMaxUncompressedSizeFactor = factor;
}
250 
251 static bool _enableCacheFileContentsValidation = (bool)ENABLE_CACHE_FILE_CONTENTS_VALIDATION;
enableCacheFileContentsValidation(bool enable)252 void enableCacheFileContentsValidation(bool enable) {
253 	_enableCacheFileContentsValidation = enable;
254 }
255 
256 static int _nextDocumentIndex = 0;
257 ldomDocument * ldomNode::_documentInstances[MAX_DOCUMENT_INSTANCE_COUNT] = {NULL,};
258 
259 /// adds document to list, returns ID of allocated document, -1 if no space in instance array
registerDocument(ldomDocument * doc)260 int ldomNode::registerDocument( ldomDocument * doc )
261 {
262     for ( int i=0; i<MAX_DOCUMENT_INSTANCE_COUNT; i++ ) {
263         if ( _nextDocumentIndex<0 || _nextDocumentIndex>=MAX_DOCUMENT_INSTANCE_COUNT )
264             _nextDocumentIndex = 0;
265         if ( _documentInstances[_nextDocumentIndex]==NULL) {
266             _documentInstances[_nextDocumentIndex] = doc;
267             CRLog::info("ldomNode::registerDocument() - new index = %d", _nextDocumentIndex);
268             return _nextDocumentIndex++;
269         }
270         _nextDocumentIndex++;
271     }
272     return -1;
273 }
274 
275 /// removes document from list
unregisterDocument(ldomDocument * doc)276 void ldomNode::unregisterDocument( ldomDocument * doc )
277 {
278     for ( int i=0; i<MAX_DOCUMENT_INSTANCE_COUNT; i++ ) {
279         if ( _documentInstances[i]==doc ) {
280             CRLog::info("ldomNode::unregisterDocument() - for index %d", i);
281             _documentInstances[i] = NULL;
282         }
283     }
284 }
285 
286 /// mutable text node
287 class ldomTextNode
288 {
289     lUInt32 _parentIndex;
290     lString8 _text;
291 public:
292 
getParentIndex()293     lUInt32 getParentIndex()
294     {
295         return _parentIndex;
296     }
297 
setParentIndex(lUInt32 n)298     void setParentIndex( lUInt32 n )
299     {
300         _parentIndex = n;
301     }
302 
ldomTextNode(lUInt32 parentIndex,const lString8 & text)303     ldomTextNode( lUInt32 parentIndex, const lString8 & text )
304     : _parentIndex(parentIndex), _text(text)
305     {
306     }
307 
getText()308     lString8 getText()
309     {
310         return _text;
311     }
312 
getText32()313     lString32 getText32()
314     {
315         return Utf8ToUnicode(_text);
316     }
317 
setText(const lString8 & s)318     void setText( const lString8 & s )
319     {
320         _text = s;
321     }
322 
setText(const lString32 & s)323     void setText( const lString32 & s )
324     {
325         _text = UnicodeToUtf8(s);
326     }
327 };
328 
329 #define LASSERT(x) \
330     if (!(x)) crFatalError(1111, "assertion failed: " #x)
331 
332 //#define INDEX1 94
333 //#define INDEX2 96
334 
335 //#define INDEX1 105
336 //#define INDEX2 106
337 
338 /// pack data from _buf to _compbuf
339 bool ldomPack( const lUInt8 * buf, int bufsize, lUInt8 * &dstbuf, lUInt32 & dstsize );
340 /// unpack data from _compbuf to _buf
341 bool ldomUnpack( const lUInt8 * compbuf, int compsize, lUInt8 * &dstbuf, lUInt32 & dstsize  );
342 
343 
344 #if BUILD_LITE!=1
345 
346 //static lUInt32 calcHash32( const lUInt8 * s, int len )
347 //{
348 //    lUInt32 res = 0;
349 //    for ( int i=0; i<len; i++ ) {
350 //        // res*31 + s
351 //        res = (((((((res<<1)+res)<<1)+res)<<1)+res)<<1)+res + s[i];
352 //    }
353 //    return res;
354 //}
355 
356 // FNV 64bit hash function
357 // from http://isthe.com/chongo/tech/comp/fnv/#gcc-O3
358 
359 //#define NO_FNV_GCC_OPTIMIZATION
360 /*#define FNV_64_PRIME ((lUInt64)0x100000001b3ULL)
361 static lUInt64 calcHash64( const lUInt8 * s, int len )
362 {
363     const lUInt8 * endp = s + len;
364     // 64 bit FNV hash function
365     lUInt64 hval = 14695981039346656037ULL;
366     for ( ; s<endp; s++ ) {
367 #if defined(NO_FNV_GCC_OPTIMIZATION)
368         hval *= FNV_64_PRIME;
369 #else *//* NO_FNV_GCC_OPTIMIZATION *//*
370         hval += (hval << 1) + (hval << 4) + (hval << 5) +
371             (hval << 7) + (hval << 8) + (hval << 40);
372 #endif *//* NO_FNV_GCC_OPTIMIZATION *//*
373         hval ^= *s;
374     }
375     return hval;
376 }*/
calcHash(const lUInt8 * s,int len)377 static lUInt32 calcHash(const lUInt8 * s, int len)
378 {
379 return XXH32(s,len,0);
380 }
calcGlobalSettingsHash(int documentId,bool already_rendered)381 lUInt32 calcGlobalSettingsHash(int documentId, bool already_rendered)
382 {
383     lUInt32 hash = FORMATTING_VERSION_ID;
384     hash = hash * 31 + (int)fontMan->GetShapingMode();
385     if (fontMan->GetKerning())
386         hash = hash * 75 + 1761;
387     hash = hash * 31 + fontMan->GetFontListHash(documentId);
388     hash = hash * 31 + (int)fontMan->GetHintingMode();
389     if ( LVRendGetFontEmbolden() )
390         hash = hash * 75 + 2384761;
391     hash = hash * 31 + fontMan->GetFallbackFontFaces().getHash();
392     hash = hash * 31 + gRenderDPI;
393     hash = hash * 31 + gRootFontSize;
394     // If not yet rendered (initial loading with XML parsing), we can
395     // ignore some global flags that have not yet produced any effect,
396     // so they can possibly be updated between loading and rendering
397     // without trigerring a drop of all the styles and rend methods
398     // set up in the XML loading phase. This is mostly only needed
399     // for TextLangMan::getHash(), as the lang can be set by frontend
400     // code after the loading phase, once the book language is known
401     // from its metadata, before the rendering that will use the
402     // language set. (We could ignore some of the other settings
403     // above if we ever need to reset them in between these phases;
404     // just be certain they are really not used in the first phase.)
405     if ( already_rendered ) {
406         hash = hash * 31 + TextLangMan::getHash();
407         hash = hash * 31 + HyphMan::getLeftHyphenMin();
408         hash = hash * 31 + HyphMan::getRightHyphenMin();
409         hash = hash * 31 + HyphMan::getTrustSoftHyphens();
410     }
411     return hash;
412 }
413 
dumpRendMethods(ldomNode * node,lString32 prefix)414 static void dumpRendMethods( ldomNode * node, lString32 prefix )
415 {
416     lString32 name = prefix;
417     if ( node->isText() )
418         name << node->getText();
419     else
420         name << "<" << node->getNodeName() << ">   " << fmt::decimal(node->getRendMethod());
421     CRLog::trace( "%s ",LCSTR(name) );
422     for ( int i=0; i<node->getChildCount(); i++ ) {
423         dumpRendMethods( node->getChildNode(i), prefix + "   ");
424     }
425 }
426 
427 
428 
429 
430 
431 #define CACHE_FILE_ITEM_MAGIC 0xC007B00C
432 struct CacheFileItem
433 {
434     lUInt32 _magic;    // magic number
435     lUInt16 _dataType;     // data type
436     lUInt16 _dataIndex;    // additional data index, for internal usage for data type
437     int _blockIndex;   // sequential number of block
438     int _blockFilePos; // start of block
439     int _blockSize;    // size of block within file
440     int _dataSize;     // used data size inside block (<= block size)
441     lUInt64 _dataHash; // additional hash of data
442     lUInt64 _packedHash; // additional hash of packed data
443     lUInt32 _uncompressedSize;   // size of uncompressed block, if compression is applied, 0 if no compression
444     lUInt32 _padding;  // explicite padding (this struct would be implicitely padded from 44 bytes to 48 bytes)
445                        // so we can set this padding value to 0 (instead of some random data with implicite padding)
446                        // in order to get reproducible (same file checksum) cache files when this gets serialized
validateCacheFileItem447     bool validate( int fsize )
448     {
449         if ( _magic!=CACHE_FILE_ITEM_MAGIC ) {
450             CRLog::error("CacheFileItem::validate: block magic doesn't match");
451             return false;
452         }
453         if ( _dataSize>_blockSize || _blockSize<0 || _dataSize<0 || _blockFilePos+_dataSize>fsize || _blockFilePos<CACHE_FILE_SECTOR_SIZE) {
454             CRLog::error("CacheFileItem::validate: invalid block size or position");
455             return false;
456         }
457         return true;
458     }
CacheFileItemCacheFileItem459     CacheFileItem()
460     {
461     }
CacheFileItemCacheFileItem462     CacheFileItem( lUInt16 dataType, lUInt16 dataIndex )
463     : _magic(CACHE_FILE_ITEM_MAGIC)
464     , _dataType(dataType)   // data type
465     , _dataIndex(dataIndex) // additional data index, for internal usage for data type
466     , _blockIndex(0)        // sequential number of block
467     , _blockFilePos(0)      // start of block
468     , _blockSize(0)         // size of block within file
469     , _dataSize(0)          // used data size inside block (<= block size)
470     , _dataHash(0)          // hash of data
471     , _packedHash(0) // additional hash of packed data
472     , _uncompressedSize(0)  // size of uncompressed block, if compression is applied, 0 if no compression
473     , _padding(0)           // (padding)
474     {
475     }
476 };
477 
478 
479 struct SimpleCacheFileHeader
480 {
481     char _magic[CACHE_FILE_MAGIC_SIZE] = { 0 }; // magic
482     lUInt32 _dirty;
483     lUInt32 _dom_version;
SimpleCacheFileHeaderSimpleCacheFileHeader484     SimpleCacheFileHeader( lUInt32 dirtyFlag, lUInt32 domVersion ) {
485         memcpy( _magic, _compressCachedData ? COMPRESSED_CACHE_FILE_MAGIC : UNCOMPRESSED_CACHE_FILE_MAGIC, CACHE_FILE_MAGIC_SIZE );
486         _dirty = dirtyFlag;
487         _dom_version = domVersion;
488     }
489 };
490 
491 struct CacheFileHeader : public SimpleCacheFileHeader
492 {
493     lUInt32 _fsize;
494     // Padding to explicitly align the index block structure, and that can be
495     // be initialized to zero for reproducible file contents.
496     lUInt32 _padding;
497     CacheFileItem _indexBlock; // index array block parameters,
498     // duplicate of one of index records which contains
validateCacheFileHeader499     bool validate(lUInt32 domVersionRequested)
500     {
501         if (memcmp(_magic, _compressCachedData ? COMPRESSED_CACHE_FILE_MAGIC : UNCOMPRESSED_CACHE_FILE_MAGIC, CACHE_FILE_MAGIC_SIZE) != 0) {
502             CRLog::error("CacheFileHeader::validate: magic doesn't match");
503             return false;
504         }
505         if ( _dirty!=0 ) {
506             CRLog::error("CacheFileHeader::validate: dirty flag is set");
507             printf("CRE: ignoring cache file (marked as dirty)\n");
508             return false;
509         }
510         if ( _dom_version != domVersionRequested ) {
511             CRLog::error("CacheFileHeader::validate: DOM version mismatch");
512             printf("CRE: ignoring cache file (dom version mismatch)\n");
513             return false;
514         }
515         return true;
516     }
CacheFileHeaderCacheFileHeader517     CacheFileHeader( CacheFileItem * indexRec, int fsize, lUInt32 dirtyFlag, lUInt32 domVersion )
518     : SimpleCacheFileHeader(dirtyFlag, domVersion), _indexBlock(0,0)
519     , _padding(0)
520     {
521         if ( indexRec ) {
522             memcpy( &_indexBlock, indexRec, sizeof(CacheFileItem));
523         } else
524             memset( &_indexBlock, 0, sizeof(CacheFileItem));
525         _fsize = fsize;
526     }
527 };
528 
529 /**
530  * Cache file implementation.
531  */
532 class CacheFile
533 {
534     int _sectorSize; // block position and size granularity
535     int _size;
536     bool _indexChanged;
537     bool _dirty;
538     lUInt32 _domVersion;
539     lString32 _cachePath;
540     LVStreamRef _stream; // file stream
541     LVPtrVector<CacheFileItem, true> _index; // full file block index
542     LVPtrVector<CacheFileItem, false> _freeIndex; // free file block index
543     LVHashTable<lUInt32, CacheFileItem*> _map; // hash map for fast search
544     // searches for existing block
545     CacheFileItem * findBlock( lUInt16 type, lUInt16 index );
546     // alocates block at index, reuses existing one, if possible
547     CacheFileItem * allocBlock( lUInt16 type, lUInt16 index, int size );
548     // mark block as free, for later reusing
549     void freeBlock( CacheFileItem * block );
550     // writes file header
551     bool updateHeader();
552     // writes index block
553     bool writeIndex();
554     // reads index from file
555     bool readIndex();
556     // reads all blocks of index and checks CRCs
557     bool validateContents();
558 public:
559     // return current file size
getSize()560     int getSize() { return _size; }
561     // create uninitialized cache file, call open or create to initialize
562     CacheFile(lUInt32 domVersion);
563     // free resources
564     ~CacheFile();
565     // try open existing cache file
566     bool open( lString32 filename );
567     // try open existing cache file from stream
568     bool open( LVStreamRef stream );
569     // create new cache file
570     bool create( lString32 filename );
571     // create new cache file in stream
572     bool create( LVStreamRef stream );
573     /// writes block to file
574     bool write( lUInt16 type, lUInt16 dataIndex, const lUInt8 * buf, int size, bool compress );
575     /// reads and allocates block in memory
576     bool read( lUInt16 type, lUInt16 dataIndex, lUInt8 * &buf, int &size );
577     /// reads and validates block
578     bool validate( CacheFileItem * block );
579     /// writes content of serial buffer
580     bool write( lUInt16 type, lUInt16 index, SerialBuf & buf, bool compress );
581     /// reads content of serial buffer
582     bool read( lUInt16 type, lUInt16 index, SerialBuf & buf );
583     /// writes content of serial buffer
write(lUInt16 type,SerialBuf & buf,bool compress)584     bool write( lUInt16 type, SerialBuf & buf, bool compress )
585     {
586         return write( type, 0, buf, compress);
587     }
588     /// reads content of serial buffer
read(lUInt16 type,SerialBuf & buf)589     bool read( lUInt16 type, SerialBuf & buf )
590     {
591         return read(type, 0, buf);
592     }
593     /// reads block as a stream
594     LVStreamRef readStream(lUInt16 type, lUInt16 index);
595 
596     /// sets dirty flag value, returns true if value is changed
597     bool setDirtyFlag( bool dirty );
598     /// sets DOM version value, returns true if value is changed
599     bool setDOMVersion( lUInt32 domVersion );
600     // flushes index
601     bool flush( bool clearDirtyFlag, CRTimerUtil & maxTime );
roundSector(int n)602     int roundSector( int n )
603     {
604         return (n + (_sectorSize-1)) & ~(_sectorSize-1);
605     }
setAutoSyncSize(int sz)606     void setAutoSyncSize(int sz) {
607         _stream->setAutoSyncSize(sz);
608     }
setCachePath(const lString32 cachePath)609     void setCachePath(const lString32 cachePath) {
610         _cachePath = cachePath;
611     }
getCachePath()612     const lString32 getCachePath() {
613         return _cachePath;
614     }
615 };
616 
617 
618 // create uninitialized cache file, call open or create to initialize
CacheFile(lUInt32 domVersion)619 CacheFile::CacheFile(lUInt32 domVersion)
620 : _sectorSize( CACHE_FILE_SECTOR_SIZE ), _size(0), _indexChanged(false), _dirty(true), _domVersion(domVersion), _map(1024), _cachePath(lString32::empty_str)
621 {
622 }
623 
624 // free resources
~CacheFile()625 CacheFile::~CacheFile()
626 {
627     if ( !_stream.isNull() ) {
628         // don't flush -- leave file dirty
629         //CRTimerUtil infinite;
630         //flush( true, infinite );
631     }
632 }
633 
634 /// sets dirty flag value, returns true if value is changed
setDirtyFlag(bool dirty)635 bool CacheFile::setDirtyFlag( bool dirty )
636 {
637     if ( _dirty==dirty )
638         return false;
639     if ( !dirty ) {
640         CRLog::info("CacheFile::clearing Dirty flag");
641         _stream->Flush(true);
642     } else {
643         CRLog::info("CacheFile::setting Dirty flag");
644     }
645     _dirty = dirty;
646     SimpleCacheFileHeader hdr(_dirty?1:0, _domVersion);
647     _stream->SetPos(0);
648     lvsize_t bytesWritten = 0;
649     _stream->Write(&hdr, sizeof(hdr), &bytesWritten );
650     if ( bytesWritten!=sizeof(hdr) )
651         return false;
652     _stream->Flush(true);
653     //CRLog::trace("setDirtyFlag : hdr is saved with Dirty flag = %d", hdr._dirty);
654     return true;
655 }
656 
setDOMVersion(lUInt32 domVersion)657 bool CacheFile::setDOMVersion( lUInt32 domVersion ) {
658     if ( _domVersion == domVersion )
659         return false;
660     CRLog::info("CacheFile::setting DOM version value");
661     _domVersion = domVersion;
662     SimpleCacheFileHeader hdr(_dirty?1:0, _domVersion);
663     _stream->SetPos(0);
664     lvsize_t bytesWritten = 0;
665     _stream->Write(&hdr, sizeof(hdr), &bytesWritten );
666     if ( bytesWritten!=sizeof(hdr) )
667         return false;
668     _stream->Flush(true);
669     //CRLog::trace("setDOMVersion : hdr is saved with DOM version = %u", hdr._domVersionRequested);
670     return true;
671 }
672 
673 // flushes index
flush(bool clearDirtyFlag,CRTimerUtil & maxTime)674 bool CacheFile::flush( bool clearDirtyFlag, CRTimerUtil & maxTime )
675 {
676     if ( clearDirtyFlag ) {
677         //setDirtyFlag(true);
678         if ( !writeIndex() )
679             return false;
680         setDirtyFlag(false);
681     } else {
682         _stream->Flush(false, maxTime);
683         //CRLog::trace("CacheFile->flush() took %d ms ", (int)timer.elapsed());
684     }
685     return true;
686 }
687 
688 // reads all blocks of index and checks CRCs
validateContents()689 bool CacheFile::validateContents()
690 {
691     CRLog::info("Started validation of cache file contents");
692     LVHashTable<lUInt32, CacheFileItem*>::pair * pair;
693     for ( LVHashTable<lUInt32, CacheFileItem*>::iterator p = _map.forwardIterator(); (pair=p.next())!=NULL; ) {
694         if ( pair->value->_dataType==CBT_INDEX )
695             continue;
696         if ( !validate(pair->value) ) {
697             CRLog::error("Contents validation is failed for block type=%d index=%d", (int)pair->value->_dataType, pair->value->_dataIndex );
698             return false;
699         }
700     }
701     CRLog::info("Finished validation of cache file contents -- successful");
702     return true;
703 }
704 
705 // reads index from file
readIndex()706 bool CacheFile::readIndex()
707 {
708     CacheFileHeader hdr(NULL, _size, 0, 0);
709     _stream->SetPos(0);
710     lvsize_t bytesRead = 0;
711     _stream->Read(&hdr, sizeof(hdr), &bytesRead );
712     if ( bytesRead!=sizeof(hdr) )
713         return false;
714     CRLog::info("Header read: DirtyFlag=%d", hdr._dirty);
715     CRLog::info("Header read: DOM level=%u", hdr._dom_version);
716     if ( !hdr.validate(_domVersion) )
717         return false;
718     if ( (int)hdr._fsize > _size + 4096-1 ) {
719         CRLog::error("CacheFile::readIndex: file size doesn't match with header");
720         return false;
721     }
722     if ( !hdr._indexBlock._blockFilePos )
723         return true; // empty index is ok
724     if ( hdr._indexBlock._blockFilePos>=(int)hdr._fsize || hdr._indexBlock._blockFilePos+hdr._indexBlock._blockSize>(int)hdr._fsize+4096-1 ) {
725         CRLog::error("CacheFile::readIndex: Wrong index file position specified in header");
726         return false;
727     }
728     if ((int)_stream->SetPos(hdr._indexBlock._blockFilePos)!=hdr._indexBlock._blockFilePos ) {
729         CRLog::error("CacheFile::readIndex: cannot move file position to index block");
730         return false;
731     }
732     int count = hdr._indexBlock._dataSize / sizeof(CacheFileItem);
733     if ( count<0 || count>100000 ) {
734         CRLog::error("CacheFile::readIndex: invalid number of blocks in index");
735         return false;
736     }
737     CacheFileItem * index = new CacheFileItem[count];
738     bytesRead = 0;
739     lvsize_t  sz = sizeof(CacheFileItem)*count;
740     _stream->Read(index, sz, &bytesRead );
741     if ( bytesRead!=sz )
742         return false;
743     // check CRC
744     lUInt32 hash = calcHash( (lUInt8*)index, sz );
745     if ( hdr._indexBlock._dataHash!=hash ) {
746         CRLog::error("CacheFile::readIndex: CRC doesn't match found %08x expected %08x", hash, hdr._indexBlock._dataHash);
747         delete[] index;
748         return false;
749     }
750     for ( int i=0; i<count; i++ ) {
751         if (index[i]._dataType == CBT_INDEX)
752             index[i] = hdr._indexBlock;
753         if ( !index[i].validate(_size) ) {
754             delete[] index;
755             return false;
756         }
757         CacheFileItem * item = new CacheFileItem();
758         memcpy(item, &index[i], sizeof(CacheFileItem));
759         _index.add( item );
760         lUInt32 key = ((lUInt32)item->_dataType)<<16 | item->_dataIndex;
761         if ( key==0 )
762             _freeIndex.add( item );
763         else
764             _map.set( key, item );
765     }
766     delete[] index;
767     CacheFileItem * indexitem = findBlock(CBT_INDEX, 0);
768     if ( !indexitem ) {
769         CRLog::error("CacheFile::readIndex: index block info doesn't match header");
770         return false;
771     }
772     _dirty = hdr._dirty ? true : false;
773     return true;
774 }
775 
776 // writes index block
writeIndex()777 bool CacheFile::writeIndex()
778 {
779     if ( !_indexChanged )
780         return true; // no changes: no writes
781 
782     if ( _index.length()==0 )
783         return updateHeader();
784 
785     // create copy of index in memory
786     int count = _index.length();
787     CacheFileItem * indexItem = findBlock(CBT_INDEX, 0);
788     if (!indexItem) {
789         int sz = sizeof(CacheFileItem) * (count * 2 + 100);
790         allocBlock(CBT_INDEX, 0, sz);
791         indexItem = findBlock(CBT_INDEX, 0);
792         (void)indexItem; // silences clang warning
793         count = _index.length();
794     }
795     CacheFileItem * index = new CacheFileItem[count]();
796     int sz = count * sizeof(CacheFileItem);
797     for ( int i = 0; i < count; i++ ) {
798         memcpy( &index[i], _index[i], sizeof(CacheFileItem) );
799         if (index[i]._dataType == CBT_INDEX) {
800             index[i]._dataHash = 0;
801             index[i]._packedHash = 0;
802             index[i]._dataSize = 0;
803         }
804     }
805     bool res = write(CBT_INDEX, 0, (const lUInt8*)index, sz, false);
806     delete[] index;
807 
808     indexItem = findBlock(CBT_INDEX, 0);
809     if ( !res || !indexItem ) {
810         CRLog::error("CacheFile::writeIndex: error while writing index!!!");
811         return false;
812     }
813 
814     updateHeader();
815     _indexChanged = false;
816     return true;
817 }
818 
819 // writes file header
updateHeader()820 bool CacheFile::updateHeader()
821 {
822     CacheFileItem * indexItem = NULL;
823     indexItem = findBlock(CBT_INDEX, 0);
824     CacheFileHeader hdr(indexItem, _size, _dirty?1:0, _domVersion);
825     _stream->SetPos(0);
826     lvsize_t bytesWritten = 0;
827     _stream->Write(&hdr, sizeof(hdr), &bytesWritten );
828     if ( bytesWritten!=sizeof(hdr) )
829         return false;
830     //CRLog::trace("updateHeader finished: Dirty flag = %d", hdr._dirty);
831     return true;
832 }
833 
834 //
freeBlock(CacheFileItem * block)835 void CacheFile::freeBlock( CacheFileItem * block )
836 {
837     lUInt32 key = ((lUInt32)block->_dataType)<<16 | block->_dataIndex;
838     _map.remove(key);
839     block->_dataIndex = 0;
840     block->_dataType = 0;
841     block->_dataSize = 0;
842     _freeIndex.add( block );
843 }
844 
845 /// reads block as a stream
readStream(lUInt16 type,lUInt16 index)846 LVStreamRef CacheFile::readStream(lUInt16 type, lUInt16 index)
847 {
848     CacheFileItem * block = findBlock(type, index);
849     if (block && block->_dataSize) {
850 #if 0
851         lUInt8 * buf = NULL;
852         int size = 0;
853         if (read(type, index, buf, size))
854             return LVCreateMemoryStream(buf, size);
855 #else
856         return LVStreamRef(new LVStreamFragment(_stream, block->_blockFilePos, block->_dataSize));
857 #endif
858     }
859     return LVStreamRef();
860 }
861 
862 // searches for existing block
findBlock(lUInt16 type,lUInt16 index)863 CacheFileItem * CacheFile::findBlock( lUInt16 type, lUInt16 index )
864 {
865     lUInt32 key = ((lUInt32)type)<<16 | index;
866     CacheFileItem * existing = _map.get( key );
867     return existing;
868 }
869 
870 // allocates index record for block, sets its new size
allocBlock(lUInt16 type,lUInt16 index,int size)871 CacheFileItem * CacheFile::allocBlock( lUInt16 type, lUInt16 index, int size )
872 {
873     lUInt32 key = ((lUInt32)type)<<16 | index;
874     CacheFileItem * existing = _map.get( key );
875     if ( existing ) {
876         if ( existing->_blockSize >= size ) {
877             if ( existing->_dataSize != size ) {
878                 existing->_dataSize = size;
879                 _indexChanged = true;
880             }
881             return existing;
882         }
883         // old block has not enough space: free it
884         freeBlock( existing );
885         existing = NULL;
886     }
887     // search for existing free block of proper size
888     int bestSize = -1;
889     //int bestIndex = -1;
890     for ( int i=0; i<_freeIndex.length(); i++ ) {
891         if ( _freeIndex[i] && (_freeIndex[i]->_blockSize>=size) && (bestSize==-1 || _freeIndex[i]->_blockSize<bestSize) ) {
892             bestSize = _freeIndex[i]->_blockSize;
893             //bestIndex = -1;
894             existing = _freeIndex[i];
895         }
896     }
897     if ( existing ) {
898         _freeIndex.remove( existing );
899         existing->_dataType = type;
900         existing->_dataIndex = index;
901         existing->_dataSize = size;
902         _map.set( key, existing );
903         _indexChanged = true;
904         return existing;
905     }
906     // allocate new block
907     CacheFileItem * block = new CacheFileItem( type, index );
908     _map.set( key, block );
909     block->_blockSize = roundSector(size);
910     block->_dataSize = size;
911     block->_blockIndex = _index.length();
912     _index.add(block);
913     block->_blockFilePos = _size;
914     _size += block->_blockSize;
915     _indexChanged = true;
916     // really, file size is not extended
917     return block;
918 }
919 
920 /// reads and validates block
validate(CacheFileItem * block)921 bool CacheFile::validate( CacheFileItem * block )
922 {
923     lUInt8 * buf = NULL;
924     unsigned size = 0;
925 
926     if ( (int)_stream->SetPos( block->_blockFilePos )!=block->_blockFilePos ) {
927         CRLog::error("CacheFile::validate: Cannot set position for block %d:%d of size %d", block->_dataType, block->_dataIndex, (int)size);
928         return false;
929     }
930 
931     // read block from file
932     size = block->_dataSize;
933     buf = (lUInt8 *)malloc(size);
934     lvsize_t bytesRead = 0;
935     _stream->Read(buf, size, &bytesRead );
936     if ( bytesRead!=size ) {
937         CRLog::error("CacheFile::validate: Cannot read block %d:%d of size %d", block->_dataType, block->_dataIndex, (int)size);
938         free(buf);
939         return false;
940     }
941 
942     // check CRC for file block
943     lUInt32 packedhash = calcHash( buf, size );
944     if ( packedhash!=block->_packedHash ) {
945         CRLog::error("CacheFile::validate: packed data CRC doesn't match for block %d:%d of size %d", block->_dataType, block->_dataIndex, (int)size);
946         free(buf);
947         return false;
948     }
949     free(buf);
950     return true;
951 }
952 
953 // reads and allocates block in memory
read(lUInt16 type,lUInt16 dataIndex,lUInt8 * & buf,int & size)954 bool CacheFile::read( lUInt16 type, lUInt16 dataIndex, lUInt8 * &buf, int &size )
955 {
956     buf = NULL;
957     size = 0;
958     CacheFileItem * block = findBlock( type, dataIndex );
959     if ( !block ) {
960         CRLog::error("CacheFile::read: Block %d:%d not found in file", type, dataIndex);
961         return false;
962     }
963     if ( (int)_stream->SetPos( block->_blockFilePos )!=block->_blockFilePos )
964         return false;
965 
966     // read block from file
967     size = block->_dataSize;
968     buf = (lUInt8 *)malloc(size);
969     lvsize_t bytesRead = 0;
970     _stream->Read(buf, size, &bytesRead );
971     if ( (int)bytesRead!=size ) {
972         CRLog::error("CacheFile::read: Cannot read block %d:%d of size %d, bytesRead=%d", type, dataIndex, (int)size, (int)bytesRead);
973         free(buf);
974         buf = NULL;
975         size = 0;
976         return false;
977     }
978 
979     bool compress = block->_uncompressedSize!=0;
980 
981     if ( compress ) {
982         // block is compressed
983 
984         // check crc separately only for compressed data
985         lUInt32 packedhash = calcHash( buf, size );
986         if ( packedhash!=block->_packedHash ) {
987             CRLog::error("CacheFile::read: packed data CRC doesn't match for block %d:%d of size %d", type, dataIndex, (int)size);
988             free(buf);
989             buf = NULL;
990             size = 0;
991             return false;
992         }
993 
994         // uncompress block data
995         lUInt8 * uncomp_buf = NULL;
996         lUInt32 uncomp_size = 0;
997         if ( ldomUnpack(buf, size, uncomp_buf, uncomp_size) && uncomp_size==block->_uncompressedSize ) {
998             free( buf );
999             buf = uncomp_buf;
1000             size = uncomp_size;
1001         } else {
1002             CRLog::error("CacheFile::read: error while uncompressing data for block %d:%d of size %d", type, dataIndex, (int)size);
1003             free(buf);
1004             buf = NULL;
1005             size = 0;
1006             return false;
1007         }
1008     }
1009 
1010     // check CRC
1011     lUInt32 hash = calcHash( buf, size );
1012     if (hash != block->_dataHash) {
1013         CRLog::error("CacheFile::read: CRC doesn't match for block %d:%d of size %d", type, dataIndex, (int)size);
1014         free(buf);
1015         buf = NULL;
1016         size = 0;
1017         return false;
1018     }
1019     // Success. Don't forget to free allocated block externally
1020     return true;
1021 }
1022 
// writes block to file
//
// Stores `size` bytes from `buf` as block type:dataIndex.
// - Returns true without touching the file when a block with the same
//   (uncompressed) size and data hash is already registered.
// - Otherwise marks the file dirty, optionally packs the data (only when both
//   `compress` and _compressCachedData are set; falls back to raw storage if
//   ldomPack fails), picks or allocates a block record, and writes the bytes.
// - On success the record's hashes and sizes are updated and _indexChanged is
//   set, so the index gets rewritten later.
// Returns false when no block could be obtained, or on seek/write failure.
bool CacheFile::write( lUInt16 type, lUInt16 dataIndex, const lUInt8 * buf, int size, bool compress )
{
    // check whether data is changed
    lUInt32 newhash = calcHash( buf, size );
    CacheFileItem * existingblock = findBlock( type, dataIndex );

    if (existingblock) {
        // compare against the uncompressed size, or the stored size for blocks kept raw
        bool sameSize = ((int)existingblock->_uncompressedSize==size) || (existingblock->_uncompressedSize==0 && (int)existingblock->_dataSize==size);
        if (sameSize && existingblock->_dataHash == newhash ) {
            return true;
        }
    }

#if 0
    if (existingblock)
        CRLog::trace("*    oldsz=%d oldhash=%08x", (int)existingblock->_uncompressedSize, (int)existingblock->_dataHash);
    CRLog::trace("* wr block t=%d[%d] sz=%d hash=%08x", type, dataIndex, size, newhash);
#endif
    setDirtyFlag(true);

    // 0 means "stored raw"; for raw blocks the packed hash equals the data hash
    lUInt32 uncompressedSize = 0;
    lUInt64 newpackedhash = newhash;
    if (!_compressCachedData)
        compress = false;
    if ( compress ) {
        lUInt8 * dstbuf = NULL;
        lUInt32 dstsize = 0;
        if ( !ldomPack( buf, size, dstbuf, dstsize ) ) {
            compress = false;
        } else {
            // from here on buf/size describe the packed copy, which this
            // function must free before returning
            uncompressedSize = size;
            size = dstsize;
            buf = dstbuf;
            newpackedhash = calcHash( buf, size );
#if DEBUG_DOM_STORAGE==1
            //CRLog::trace("packed block %d:%d : %d to %d bytes (%d%%)", type, dataIndex, srcsize, dstsize, srcsize>0?(100*dstsize/srcsize):0 );
#endif
        }
    }

    CacheFileItem * block = NULL;
    // NOTE(review): reuse is decided on the previous _dataSize, not on
    // _blockSize; allocBlock() below is the path that compares _blockSize.
    if ( existingblock && existingblock->_dataSize>=size ) {
        // reuse existing block
        block = existingblock;
    } else {
        // allocate new block
        if ( existingblock )
            freeBlock( existingblock );
        block = allocBlock( type, dataIndex, size );
    }
    // NOTE(review): the three error paths below free the packed buffer only
    // when DOC_DATA_COMPRESSION_LEVEL!=0, while the success path frees it
    // unconditionally -- confirm packing cannot succeed when that level is 0.
    if ( !block )
    {
#if DOC_DATA_COMPRESSION_LEVEL!=0
        if ( compress ) {
            free( (void*)buf );
        }
#endif
        return false;
    }
    if ( (int)_stream->SetPos( block->_blockFilePos )!=block->_blockFilePos )
    {
#if DOC_DATA_COMPRESSION_LEVEL!=0
        if ( compress ) {
            free( (void*)buf );
        }
#endif
        return false;
    }
    // assert: size == block->_dataSize
    // actual writing of data
    block->_dataSize = size;
    lvsize_t bytesWritten = 0;
    _stream->Write(buf, size, &bytesWritten );
    if ( (int)bytesWritten!=size )
    {
#if DOC_DATA_COMPRESSION_LEVEL!=0
        if ( compress ) {
            free( (void*)buf );
        }
#endif
        return false;
    }
#if CACHE_FILE_WRITE_BLOCK_PADDING==1
    // Fill the unused tail of the block with 0xFF bytes, but only when the
    // data ends within one sector of the current end of the stream.
    int paddingSize = block->_blockSize - size; //roundSector( size ) - size
    if ( paddingSize ) {
        if ((int)block->_blockFilePos + (int)block->_dataSize >= (int)_stream->GetSize() - _sectorSize) {
            LASSERT(size + paddingSize == block->_blockSize );
//            if (paddingSize > 16384) {
//                CRLog::error("paddingSize > 16384");
//            }
//            LASSERT(paddingSize <= 16384);
            // padding is written in chunks of at most 16384 bytes
            lUInt8 tmp[16384];//paddingSize];
            memset(tmp, 0xFF, paddingSize < 16384 ? paddingSize : 16384);
            do {
                int blkSize = paddingSize < 16384 ? paddingSize : 16384;
                _stream->Write(tmp, blkSize, &bytesWritten );
                paddingSize -= blkSize;
            } while (paddingSize > 0);
        }
    }
#endif
    //_stream->Flush(true);
    // update CRC
    block->_dataHash = newhash;
    block->_packedHash = newpackedhash;
    block->_uncompressedSize = uncompressedSize;

    // release the packed copy produced by ldomPack
    if ( compress ) {
        free( (void*)buf );
    }
    _indexChanged = true;

    //CRLog::error("CacheFile::write: block %d:%d (pos %ds, size %ds) is written (crc=%08x)", type, dataIndex, (int)block->_blockFilePos/_sectorSize, (int)(size+_sectorSize-1)/_sectorSize, block->_dataCRC);
    // success
    return true;
}
1140 
1141 /// writes content of serial buffer
write(lUInt16 type,lUInt16 index,SerialBuf & buf,bool compress)1142 bool CacheFile::write( lUInt16 type, lUInt16 index, SerialBuf & buf, bool compress )
1143 {
1144     return write( type, index, buf.buf(), buf.pos(), compress );
1145 }
1146 
1147 /// reads content of serial buffer
read(lUInt16 type,lUInt16 index,SerialBuf & buf)1148 bool CacheFile::read( lUInt16 type, lUInt16 index, SerialBuf & buf )
1149 {
1150     lUInt8 * tmp = NULL;
1151     int size = 0;
1152     bool res = read( type, index, tmp, size );
1153     if ( res ) {
1154         buf.set( tmp, size );
1155     }
1156     buf.setPos(0);
1157     return res;
1158 }
1159 
1160 // try open existing cache file
open(lString32 filename)1161 bool CacheFile::open( lString32 filename )
1162 {
1163     LVStreamRef stream = LVOpenFileStream( filename.c_str(), LVOM_APPEND );
1164     if ( !stream ) {
1165         CRLog::error( "CacheFile::open: cannot open file %s", LCSTR(filename));
1166         return false;
1167     }
1168     crSetFileToRemoveOnFatalError(LCSTR(filename));
1169     return open(stream);
1170 }
1171 
1172 
1173 // try open existing cache file
open(LVStreamRef stream)1174 bool CacheFile::open( LVStreamRef stream )
1175 {
1176     _stream = stream;
1177     _size = _stream->GetSize();
1178     //_stream->setAutoSyncSize(STREAM_AUTO_SYNC_SIZE);
1179 
1180     if ( !readIndex() ) {
1181         CRLog::error("CacheFile::open : cannot read index from file");
1182         return false;
1183     }
1184     if (_enableCacheFileContentsValidation && !validateContents() ) {
1185         CRLog::error("CacheFile::open : file contents validation failed");
1186         return false;
1187     }
1188     return true;
1189 }
1190 
create(lString32 filename)1191 bool CacheFile::create( lString32 filename )
1192 {
1193     LVStreamRef stream = LVOpenFileStream( filename.c_str(), LVOM_APPEND );
1194     if ( _stream.isNull() ) {
1195         CRLog::error( "CacheFile::create: cannot create file %s", LCSTR(filename));
1196         return false;
1197     }
1198     crSetFileToRemoveOnFatalError(LCSTR(filename));
1199     return create(stream);
1200 }
1201 
create(LVStreamRef stream)1202 bool CacheFile::create( LVStreamRef stream )
1203 {
1204     _stream = stream;
1205     //_stream->setAutoSyncSize(STREAM_AUTO_SYNC_SIZE);
1206     if ( _stream->SetPos(0)!=0 ) {
1207         CRLog::error( "CacheFile::create: cannot seek file");
1208         _stream.Clear();
1209         return false;
1210     }
1211 
1212     _size = _sectorSize;
1213     LVArray<lUInt8> sector0(_sectorSize, 0);
1214     lvsize_t bytesWritten = 0;
1215     _stream->Write(sector0.get(), _sectorSize, &bytesWritten );
1216     if ( (int)bytesWritten!=_sectorSize ) {
1217         _stream.Clear();
1218         return false;
1219     }
1220     if (!updateHeader()) {
1221         _stream.Clear();
1222         return false;
1223     }
1224     return true;
1225 }
1226 
1227 // BLOB storage
1228 
1229 class ldomBlobItem {
1230     int _storageIndex;
1231     lString32 _name;
1232     int _size;
1233     lUInt8 * _data;
1234 public:
ldomBlobItem(lString32 name)1235     ldomBlobItem( lString32 name ) : _storageIndex(-1), _name(name), _size(0), _data(NULL) {
1236 
1237     }
~ldomBlobItem()1238     ~ldomBlobItem() {
1239         if ( _data )
1240             delete[] _data;
1241     }
getSize()1242     int getSize() { return _size; }
getIndex()1243     int getIndex() { return _storageIndex; }
getData()1244     lUInt8 * getData() { return _data; }
getName()1245     lString32 getName() { return _name; }
setIndex(int index,int size)1246     void setIndex(int index, int size) {
1247         if ( _data )
1248             delete[] _data;
1249         _data = NULL;
1250         _storageIndex = index;
1251         _size = size;
1252     }
setData(const lUInt8 * data,int size)1253     void setData( const lUInt8 * data, int size ) {
1254         if ( _data )
1255             delete[] _data;
1256         if (data && size>0) {
1257             _data = new lUInt8[size];
1258             memcpy(_data, data, size);
1259             _size = size;
1260         } else {
1261             _data = NULL;
1262             _size = -1;
1263         }
1264     }
1265 };
1266 
ldomBlobCache()1267 ldomBlobCache::ldomBlobCache() : _cacheFile(NULL), _changed(false)
1268 {
1269 
1270 }
1271 
1272 #define BLOB_INDEX_MAGIC "BLOBINDX"
1273 
loadIndex()1274 bool ldomBlobCache::loadIndex()
1275 {
1276     bool res;
1277     SerialBuf buf(0,true);
1278     res = _cacheFile->read(CBT_BLOB_INDEX, buf);
1279     if (!res) {
1280         _list.clear();
1281         return true; // missing blob index: treat as empty list of blobs
1282     }
1283     if (!buf.checkMagic(BLOB_INDEX_MAGIC))
1284         return false;
1285     lUInt32 len;
1286     buf >> len;
1287     for ( lUInt32 i = 0; i<len; i++ ) {
1288         lString32 name;
1289         buf >> name;
1290         lUInt32 size;
1291         buf >> size;
1292         if (buf.error())
1293             break;
1294         ldomBlobItem * item = new ldomBlobItem(name);
1295         item->setIndex(i, size);
1296         _list.add(item);
1297     }
1298     res = !buf.error();
1299     return res;
1300 }
1301 
saveIndex()1302 bool ldomBlobCache::saveIndex()
1303 {
1304     bool res;
1305     SerialBuf buf(0,true);
1306     buf.putMagic(BLOB_INDEX_MAGIC);
1307     lUInt32 len = _list.length();
1308     buf << len;
1309     for ( lUInt32 i = 0; i<len; i++ ) {
1310         ldomBlobItem * item = _list[i];
1311         buf << item->getName();
1312         buf << (lUInt32)item->getSize();
1313     }
1314     res = _cacheFile->write( CBT_BLOB_INDEX, buf, false );
1315     return res;
1316 }
1317 
saveToCache(CRTimerUtil & timeout)1318 ContinuousOperationResult ldomBlobCache::saveToCache(CRTimerUtil & timeout)
1319 {
1320     if (!_list.length() || !_changed || _cacheFile==NULL)
1321         return CR_DONE;
1322     bool res = true;
1323     for ( int i=0; i<_list.length(); i++ ) {
1324         ldomBlobItem * item = _list[i];
1325         if ( item->getData() ) {
1326             res = _cacheFile->write(CBT_BLOB_DATA, i, item->getData(), item->getSize(), false) && res;
1327             if (res)
1328                 item->setIndex(i, item->getSize());
1329         }
1330         if (timeout.expired())
1331             return CR_TIMEOUT;
1332     }
1333     res = saveIndex() && res;
1334     if ( res )
1335         _changed = false;
1336     return res ? CR_DONE : CR_ERROR;
1337 }
1338 
setCacheFile(CacheFile * cacheFile)1339 void ldomBlobCache::setCacheFile( CacheFile * cacheFile )
1340 {
1341     _cacheFile = cacheFile;
1342     CRTimerUtil infinite;
1343     if (_list.empty())
1344         loadIndex();
1345     else
1346         saveToCache(infinite);
1347 }
1348 
addBlob(const lUInt8 * data,int size,lString32 name)1349 bool ldomBlobCache::addBlob( const lUInt8 * data, int size, lString32 name )
1350 {
1351     CRLog::debug("ldomBlobCache::addBlob( %s, size=%d, [%02x,%02x,%02x,%02x] )", LCSTR(name), size, data[0], data[1], data[2], data[3]);
1352     int index = _list.length();
1353     ldomBlobItem * item = new ldomBlobItem(name);
1354     if (_cacheFile != NULL) {
1355         _cacheFile->write(CBT_BLOB_DATA, index, data, size, false);
1356         item->setIndex(index, size);
1357     } else {
1358         item->setData(data, size);
1359     }
1360     _list.add(item);
1361     _changed = true;
1362     return true;
1363 }
1364 
getBlob(lString32 name)1365 LVStreamRef ldomBlobCache::getBlob( lString32 name )
1366 {
1367     ldomBlobItem * item = NULL;
1368     lUInt16 index = 0;
1369     for ( int i=0; i<_list.length(); i++ ) {
1370         if (_list[i]->getName() == name) {
1371             item = _list[i];
1372             index = i;
1373             break;
1374         }
1375     }
1376     if (item) {
1377         if (item->getData()) {
1378             // RAM
1379             return LVCreateMemoryStream(item->getData(), item->getSize(), true);
1380         } else {
1381             // CACHE FILE
1382             return _cacheFile->readStream(CBT_BLOB_DATA, index);
1383         }
1384     }
1385     return LVStreamRef();
1386 }
1387 
1388 #if BUILD_LITE!=1
1389 //#define DEBUG_RENDER_RECT_ACCESS
1390 #ifdef DEBUG_RENDER_RECT_ACCESS
1391   static signed char render_rect_flags[200000]={0};
rr_lock(ldomNode * node)1392   static void rr_lock( ldomNode * node )
1393   {
1394     int index = node->getDataIndex()>>4;
1395     CRLog::debug("RenderRectAccessor(%d) lock", index );
1396     if ( render_rect_flags[index] )
1397         crFatalError(123, "render rect accessor: cannot get lock");
1398     render_rect_flags[index] = 1;
1399   }
rr_unlock(ldomNode * node)1400   static void rr_unlock( ldomNode * node )
1401   {
1402     int index = node->getDataIndex()>>4;
1403     CRLog::debug("RenderRectAccessor(%d) lock", index );
1404     if ( !render_rect_flags[index] )
1405         crFatalError(123, "render rect accessor: unlock w/o lock");
1406     render_rect_flags[index] = 0;
1407   }
1408 #endif
1409 
RenderRectAccessor(ldomNode * node)1410 RenderRectAccessor::RenderRectAccessor( ldomNode * node )
1411 : _node(node), _modified(false), _dirty(false)
1412 {
1413 #ifdef DEBUG_RENDER_RECT_ACCESS
1414     rr_lock( _node );
1415 #endif
1416     _node->getRenderData(*this);
1417 }
1418 
~RenderRectAccessor()1419 RenderRectAccessor::~RenderRectAccessor()
1420 {
1421     if ( _modified )
1422         _node->setRenderData(*this);
1423 #ifdef DEBUG_RENDER_RECT_ACCESS
1424     if ( !_dirty )
1425         rr_unlock( _node );
1426 #endif
1427 }
1428 
clear()1429 void RenderRectAccessor::clear()
1430 {
1431     lvdomElementFormatRec::clear(); // will clear every field
1432     _modified = true;
1433     _dirty = false;
1434 }
1435 
push()1436 void RenderRectAccessor::push()
1437 {
1438     if ( _modified ) {
1439         _node->setRenderData(*this);
1440         _modified = false;
1441         _dirty = true;
1442         #ifdef DEBUG_RENDER_RECT_ACCESS
1443             rr_unlock( _node );
1444         #endif
1445     }
1446 }
1447 
setX(int x)1448 void RenderRectAccessor::setX( int x )
1449 {
1450     if ( _dirty ) {
1451         _dirty = false;
1452         _node->getRenderData(*this);
1453 #ifdef DEBUG_RENDER_RECT_ACCESS
1454         rr_lock( _node );
1455 #endif
1456     }
1457     if ( _x != x ) {
1458         _x = x;
1459         _modified = true;
1460     }
1461 }
setY(int y)1462 void RenderRectAccessor::setY( int y )
1463 {
1464     if ( _dirty ) {
1465         _dirty = false;
1466         _node->getRenderData(*this);
1467 #ifdef DEBUG_RENDER_RECT_ACCESS
1468         rr_lock( _node );
1469 #endif
1470     }
1471     if ( _y != y ) {
1472         _y = y;
1473         _modified = true;
1474     }
1475 }
setWidth(int w)1476 void RenderRectAccessor::setWidth( int w )
1477 {
1478     if ( _dirty ) {
1479         _dirty = false;
1480         _node->getRenderData(*this);
1481 #ifdef DEBUG_RENDER_RECT_ACCESS
1482         rr_lock( _node );
1483 #endif
1484     }
1485     if ( _width != w ) {
1486         _width = w;
1487         _modified = true;
1488     }
1489 }
setHeight(int h)1490 void RenderRectAccessor::setHeight( int h )
1491 {
1492     if ( _dirty ) {
1493         _dirty = false;
1494         _node->getRenderData(*this);
1495 #ifdef DEBUG_RENDER_RECT_ACCESS
1496         rr_lock( _node );
1497 #endif
1498     }
1499     if ( _height != h ) {
1500         _height = h;
1501         _modified = true;
1502     }
1503 }
1504 
getX()1505 int RenderRectAccessor::getX()
1506 {
1507     if ( _dirty ) {
1508         _dirty = false;
1509         _node->getRenderData(*this);
1510 #ifdef DEBUG_RENDER_RECT_ACCESS
1511         rr_lock( _node );
1512 #endif
1513     }
1514     return _x;
1515 }
getY()1516 int RenderRectAccessor::getY()
1517 {
1518     if ( _dirty ) {
1519         _dirty = false;
1520         _node->getRenderData(*this);
1521 #ifdef DEBUG_RENDER_RECT_ACCESS
1522         rr_lock( _node );
1523 #endif
1524     }
1525     return _y;
1526 }
getWidth()1527 int RenderRectAccessor::getWidth()
1528 {
1529     if ( _dirty ) {
1530         _dirty = false;
1531         _node->getRenderData(*this);
1532 #ifdef DEBUG_RENDER_RECT_ACCESS
1533         rr_lock( _node );
1534 #endif
1535     }
1536     return _width;
1537 }
getHeight()1538 int RenderRectAccessor::getHeight()
1539 {
1540     if ( _dirty ) {
1541         _dirty = false;
1542         _node->getRenderData(*this);
1543 #ifdef DEBUG_RENDER_RECT_ACCESS
1544         rr_lock( _node );
1545 #endif
1546     }
1547     return _height;
1548 }
getRect(lvRect & rc)1549 void RenderRectAccessor::getRect( lvRect & rc )
1550 {
1551     if ( _dirty ) {
1552         _dirty = false;
1553         _node->getRenderData(*this);
1554 #ifdef DEBUG_RENDER_RECT_ACCESS
1555         rr_lock( _node );
1556 #endif
1557     }
1558     rc.left = _x;
1559     rc.top = _y;
1560     rc.right = _x + _width;
1561     rc.bottom = _y + _height;
1562 }
1563 
setInnerX(int x)1564 void RenderRectAccessor::setInnerX( int x )
1565 {
1566     if ( _dirty ) {
1567         _dirty = false;
1568         _node->getRenderData(*this);
1569 #ifdef DEBUG_RENDER_RECT_ACCESS
1570         rr_lock( _node );
1571 #endif
1572     }
1573     if ( _inner_x != x ) {
1574         _inner_x = x;
1575         _modified = true;
1576     }
1577 }
setInnerY(int y)1578 void RenderRectAccessor::setInnerY( int y )
1579 {
1580     if ( _dirty ) {
1581         _dirty = false;
1582         _node->getRenderData(*this);
1583 #ifdef DEBUG_RENDER_RECT_ACCESS
1584         rr_lock( _node );
1585 #endif
1586     }
1587     if ( _inner_y != y ) {
1588         _inner_y = y;
1589         _modified = true;
1590     }
1591 }
setInnerWidth(int w)1592 void RenderRectAccessor::setInnerWidth( int w )
1593 {
1594     if ( _dirty ) {
1595         _dirty = false;
1596         _node->getRenderData(*this);
1597 #ifdef DEBUG_RENDER_RECT_ACCESS
1598         rr_lock( _node );
1599 #endif
1600     }
1601     if ( _inner_width != w ) {
1602         _inner_width = w;
1603         _modified = true;
1604     }
1605 }
getInnerX()1606 int RenderRectAccessor::getInnerX()
1607 {
1608     if ( _dirty ) {
1609         _dirty = false;
1610         _node->getRenderData(*this);
1611 #ifdef DEBUG_RENDER_RECT_ACCESS
1612         rr_lock( _node );
1613 #endif
1614     }
1615     return _inner_x;
1616 }
getInnerY()1617 int RenderRectAccessor::getInnerY()
1618 {
1619     if ( _dirty ) {
1620         _dirty = false;
1621         _node->getRenderData(*this);
1622 #ifdef DEBUG_RENDER_RECT_ACCESS
1623         rr_lock( _node );
1624 #endif
1625     }
1626     return _inner_y;
1627 }
getInnerWidth()1628 int RenderRectAccessor::getInnerWidth()
1629 {
1630     if ( _dirty ) {
1631         _dirty = false;
1632         _node->getRenderData(*this);
1633 #ifdef DEBUG_RENDER_RECT_ACCESS
1634         rr_lock( _node );
1635 #endif
1636     }
1637     return _inner_width;
1638 }
getUsableLeftOverflow()1639 int RenderRectAccessor::getUsableLeftOverflow()
1640 {
1641     if ( _dirty ) {
1642         _dirty = false;
1643         _node->getRenderData(*this);
1644 #ifdef DEBUG_RENDER_RECT_ACCESS
1645         rr_lock( _node );
1646 #endif
1647     }
1648     return _usable_left_overflow;
1649 }
getUsableRightOverflow()1650 int RenderRectAccessor::getUsableRightOverflow()
1651 {
1652     if ( _dirty ) {
1653         _dirty = false;
1654         _node->getRenderData(*this);
1655 #ifdef DEBUG_RENDER_RECT_ACCESS
1656         rr_lock( _node );
1657 #endif
1658     }
1659     return _usable_right_overflow;
1660 }
setUsableLeftOverflow(int dx)1661 void RenderRectAccessor::setUsableLeftOverflow( int dx )
1662 {
1663     if ( _dirty ) {
1664         _dirty = false;
1665         _node->getRenderData(*this);
1666 #ifdef DEBUG_RENDER_RECT_ACCESS
1667         rr_lock( _node );
1668 #endif
1669     }
1670     if ( dx < 0 ) dx = 0; // don't allow a negative value
1671     if ( _usable_left_overflow != dx ) {
1672         _usable_left_overflow = dx;
1673         _modified = true;
1674     }
1675 }
setUsableRightOverflow(int dx)1676 void RenderRectAccessor::setUsableRightOverflow( int dx )
1677 {
1678     if ( _dirty ) {
1679         _dirty = false;
1680         _node->getRenderData(*this);
1681 #ifdef DEBUG_RENDER_RECT_ACCESS
1682         rr_lock( _node );
1683 #endif
1684     }
1685     if ( dx < 0 ) dx = 0; // don't allow a negative value
1686     if ( _usable_right_overflow != dx ) {
1687         _usable_right_overflow = dx;
1688         _modified = true;
1689     }
1690 }
getTopOverflow()1691 int RenderRectAccessor::getTopOverflow()
1692 {
1693     if ( _dirty ) {
1694         _dirty = false;
1695         _node->getRenderData(*this);
1696 #ifdef DEBUG_RENDER_RECT_ACCESS
1697         rr_lock( _node );
1698 #endif
1699     }
1700     return _top_overflow;
1701 }
getBottomOverflow()1702 int RenderRectAccessor::getBottomOverflow()
1703 {
1704     if ( _dirty ) {
1705         _dirty = false;
1706         _node->getRenderData(*this);
1707 #ifdef DEBUG_RENDER_RECT_ACCESS
1708         rr_lock( _node );
1709 #endif
1710     }
1711     return _bottom_overflow;
1712 }
setTopOverflow(int dy)1713 void RenderRectAccessor::setTopOverflow( int dy )
1714 {
1715     if ( _dirty ) {
1716         _dirty = false;
1717         _node->getRenderData(*this);
1718 #ifdef DEBUG_RENDER_RECT_ACCESS
1719         rr_lock( _node );
1720 #endif
1721     }
1722     if ( dy < 0 ) dy = 0; // don't allow a negative value
1723     if ( _top_overflow != dy ) {
1724         _top_overflow = dy;
1725         _modified = true;
1726     }
1727 }
setBottomOverflow(int dy)1728 void RenderRectAccessor::setBottomOverflow( int dy )
1729 {
1730     if ( _dirty ) {
1731         _dirty = false;
1732         _node->getRenderData(*this);
1733 #ifdef DEBUG_RENDER_RECT_ACCESS
1734         rr_lock( _node );
1735 #endif
1736     }
1737     if ( dy < 0 ) dy = 0; // don't allow a negative value
1738     if ( _bottom_overflow != dy ) {
1739         _bottom_overflow = dy;
1740         _modified = true;
1741     }
1742 }
getBaseline()1743 int RenderRectAccessor::getBaseline()
1744 {
1745     if ( _dirty ) {
1746         _dirty = false;
1747         _node->getRenderData(*this);
1748 #ifdef DEBUG_RENDER_RECT_ACCESS
1749         rr_lock( _node );
1750 #endif
1751     }
1752     return _baseline;
1753 }
setBaseline(int baseline)1754 void RenderRectAccessor::setBaseline( int baseline )
1755 {
1756     if ( _dirty ) {
1757         _dirty = false;
1758         _node->getRenderData(*this);
1759 #ifdef DEBUG_RENDER_RECT_ACCESS
1760         rr_lock( _node );
1761 #endif
1762     }
1763     if ( _baseline != baseline ) {
1764         _baseline = baseline;
1765         _modified = true;
1766     }
1767 }
getListPropNodeIndex()1768 int RenderRectAccessor::getListPropNodeIndex()
1769 {
1770     if ( _dirty ) {
1771         _dirty = false;
1772         _node->getRenderData(*this);
1773 #ifdef DEBUG_RENDER_RECT_ACCESS
1774         rr_lock( _node );
1775 #endif
1776     }
1777     return _listprop_node_idx;
1778 }
setListPropNodeIndex(int idx)1779 void RenderRectAccessor::setListPropNodeIndex( int idx )
1780 {
1781     if ( _dirty ) {
1782         _dirty = false;
1783         _node->getRenderData(*this);
1784 #ifdef DEBUG_RENDER_RECT_ACCESS
1785         rr_lock( _node );
1786 #endif
1787     }
1788     if ( _listprop_node_idx != idx ) {
1789         _listprop_node_idx = idx;
1790         _modified = true;
1791     }
1792 }
getLangNodeIndex()1793 int RenderRectAccessor::getLangNodeIndex()
1794 {
1795     if ( _dirty ) {
1796         _dirty = false;
1797         _node->getRenderData(*this);
1798 #ifdef DEBUG_RENDER_RECT_ACCESS
1799         rr_lock( _node );
1800 #endif
1801     }
1802     return _lang_node_idx;
1803 }
setLangNodeIndex(int idx)1804 void RenderRectAccessor::setLangNodeIndex( int idx )
1805 {
1806     if ( _dirty ) {
1807         _dirty = false;
1808         _node->getRenderData(*this);
1809 #ifdef DEBUG_RENDER_RECT_ACCESS
1810         rr_lock( _node );
1811 #endif
1812     }
1813     if ( _lang_node_idx != idx ) {
1814         _lang_node_idx = idx;
1815         _modified = true;
1816     }
1817 }
getFlags()1818 unsigned short RenderRectAccessor::getFlags()
1819 {
1820     if ( _dirty ) {
1821         _dirty = false;
1822         _node->getRenderData(*this);
1823 #ifdef DEBUG_RENDER_RECT_ACCESS
1824         rr_lock( _node );
1825 #endif
1826     }
1827     return _flags;
1828 }
setFlags(unsigned short flags)1829 void RenderRectAccessor::setFlags( unsigned short flags )
1830 {
1831     if ( _dirty ) {
1832         _dirty = false;
1833         _node->getRenderData(*this);
1834 #ifdef DEBUG_RENDER_RECT_ACCESS
1835         rr_lock( _node );
1836 #endif
1837     }
1838     if ( _flags != flags ) {
1839         _flags = flags;
1840         _modified = true;
1841     }
1842 }
getTopRectsExcluded(int & lw,int & lh,int & rw,int & rh)1843 void RenderRectAccessor::getTopRectsExcluded( int & lw, int & lh, int & rw, int & rh )
1844 {
1845     if ( _dirty ) {
1846         _dirty = false;
1847         _node->getRenderData(*this);
1848 #ifdef DEBUG_RENDER_RECT_ACCESS
1849         rr_lock( _node );
1850 #endif
1851     }
1852     lw = _extra1 >> 16;    // Both stored in a single int slot (widths are
1853     rw = _extra1 & 0xFFFF; // constrained to lUint16 in many other places)
1854     lh = _extra2;
1855     rh = _extra3;
1856 }
setTopRectsExcluded(int lw,int lh,int rw,int rh)1857 void RenderRectAccessor::setTopRectsExcluded( int lw, int lh, int rw, int rh )
1858 {
1859     if ( _dirty ) {
1860         _dirty = false;
1861         _node->getRenderData(*this);
1862 #ifdef DEBUG_RENDER_RECT_ACCESS
1863         rr_lock( _node );
1864 #endif
1865     }
1866     if ( _extra2 != lh || _extra3 != rh || (_extra1>>16) != lw || (_extra1&0xFFFF) != rw ) {
1867         _extra1 = (lw<<16) + rw;
1868         _extra2 = lh;
1869         _extra3 = rh;
1870         _modified = true;
1871     }
1872 }
getNextFloatMinYs(int & left,int & right)1873 void RenderRectAccessor::getNextFloatMinYs( int & left, int & right )
1874 {
1875     if ( _dirty ) {
1876         _dirty = false;
1877         _node->getRenderData(*this);
1878 #ifdef DEBUG_RENDER_RECT_ACCESS
1879         rr_lock( _node );
1880 #endif
1881     }
1882     left = _extra4;
1883     right = _extra5;
1884 }
setNextFloatMinYs(int left,int right)1885 void RenderRectAccessor::setNextFloatMinYs( int left, int right )
1886 {
1887     if ( _dirty ) {
1888         _dirty = false;
1889         _node->getRenderData(*this);
1890 #ifdef DEBUG_RENDER_RECT_ACCESS
1891         rr_lock( _node );
1892 #endif
1893     }
1894     if ( _extra4 != left || _extra5 != right ) {
1895         _extra4 = left;
1896         _extra5 = right;
1897         _modified = true;
1898     }
1899 }
getInvolvedFloatIds(int & float_count,lUInt32 * float_ids)1900 void RenderRectAccessor::getInvolvedFloatIds( int & float_count, lUInt32 * float_ids )
1901 {
1902     if ( _dirty ) {
1903         _dirty = false;
1904         _node->getRenderData(*this);
1905 #ifdef DEBUG_RENDER_RECT_ACCESS
1906         rr_lock( _node );
1907 #endif
1908     }
1909     float_count = _extra0;
1910     if (float_count > 0) float_ids[0] = _extra1;
1911     if (float_count > 1) float_ids[1] = _extra2;
1912     if (float_count > 2) float_ids[2] = _extra3;
1913     if (float_count > 3) float_ids[3] = _extra4;
1914     if (float_count > 4) float_ids[4] = _extra5;
1915 }
setInvolvedFloatIds(int float_count,lUInt32 * float_ids)1916 void RenderRectAccessor::setInvolvedFloatIds( int float_count, lUInt32 * float_ids )
1917 {
1918     if ( _dirty ) {
1919         _dirty = false;
1920         _node->getRenderData(*this);
1921 #ifdef DEBUG_RENDER_RECT_ACCESS
1922         rr_lock( _node );
1923 #endif
1924     }
1925     _extra0 = float_count;
1926     if (float_count > 0) _extra1 = float_ids[0];
1927     if (float_count > 1) _extra2 = float_ids[1];
1928     if (float_count > 2) _extra3 = float_ids[2];
1929     if (float_count > 3) _extra4 = float_ids[3];
1930     if (float_count > 4) _extra5 = float_ids[4];
1931     _modified = true;
1932 }
1933 
1934 #endif
1935 
1936 
1937 class ldomPersistentText;
1938 class ldomPersistentElement;
1939 
/// common header for data storage items
/// (base struct of TextDataStorageItem and ElementDataStorageItem)
struct DataStorageItemHeader {
    /// item type: LXML_TEXT_NODE, LXML_ELEMENT_NODE, LXML_NO_DATA
    lUInt16 type;
    /// size of item / 16 (i.e. the item size expressed in 16-byte units)
    lUInt16 sizeDiv16;
    /// data index of this node in document
    lInt32 dataIndex;
    /// data index of parent node in document, 0 means no parent
    lInt32 parentIndex;
};
1951 
/// text node storage implementation
/// Extends the common item header with the text length and the utf8 text itself.
struct TextDataStorageItem : public DataStorageItemHeader {
    /// utf8 text length, characters
    /// NOTE(review): this value is passed as the lChar8 count to both accessors
    /// below, so it looks like a count of utf8 bytes rather than of decoded
    /// characters - confirm against the code that fills this struct
    lUInt16 length;
    /// utf8 text, w/o zero
    lChar8 text[2]; // utf8 text follows here, w/o zero byte at end
    /// return text, converted from utf8 with Utf8ToUnicode()
    inline lString32 getText() { return Utf8ToUnicode( text, length ); }
    /// return text as an lString8 built from the first `length` lChar8 of text
    inline lString8 getText8() { return lString8( text, length ); }
};
1962 
1963 /// element node data storage
1964 struct ElementDataStorageItem : public DataStorageItemHeader {
1965     lUInt16 id;
1966     lUInt16 nsid;
1967     lInt16  attrCount;
1968     lUInt8  rendMethod;
1969     lUInt8  reserved8;
1970     lInt32  childCount;
1971     lInt32  children[1];
attrsElementDataStorageItem1972     lUInt16 * attrs() { return (lUInt16 *)(children + childCount); }
attrElementDataStorageItem1973     lxmlAttribute * attr( int index ) { return (lxmlAttribute *)&(((lUInt16 *)(children + childCount))[index*4]); }
getAttrValueIdElementDataStorageItem1974     lUInt32 getAttrValueId( lUInt16 ns, lUInt16 id )
1975     {
1976         lUInt16 * a = attrs();
1977         for ( int i=0; i<attrCount; i++ ) {
1978             lxmlAttribute * attr = (lxmlAttribute *)(&a[i*4]);
1979             if ( !attr->compare( ns, id ) )
1980                 continue;
1981             return  attr->index;
1982         }
1983         return LXML_ATTR_VALUE_NONE;
1984     }
findAttrElementDataStorageItem1985     lxmlAttribute * findAttr( lUInt16 ns, lUInt16 id )
1986     {
1987         lUInt16 * a = attrs();
1988         for ( int i=0; i<attrCount; i++ ) {
1989             lxmlAttribute * attr = (lxmlAttribute *)(&a[i*4]);
1990             if ( attr->compare( ns, id ) )
1991                 return attr;
1992         }
1993         return NULL;
1994     }
1995     // TODO: add items here
1996     //css_style_ref_t _style;
1997     //font_ref_t      _font;
1998 };
1999 
2000 #endif
2001 
2002 
2003 //=================================================================
2004 // tinyNodeCollection implementation
2005 //=================================================================
2006 
/// Default constructor: creates an empty node collection.
/// Node counters and free-list heads start at 0, the style and font caches
/// are created with their hash table sizes, the four data storages are
/// created with sizes scaled by _storageMaxUncompressedSizeFactor, a fresh
/// properties container is created, and the remaining settings get their
/// default values. Both part tables (_textList, _elemList) are zero-filled.
/// _docIndex is not set here (see the note at the end of the body).
tinyNodeCollection::tinyNodeCollection()
: _textCount(0)
, _textNextFree(0)
, _elemCount(0)
, _elemNextFree(0)
, _styles(STYLE_HASH_TABLE_SIZE)
, _fonts(FONT_HASH_TABLE_SIZE)
, _tinyElementCount(0)
, _itemCount(0)
#if BUILD_LITE!=1
, _renderedBlockCache( 256 )
, _cacheFile(NULL)
, _cacheFileStale(true)
, _cacheFileLeaveAsDirty(false)
, _mapped(false)
, _maperror(false)
, _mapSavingStage(0)
, _spaceWidthScalePercent(DEF_SPACE_WIDTH_SCALE_PERCENT)
, _minSpaceCondensingPercent(DEF_MIN_SPACE_CONDENSING_PERCENT)
, _unusedSpaceThresholdPercent(DEF_UNUSED_SPACE_THRESHOLD_PERCENT)
, _maxAddedLetterSpacingPercent(DEF_MAX_ADDED_LETTER_SPACING_PERCENT)
, _nodeStyleHash(0)
, _nodeDisplayStyleHash(NODE_DISPLAY_STYLE_HASH_UNINITIALIZED)
, _nodeDisplayStyleHashInitial(NODE_DISPLAY_STYLE_HASH_UNINITIALIZED)
, _nodeStylesInvalidIfLoading(false)
#endif
, _textStorage(this, 't', (lUInt32)(TEXT_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), TEXT_CACHE_CHUNK_SIZE ) // persistent text node data storage
, _elemStorage(this, 'e', (lUInt32)(ELEM_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), ELEM_CACHE_CHUNK_SIZE ) // persistent element data storage
, _rectStorage(this, 'r', (lUInt32)(RECT_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), RECT_CACHE_CHUNK_SIZE ) // element render rect storage
, _styleStorage(this, 's', (lUInt32)(STYLE_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), STYLE_CACHE_CHUNK_SIZE ) // element style info storage
,_docProps(LVCreatePropsContainer())
,_docFlags(DOC_FLAG_DEFAULTS)
,_fontMap(113)
,_hangingPunctuationEnabled(false)
,_renderBlockRenderingFlags(BLOCK_RENDERING_FLAGS_DEFAULT)
,_DOMVersionRequested(DOM_VERSION_CURRENT)
,_interlineScaleFactor(INTERLINE_SCALE_FACTOR_NO_SCALE)
{
    // No node parts are allocated yet: every part slot starts as NULL
    memset( _textList, 0, sizeof(_textList) );
    memset( _elemList, 0, sizeof(_elemList) );
    // _docIndex assigned in ldomDocument constructor
}
2049 
/// Creates an empty node collection that takes over the settings of v.
/// Only _docFlags, _stylesheet, _hangingPunctuationEnabled,
/// _renderBlockRenderingFlags, _DOMVersionRequested and _interlineScaleFactor
/// are copied from v. Nodes are NOT copied: counters and free-list heads
/// start at 0, the caches and storages are created fresh, a new properties
/// container is created, and both part tables are zero-filled.
/// _docIndex is not set here (see the note at the end of the body).
tinyNodeCollection::tinyNodeCollection( tinyNodeCollection & v )
: _textCount(0)
, _textNextFree(0)
, _elemCount(0)
, _elemNextFree(0)
, _styles(STYLE_HASH_TABLE_SIZE)
, _fonts(FONT_HASH_TABLE_SIZE)
, _tinyElementCount(0)
, _itemCount(0)
#if BUILD_LITE!=1
, _renderedBlockCache( 256 )
, _cacheFile(NULL)
, _cacheFileStale(true)
, _cacheFileLeaveAsDirty(false)
, _mapped(false)
, _maperror(false)
, _mapSavingStage(0)
, _spaceWidthScalePercent(DEF_SPACE_WIDTH_SCALE_PERCENT)
, _minSpaceCondensingPercent(DEF_MIN_SPACE_CONDENSING_PERCENT)
, _unusedSpaceThresholdPercent(DEF_UNUSED_SPACE_THRESHOLD_PERCENT)
, _maxAddedLetterSpacingPercent(DEF_MAX_ADDED_LETTER_SPACING_PERCENT)
, _nodeStyleHash(0)
, _nodeDisplayStyleHash(NODE_DISPLAY_STYLE_HASH_UNINITIALIZED)
, _nodeDisplayStyleHashInitial(NODE_DISPLAY_STYLE_HASH_UNINITIALIZED)
, _nodeStylesInvalidIfLoading(false)
#endif
, _textStorage(this, 't', (lUInt32)(TEXT_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), TEXT_CACHE_CHUNK_SIZE ) // persistent text node data storage
, _elemStorage(this, 'e', (lUInt32)(ELEM_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), ELEM_CACHE_CHUNK_SIZE ) // persistent element data storage
, _rectStorage(this, 'r', (lUInt32)(RECT_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), RECT_CACHE_CHUNK_SIZE ) // element render rect storage
, _styleStorage(this, 's', (lUInt32)(STYLE_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), STYLE_CACHE_CHUNK_SIZE ) // element style info storage
,_docProps(LVCreatePropsContainer())
,_docFlags(v._docFlags)
,_stylesheet(v._stylesheet)
,_fontMap(113)
,_hangingPunctuationEnabled(v._hangingPunctuationEnabled)
,_renderBlockRenderingFlags(v._renderBlockRenderingFlags)
,_DOMVersionRequested(v._DOMVersionRequested)
,_interlineScaleFactor(v._interlineScaleFactor)
{
    // No node parts are allocated yet: every part slot starts as NULL
    memset( _textList, 0, sizeof(_textList) );
    memset( _elemList, 0, sizeof(_elemList) );
    // _docIndex assigned in ldomDocument constructor
}
2093 
setHangingPunctiationEnabled(bool value)2094 bool tinyNodeCollection::setHangingPunctiationEnabled(bool value) {
2095     if (_hangingPunctuationEnabled != value) {
2096         _hangingPunctuationEnabled = value;
2097         return true;
2098     }
2099     return false;
2100 }
2101 
setRenderBlockRenderingFlags(lUInt32 flags)2102 bool tinyNodeCollection::setRenderBlockRenderingFlags(lUInt32 flags) {
2103     if (_renderBlockRenderingFlags != flags) {
2104         _renderBlockRenderingFlags = flags;
2105         // Check coherency and ensure dependencies of flags
2106         if (_renderBlockRenderingFlags & ~BLOCK_RENDERING_ENHANCED) // If any other flag is set,
2107             _renderBlockRenderingFlags |= BLOCK_RENDERING_ENHANCED; // set ENHANGED
2108         if (_renderBlockRenderingFlags & BLOCK_RENDERING_FLOAT_FLOATBOXES)
2109             _renderBlockRenderingFlags |= BLOCK_RENDERING_PREPARE_FLOATBOXES;
2110         if (_renderBlockRenderingFlags & BLOCK_RENDERING_PREPARE_FLOATBOXES)
2111             _renderBlockRenderingFlags |= BLOCK_RENDERING_WRAP_FLOATS;
2112         return true;
2113     }
2114     return false;
2115 }
2116 
setDOMVersionRequested(lUInt32 version)2117 bool tinyNodeCollection::setDOMVersionRequested(lUInt32 version)
2118 {
2119     if (_DOMVersionRequested != version) {
2120         _DOMVersionRequested = version;
2121         return true;
2122     }
2123     return false;
2124 }
2125 
setInterlineScaleFactor(int value)2126 bool tinyNodeCollection::setInterlineScaleFactor(int value) {
2127     if (_interlineScaleFactor != value) {
2128         _interlineScaleFactor = value;
2129         return true;
2130     }
2131     return false;
2132 }
2133 
2134 #if BUILD_LITE!=1
openCacheFile()2135 bool tinyNodeCollection::openCacheFile()
2136 {
2137     if ( _cacheFile )
2138         return true;
2139     CacheFile * f = new CacheFile(_DOMVersionRequested);
2140     //lString32 cacheFileName("/tmp/cr3swap.tmp");
2141 
2142     lString32 fname = getProps()->getStringDef( DOC_PROP_FILE_NAME, "noname" );
2143     //lUInt32 sz = (lUInt32)getProps()->getInt64Def(DOC_PROP_FILE_SIZE, 0);
2144     lUInt32 crc = (lUInt32)getProps()->getIntDef(DOC_PROP_FILE_CRC32, 0);
2145 
2146     if ( !ldomDocCache::enabled() ) {
2147         CRLog::error("Cannot open cached document: cache dir is not initialized");
2148         delete f;
2149         return false;
2150     }
2151 
2152     CRLog::info("ldomDocument::openCacheFile() - looking for cache file %s", UnicodeToUtf8(fname).c_str() );
2153 
2154     lString32 cache_path;
2155     LVStreamRef map = ldomDocCache::openExisting( fname, crc, getPersistenceFlags(), cache_path );
2156     if ( map.isNull() ) {
2157         delete f;
2158         return false;
2159     }
2160     CRLog::info("ldomDocument::openCacheFile() - cache file found, trying to read index %s", UnicodeToUtf8(fname).c_str() );
2161 
2162     if ( !f->open( map ) ) {
2163         delete f;
2164         return false;
2165     }
2166     CRLog::info("ldomDocument::openCacheFile() - index read successfully %s", UnicodeToUtf8(fname).c_str() );
2167     f->setCachePath(cache_path);
2168     _cacheFile = f;
2169     _textStorage.setCache( f );
2170     _elemStorage.setCache( f );
2171     _rectStorage.setCache( f );
2172     _styleStorage.setCache( f );
2173     _blobCache.setCacheFile( f );
2174     return true;
2175 }
2176 
swapToCacheIfNecessary()2177 bool tinyNodeCollection::swapToCacheIfNecessary()
2178 {
2179     if ( !_cacheFile || _mapped || _maperror)
2180         return false;
2181     return createCacheFile();
2182     //return swapToCache();
2183 }
2184 
createCacheFile()2185 bool tinyNodeCollection::createCacheFile()
2186 {
2187     if ( _cacheFile )
2188         return true;
2189     CacheFile * f = new CacheFile(_DOMVersionRequested);
2190     //lString32 cacheFileName("/tmp/cr3swap.tmp");
2191 
2192     lString32 fname = getProps()->getStringDef( DOC_PROP_FILE_NAME, "noname" );
2193     lUInt32 sz = (lUInt32)getProps()->getInt64Def(DOC_PROP_FILE_SIZE, 0);
2194     lUInt32 crc = (lUInt32)getProps()->getIntDef(DOC_PROP_FILE_CRC32, 0);
2195 
2196     if ( !ldomDocCache::enabled() ) {
2197         CRLog::error("Cannot swap: cache dir is not initialized");
2198         delete f;
2199         return false;
2200     }
2201 
2202     CRLog::info("ldomDocument::createCacheFile() - initialized swapping of document %s to cache file", UnicodeToUtf8(fname).c_str() );
2203 
2204     lString32 cache_path;
2205     LVStreamRef map = ldomDocCache::createNew( fname, crc, getPersistenceFlags(), sz, cache_path );
2206     if ( map.isNull() ) {
2207         delete f;
2208         return false;
2209     }
2210 
2211     if ( !f->create( map ) ) {
2212         delete f;
2213         return false;
2214     }
2215     f->setCachePath(cache_path);
2216     _cacheFile = f;
2217     _mapped = true;
2218     _textStorage.setCache( f );
2219     _elemStorage.setCache( f );
2220     _rectStorage.setCache( f );
2221     _styleStorage.setCache( f );
2222     _blobCache.setCacheFile( f );
2223     setCacheFileStale(true);
2224     return true;
2225 }
2226 
getCacheFilePath()2227 lString32 tinyNodeCollection::getCacheFilePath() {
2228     return _cacheFile != NULL ? _cacheFile->getCachePath() : lString32::empty_str;
2229 }
2230 
clearNodeStyle(lUInt32 dataIndex)2231 void tinyNodeCollection::clearNodeStyle( lUInt32 dataIndex )
2232 {
2233     ldomNodeStyleInfo info;
2234     _styleStorage.getStyleData( dataIndex, &info );
2235     _styles.release( info._styleIndex );
2236     _fonts.release( info._fontIndex );
2237     info._fontIndex = info._styleIndex = 0;
2238     _styleStorage.setStyleData( dataIndex, &info );
2239     _nodeStyleHash = 0;
2240 }
2241 
setNodeStyleIndex(lUInt32 dataIndex,lUInt16 index)2242 void tinyNodeCollection::setNodeStyleIndex( lUInt32 dataIndex, lUInt16 index )
2243 {
2244     ldomNodeStyleInfo info;
2245     _styleStorage.getStyleData( dataIndex, &info );
2246     if ( info._styleIndex!=index ) {
2247         info._styleIndex = index;
2248         _styleStorage.setStyleData( dataIndex, &info );
2249         _nodeStyleHash = 0;
2250     }
2251 }
2252 
setNodeFontIndex(lUInt32 dataIndex,lUInt16 index)2253 void tinyNodeCollection::setNodeFontIndex( lUInt32 dataIndex, lUInt16 index )
2254 {
2255     ldomNodeStyleInfo info;
2256     _styleStorage.getStyleData( dataIndex, &info );
2257     if ( info._fontIndex!=index ) {
2258         info._fontIndex = index;
2259         _styleStorage.setStyleData( dataIndex, &info );
2260         _nodeStyleHash = 0;
2261     }
2262 }
2263 
getNodeStyleIndex(lUInt32 dataIndex)2264 lUInt16 tinyNodeCollection::getNodeStyleIndex( lUInt32 dataIndex )
2265 {
2266     ldomNodeStyleInfo info;
2267     _styleStorage.getStyleData( dataIndex, &info );
2268     return info._styleIndex;
2269 }
2270 
getNodeStyle(lUInt32 dataIndex)2271 css_style_ref_t tinyNodeCollection::getNodeStyle( lUInt32 dataIndex )
2272 {
2273     ldomNodeStyleInfo info;
2274     _styleStorage.getStyleData( dataIndex, &info );
2275     css_style_ref_t res =  _styles.get( info._styleIndex );
2276     if (!res.isNull())
2277         _styles.addIndexRef(info._styleIndex);
2278 #if DEBUG_DOM_STORAGE==1
2279     if ( res.isNull() && info._styleIndex!=0 ) {
2280         CRLog::error("Null style returned for index %d", (int)info._styleIndex);
2281     }
2282 #endif
2283     return res;
2284 }
2285 
getNodeFont(lUInt32 dataIndex)2286 font_ref_t tinyNodeCollection::getNodeFont( lUInt32 dataIndex )
2287 {
2288     ldomNodeStyleInfo info;
2289     _styleStorage.getStyleData( dataIndex, &info );
2290     return _fonts.get( info._fontIndex );
2291 }
2292 
setNodeStyle(lUInt32 dataIndex,css_style_ref_t & v)2293 void tinyNodeCollection::setNodeStyle( lUInt32 dataIndex, css_style_ref_t & v )
2294 {
2295     ldomNodeStyleInfo info;
2296     _styleStorage.getStyleData( dataIndex, &info );
2297     _styles.cache( info._styleIndex, v );
2298 #if DEBUG_DOM_STORAGE==1
2299     if ( info._styleIndex==0 ) {
2300         CRLog::error("tinyNodeCollection::setNodeStyle() styleIndex is 0 after caching");
2301     }
2302 #endif
2303     _styleStorage.setStyleData( dataIndex, &info );
2304     _nodeStyleHash = 0;
2305 }
2306 
setNodeFont(lUInt32 dataIndex,font_ref_t & v)2307 void tinyNodeCollection::setNodeFont( lUInt32 dataIndex, font_ref_t & v )
2308 {
2309     ldomNodeStyleInfo info;
2310     _styleStorage.getStyleData( dataIndex, &info );
2311     _fonts.cache( info._fontIndex, v );
2312     _styleStorage.setStyleData( dataIndex, &info );
2313     _nodeStyleHash = 0;
2314 }
2315 
getNodeFontIndex(lUInt32 dataIndex)2316 lUInt16 tinyNodeCollection::getNodeFontIndex( lUInt32 dataIndex )
2317 {
2318     ldomNodeStyleInfo info;
2319     _styleStorage.getStyleData( dataIndex, &info );
2320     return info._fontIndex;
2321 }
2322 
loadNodeData(lUInt16 type,ldomNode ** list,int nodecount)2323 bool tinyNodeCollection::loadNodeData(lUInt16 type, ldomNode ** list, int nodecount)
2324 {
2325     int count = ((nodecount + TNC_PART_LEN - 1) >> TNC_PART_SHIFT);
2326     for (lUInt16 i=0; i<count; i++) {
2327         int offs = i*TNC_PART_LEN;
2328         int sz = TNC_PART_LEN;
2329         if (offs + sz > nodecount) {
2330             sz = nodecount - offs;
2331         }
2332 
2333         lUInt8 * p;
2334         int buflen;
2335         if (!_cacheFile->read( type, i, p, buflen ))
2336             return false;
2337         if (!p || (unsigned)buflen != sizeof(ldomNode) * sz)
2338             return false;
2339         ldomNode * buf = (ldomNode *)p;
2340         if (sz == TNC_PART_LEN)
2341             list[i] = buf;
2342         else {
2343             // buf contains `sz' ldomNode items
2344             // _elemList, _textList (as `list' argument) must always be TNC_PART_LEN size
2345             // add into `list' zero filled (TNC_PART_LEN - sz) items
2346             list[i] = (ldomNode *)realloc(buf, TNC_PART_LEN * sizeof(ldomNode));
2347             if (NULL == list[i]) {
2348                 free(buf);
2349                 CRLog::error("Not enough memory!");
2350                 return false;
2351             }
2352             memset( list[i] + sz, 0, (TNC_PART_LEN - sz) * sizeof(ldomNode) );
2353         }
2354         for (int j=0; j<sz; j++) {
2355             list[i][j].setDocumentIndex( _docIndex );
2356             // validate loaded nodes: all non-null nodes should be marked as persistent, i.e. the actual node data: _data._pelem_addr, _data._ptext_addr,
2357             // NOT _data._elem_ptr, _data._text_ptr.
2358             // So we check this flag, but after setting document so that isNull() works correctly.
2359             // If the node is not persistent now, then _data._elem_ptr will be used, which then generate SEGFAULT.
2360             if (!list[i][j].isNull() && !list[i][j].isPersistent()) {
2361                 CRLog::error("Invalid cached node, flag PERSISTENT are NOT set: segment=%d, index=%d", i, j);
2362                 // list[i] will be freed in the caller method.
2363                 return false;
2364             }
2365             if ( list[i][j].isElement() ) {
2366                 // will be set by loadStyles/updateStyles
2367                 //list[i][j]._data._pelem._styleIndex = 0;
2368                 setNodeFontIndex( list[i][j]._handle._dataIndex, 0 );
2369                 //list[i][j]._data._pelem._fontIndex = 0;
2370             }
2371         }
2372     }
2373     return true;
2374 }
2375 
saveNodeData(lUInt16 type,ldomNode ** list,int nodecount)2376 bool tinyNodeCollection::saveNodeData( lUInt16 type, ldomNode ** list, int nodecount )
2377 {
2378     int count = ((nodecount+TNC_PART_LEN-1) >> TNC_PART_SHIFT);
2379     for (lUInt16 i=0; i<count; i++) {
2380         if (!list[i])
2381             continue;
2382         int offs = i*TNC_PART_LEN;
2383         int sz = TNC_PART_LEN;
2384         if (offs + sz > nodecount) {
2385             sz = nodecount - offs;
2386         }
2387         ldomNode buf[TNC_PART_LEN];
2388         memcpy(buf, list[i], sizeof(ldomNode) * sz);
2389         for (int j = 0; j < sz; j++) {
2390             buf[j].setDocumentIndex(_docIndex);
2391             // On 64bits builds, this serialized ldomNode may have some
2392             // random data at the end, for being:
2393             //   union { [...] tinyElement * _elem_ptr; [...] lUInt32 _ptext_addr; [...] lUInt32 _nextFreeIndex }
2394             // To get "reproducible" cache files with a same file checksum, we'd
2395             // rather have the remains of the _elem_ptr sets to 0
2396             if (sizeof(int *) == 8) { // 64bits
2397                 lUInt32 tmp = buf[j]._data._nextFreeIndex; // save 32bits part
2398                 buf[j]._data._elem_ptr = 0;                // clear 64bits area
2399                 buf[j]._data._nextFreeIndex = tmp;         // restore 32bits part
2400             }
2401         }
2402         if (!_cacheFile->write(type, i, (lUInt8*)buf, sizeof(ldomNode) * sz, COMPRESS_NODE_DATA))
2403             crFatalError(-1, "Cannot write node data");
2404     }
2405     return true;
2406 }
2407 
2408 #define NODE_INDEX_MAGIC 0x19283746
saveNodeData()2409 bool tinyNodeCollection::saveNodeData()
2410 {
2411     SerialBuf buf(12, true);
2412     buf << (lUInt32)NODE_INDEX_MAGIC << (lUInt32)_elemCount << (lUInt32)_textCount;
2413     if ( !saveNodeData( CBT_ELEM_NODE, _elemList, _elemCount+1 ) )
2414         return false;
2415     if ( !saveNodeData( CBT_TEXT_NODE, _textList, _textCount+1 ) )
2416         return false;
2417     if ( !_cacheFile->write(CBT_NODE_INDEX, buf, COMPRESS_NODE_DATA) )
2418         return false;
2419     return true;
2420 }
2421 
loadNodeData()2422 bool tinyNodeCollection::loadNodeData()
2423 {
2424     SerialBuf buf(0, true);
2425     if ( !_cacheFile->read((lUInt16)CBT_NODE_INDEX, buf) )
2426         return false;
2427     lUInt32 magic;
2428     lInt32 elemcount;
2429     lInt32 textcount;
2430     buf >> magic >> elemcount >> textcount;
2431     if ( magic != NODE_INDEX_MAGIC ) {
2432         return false;
2433     }
2434     if ( elemcount<=0 )
2435         return false;
2436     if ( textcount<=0 )
2437         return false;
2438     ldomNode * elemList[TNC_PART_COUNT] = { 0 };
2439     ldomNode * textList[TNC_PART_COUNT] = { 0 };
2440     if ( !loadNodeData( CBT_ELEM_NODE, elemList, elemcount+1 ) ) {
2441         for ( int i=0; i<TNC_PART_COUNT; i++ )
2442             if ( elemList[i] )
2443                 free( elemList[i] );
2444         return false;
2445     }
2446     if ( !loadNodeData( CBT_TEXT_NODE, textList, textcount+1 ) ) {
2447         for ( int i=0; i<TNC_PART_COUNT; i++ )
2448             if ( textList[i] )
2449                 free( textList[i] );
2450         // Also clean elemList previously successfully loaded, to avoid mem leak
2451         for ( int i=0; i<TNC_PART_COUNT; i++ )
2452             if ( elemList[i] )
2453                 free( elemList[i] );
2454         return false;
2455     }
2456     for ( int i=0; i<TNC_PART_COUNT; i++ ) {
2457         if ( _elemList[i] )
2458             free( _elemList[i] );
2459         if ( _textList[i] )
2460             free( _textList[i] );
2461     }
2462     memcpy( _elemList, elemList, sizeof(elemList) );
2463     memcpy( _textList, textList, sizeof(textList) );
2464     _elemCount = elemcount;
2465     _textCount = textcount;
2466     return true;
2467 }
2468 #endif  // BUILD_LITE!=1
2469 
2470 /// get ldomNode instance pointer
getTinyNode(lUInt32 index)2471 ldomNode * tinyNodeCollection::getTinyNode( lUInt32 index )
2472 {
2473     if ( !index )
2474         return NULL;
2475     if ( index & 1 ) // element
2476         return &(_elemList[index>>TNC_PART_INDEX_SHIFT][(index>>4)&TNC_PART_MASK]);
2477     else // text
2478         return &(_textList[index>>TNC_PART_INDEX_SHIFT][(index>>4)&TNC_PART_MASK]);
2479 }
2480 
2481 /// allocate new tiny node
allocTinyNode(int type)2482 ldomNode * tinyNodeCollection::allocTinyNode( int type )
2483 {
2484     ldomNode * res;
2485     if ( type & 1 ) {
2486         // allocate Element
2487         if ( _elemNextFree ) {
2488             // reuse existing free item
2489             int index = (_elemNextFree << 4) | type;
2490             res = getTinyNode(index);
2491             res->_handle._dataIndex = index;
2492             _elemNextFree = res->_data._nextFreeIndex;
2493         } else {
2494             // create new item
2495             _elemCount++;
2496             int idx = _elemCount >> TNC_PART_SHIFT;
2497             if (idx >= TNC_PART_COUNT)
2498                 crFatalError(1003, "allocTinyNode: can't create any more element nodes (hard limit)");
2499             ldomNode * part = _elemList[idx];
2500             if ( !part ) {
2501                 part = (ldomNode*)calloc(TNC_PART_LEN, sizeof(*part));
2502                 _elemList[idx] = part;
2503             }
2504             res = &part[_elemCount & TNC_PART_MASK];
2505             res->setDocumentIndex( _docIndex );
2506             res->_handle._dataIndex = (_elemCount << 4) | type;
2507         }
2508         _itemCount++;
2509     } else {
2510         // allocate Text
2511         if ( _textNextFree ) {
2512             // reuse existing free item
2513             int index = (_textNextFree << 4) | type;
2514             res = getTinyNode(index);
2515             res->_handle._dataIndex = index;
2516             _textNextFree = res->_data._nextFreeIndex;
2517         } else {
2518             // create new item
2519             _textCount++;
2520             if (_textCount >= (TNC_PART_COUNT << TNC_PART_SHIFT))
2521                 crFatalError(1003, "allocTinyNode: can't create any more text nodes (hard limit)");
2522             ldomNode * part = _textList[_textCount >> TNC_PART_SHIFT];
2523             if ( !part ) {
2524                 part = (ldomNode*)calloc(TNC_PART_LEN, sizeof(*part));
2525                 _textList[ _textCount >> TNC_PART_SHIFT ] = part;
2526             }
2527             res = &part[_textCount & TNC_PART_MASK];
2528             res->setDocumentIndex( _docIndex );
2529             res->_handle._dataIndex = (_textCount << 4) | type;
2530         }
2531         _itemCount++;
2532     }
2533     _nodeStyleHash = 0;
2534     return res;
2535 }
2536 
recycleTinyNode(lUInt32 index)2537 void tinyNodeCollection::recycleTinyNode( lUInt32 index )
2538 {
2539     if ( index & 1 ) {
2540         // element
2541         index >>= 4;
2542         ldomNode * part = _elemList[index >> TNC_PART_SHIFT];
2543         ldomNode * p = &part[index & TNC_PART_MASK];
2544         p->_handle._dataIndex = 0; // indicates NULL node
2545         p->_data._nextFreeIndex = _elemNextFree;
2546         _elemNextFree = index;
2547         _itemCount--;
2548     } else {
2549         // text
2550         index >>= 4;
2551         ldomNode * part = _textList[index >> TNC_PART_SHIFT];
2552         ldomNode * p = &part[index & TNC_PART_MASK];
2553         p->_handle._dataIndex = 0; // indicates NULL node
2554         p->_data._nextFreeIndex = _textNextFree;
2555         _textNextFree = index;
2556         _itemCount--;
2557     }
2558     _nodeStyleHash = 0;
2559 }
2560 
~tinyNodeCollection()2561 tinyNodeCollection::~tinyNodeCollection()
2562 {
2563 #if BUILD_LITE!=1
2564     if ( _cacheFile )
2565         delete _cacheFile;
2566 #endif
2567     // clear all elem parts
2568     for ( int partindex = 0; partindex<=(_elemCount>>TNC_PART_SHIFT); partindex++ ) {
2569         ldomNode * part = _elemList[partindex];
2570         if ( part ) {
2571             int n0 = TNC_PART_LEN * partindex;
2572             for ( int i=0; i<TNC_PART_LEN && n0+i<=_elemCount; i++ )
2573                 part[i].onCollectionDestroy();
2574             free(part);
2575             _elemList[partindex] = NULL;
2576         }
2577     }
2578     // clear all text parts
2579     for ( int partindex = 0; partindex<=(_textCount>>TNC_PART_SHIFT); partindex++ ) {
2580         ldomNode * part = _textList[partindex];
2581         if ( part ) {
2582             int n0 = TNC_PART_LEN * partindex;
2583             for ( int i=0; i<TNC_PART_LEN && n0+i<=_textCount; i++ )
2584                 part[i].onCollectionDestroy();
2585             free(part);
2586             _textList[partindex] = NULL;
2587         }
2588     }
2589     // document unregistered in ldomDocument destructor
2590 }
2591 
2592 #if BUILD_LITE!=1
2593 /// put all objects into persistent storage
persist(CRTimerUtil & maxTime)2594 void tinyNodeCollection::persist( CRTimerUtil & maxTime )
2595 {
2596     CRLog::info("lxmlDocBase::persist() invoked - converting all nodes to persistent objects");
2597     // elements
2598     for ( int partindex = 0; partindex<=(_elemCount>>TNC_PART_SHIFT); partindex++ ) {
2599         ldomNode * part = _elemList[partindex];
2600         if ( part ) {
2601             int n0 = TNC_PART_LEN * partindex;
2602             for ( int i=0; i<TNC_PART_LEN && n0+i<=_elemCount; i++ )
2603                 if ( !part[i].isNull() && !part[i].isPersistent() ) {
2604                     part[i].persist();
2605                     if (maxTime.expired())
2606                         return;
2607                 }
2608         }
2609     }
2610     //_cacheFile->flush(false); // intermediate flush
2611     if ( maxTime.expired() )
2612         return;
2613     // texts
2614     for ( int partindex = 0; partindex<=(_textCount>>TNC_PART_SHIFT); partindex++ ) {
2615         ldomNode * part = _textList[partindex];
2616         if ( part ) {
2617             int n0 = TNC_PART_LEN * partindex;
2618             for ( int i=0; i<TNC_PART_LEN && n0+i<=_textCount; i++ )
2619                 if ( !part[i].isNull() && !part[i].isPersistent() ) {
2620                     //CRLog::trace("before persist");
2621                     part[i].persist();
2622                     //CRLog::trace("after persist");
2623                     if (maxTime.expired())
2624                         return;
2625                 }
2626         }
2627     }
2628     //_cacheFile->flush(false); // intermediate flush
2629 }
2630 #endif
2631 
2632 
2633 /*
2634 
2635   Struct Node
2636   { document, nodeid&type, address }
2637 
2638   Data Offset format
2639 
2640   Chunk index, offset, type.
2641 
2642   getDataPtr( lUInt32 address )
2643   {
2644      return (address & TYPE_MASK) ? textStorage.get( address & ~TYPE_MASK ) : elementStorage.get( address & ~TYPE_MASK );
2645   }
2646 
2647   index->instance, data
2648   >
2649   [index] { vtable, doc, id, dataptr } // 16 bytes per node
2650 
2651 
2652  */
2653 
2654 
2655 /// saves all unsaved chunks to cache file
save(CRTimerUtil & maxTime)2656 bool ldomDataStorageManager::save( CRTimerUtil & maxTime )
2657 {
2658     bool res = true;
2659 #if BUILD_LITE!=1
2660     if ( !_cache )
2661         return true;
2662     for ( int i=0; i<_chunks.length(); i++ ) {
2663         if ( !_chunks[i]->save() ) {
2664             res = false;
2665             break;
2666         }
2667         //CRLog::trace("time elapsed: %d", (int)maxTime.elapsed());
2668         if (maxTime.expired())
2669             return res;
2670 //        if ( (i&3)==3 &&  maxTime.expired() )
2671 //            return res;
2672     }
2673     if (!maxTime.infinite())
2674         _cache->flush(false, maxTime); // intermediate flush
2675     if ( maxTime.expired() )
2676         return res;
2677     if ( !res )
2678         return false;
2679     // save chunk index
2680     int n = _chunks.length();
2681     SerialBuf buf(n*4+4, true);
2682     buf << (lUInt32)n;
2683     for ( int i=0; i<n; i++ ) {
2684         buf << (lUInt32)_chunks[i]->_bufpos;
2685     }
2686     res = _cache->write( cacheType(), 0xFFFF, buf, COMPRESS_NODE_STORAGE_DATA );
2687     if ( !res ) {
2688         CRLog::error("ldomDataStorageManager::save() - Cannot write chunk index");
2689     }
2690 #endif
2691     return res;
2692 }
2693 
2694 /// load chunk index from cache file
load()2695 bool ldomDataStorageManager::load()
2696 {
2697 #if BUILD_LITE!=1
2698     if ( !_cache )
2699         return false;
2700     //load chunk index
2701     SerialBuf buf(0, true);
2702     if ( !_cache->read( cacheType(), 0xFFFF, buf ) ) {
2703         CRLog::error("ldomDataStorageManager::load() - Cannot read chunk index");
2704         return false;
2705     }
2706     lUInt32 n;
2707     buf >> n;
2708     if (n > 10000)
2709         return false; // invalid
2710     _recentChunk = NULL;
2711     _chunks.clear();
2712     lUInt32 compsize = 0;
2713     lUInt32 uncompsize = 0;
2714     for (lUInt32 i=0; i<n; i++ ) {
2715         buf >> uncompsize;
2716         if ( buf.error() ) {
2717             _chunks.clear();
2718             return false;
2719         }
2720         _chunks.add( new ldomTextStorageChunk( this, (lUInt16)i,compsize, uncompsize ) );
2721     }
2722     return true;
2723 #else
2724     return false;
2725 #endif
2726 }
2727 
2728 /// get chunk pointer and update usage data
getChunk(lUInt32 address)2729 ldomTextStorageChunk * ldomDataStorageManager::getChunk( lUInt32 address )
2730 {
2731     ldomTextStorageChunk * chunk = _chunks[address>>16];
2732     if ( chunk!=_recentChunk ) {
2733         if ( chunk->_prevRecent )
2734             chunk->_prevRecent->_nextRecent = chunk->_nextRecent;
2735         if ( chunk->_nextRecent )
2736             chunk->_nextRecent->_prevRecent = chunk->_prevRecent;
2737         chunk->_prevRecent = NULL;
2738         if (((chunk->_nextRecent = _recentChunk)))
2739             _recentChunk->_prevRecent = chunk;
2740         _recentChunk = chunk;
2741     }
2742     chunk->ensureUnpacked();
2743     return chunk;
2744 }
2745 
setCache(CacheFile * cache)2746 void ldomDataStorageManager::setCache( CacheFile * cache )
2747 {
2748     _cache = cache;
2749 }
2750 
2751 /// type
cacheType()2752 lUInt16 ldomDataStorageManager::cacheType()
2753 {
2754     switch ( _type ) {
2755     case 't':
2756         return CBT_TEXT_DATA;
2757     case 'e':
2758         return CBT_ELEM_DATA;
2759     case 'r':
2760         return CBT_RECT_DATA;
2761     case 's':
2762         return CBT_ELEM_STYLE_DATA;
2763     }
2764     return 0;
2765 }
2766 
2767 /// get or allocate space for element style data item
getStyleData(lUInt32 elemDataIndex,ldomNodeStyleInfo * dst)2768 void ldomDataStorageManager::getStyleData( lUInt32 elemDataIndex, ldomNodeStyleInfo * dst )
2769 {
2770     // assume storage has raw data chunks
2771     int index = elemDataIndex>>4; // element sequential index
2772     int chunkIndex = index >> STYLE_DATA_CHUNK_ITEMS_SHIFT;
2773     while ( _chunks.length() <= chunkIndex ) {
2774         //if ( _chunks.length()>0 )
2775         //    _chunks[_chunks.length()-1]->compact();
2776         _chunks.add( new ldomTextStorageChunk(STYLE_DATA_CHUNK_SIZE, this, _chunks.length()) );
2777         getChunk( (_chunks.length()-1)<<16 );
2778         compact( 0 );
2779     }
2780     ldomTextStorageChunk * chunk = getChunk( chunkIndex<<16 );
2781     int offsetIndex = index & STYLE_DATA_CHUNK_MASK;
2782     chunk->getRaw( offsetIndex * sizeof(ldomNodeStyleInfo), sizeof(ldomNodeStyleInfo), (lUInt8 *)dst );
2783 }
2784 
2785 /// set element style data item
setStyleData(lUInt32 elemDataIndex,const ldomNodeStyleInfo * src)2786 void ldomDataStorageManager::setStyleData( lUInt32 elemDataIndex, const ldomNodeStyleInfo * src )
2787 {
2788     // assume storage has raw data chunks
2789     int index = elemDataIndex>>4; // element sequential index
2790     int chunkIndex = index >> STYLE_DATA_CHUNK_ITEMS_SHIFT;
2791     while ( _chunks.length() <= chunkIndex ) {
2792         //if ( _chunks.length()>0 )
2793         //    _chunks[_chunks.length()-1]->compact();
2794         _chunks.add( new ldomTextStorageChunk(STYLE_DATA_CHUNK_SIZE, this, _chunks.length()) );
2795         getChunk( (_chunks.length()-1)<<16 );
2796         compact( 0 );
2797     }
2798     ldomTextStorageChunk * chunk = getChunk( chunkIndex<<16 );
2799     int offsetIndex = index & STYLE_DATA_CHUNK_MASK;
2800     chunk->setRaw( offsetIndex * sizeof(ldomNodeStyleInfo), sizeof(ldomNodeStyleInfo), (const lUInt8 *)src );
2801 }
2802 
2803 
2804 /// get or allocate space for rect data item
getRendRectData(lUInt32 elemDataIndex,lvdomElementFormatRec * dst)2805 void ldomDataStorageManager::getRendRectData( lUInt32 elemDataIndex, lvdomElementFormatRec * dst )
2806 {
2807     // assume storage has raw data chunks
2808     int index = elemDataIndex>>4; // element sequential index
2809     int chunkIndex = index >> RECT_DATA_CHUNK_ITEMS_SHIFT;
2810     while ( _chunks.length() <= chunkIndex ) {
2811         //if ( _chunks.length()>0 )
2812         //    _chunks[_chunks.length()-1]->compact();
2813         _chunks.add( new ldomTextStorageChunk(RECT_DATA_CHUNK_SIZE, this, _chunks.length()) );
2814         getChunk( (_chunks.length()-1)<<16 );
2815         compact( 0 );
2816     }
2817     ldomTextStorageChunk * chunk = getChunk( chunkIndex<<16 );
2818     int offsetIndex = index & RECT_DATA_CHUNK_MASK;
2819     chunk->getRaw( offsetIndex * sizeof(lvdomElementFormatRec), sizeof(lvdomElementFormatRec), (lUInt8 *)dst );
2820 }
2821 
2822 /// set rect data item
setRendRectData(lUInt32 elemDataIndex,const lvdomElementFormatRec * src)2823 void ldomDataStorageManager::setRendRectData( lUInt32 elemDataIndex, const lvdomElementFormatRec * src )
2824 {
2825     // assume storage has raw data chunks
2826     int index = elemDataIndex>>4; // element sequential index
2827     int chunkIndex = index >> RECT_DATA_CHUNK_ITEMS_SHIFT;
2828     while ( _chunks.length() <= chunkIndex ) {
2829         //if ( _chunks.length()>0 )
2830         //    _chunks[_chunks.length()-1]->compact();
2831         _chunks.add( new ldomTextStorageChunk(RECT_DATA_CHUNK_SIZE, this, _chunks.length()) );
2832         getChunk( (_chunks.length()-1)<<16 );
2833         compact( 0 );
2834     }
2835     ldomTextStorageChunk * chunk = getChunk( chunkIndex<<16 );
2836     int offsetIndex = index & RECT_DATA_CHUNK_MASK;
2837     chunk->setRaw( offsetIndex * sizeof(lvdomElementFormatRec), sizeof(lvdomElementFormatRec), (const lUInt8 *)src );
2838 }
2839 
2840 #if BUILD_LITE!=1
allocText(lUInt32 dataIndex,lUInt32 parentIndex,const lString8 & text)2841 lUInt32 ldomDataStorageManager::allocText( lUInt32 dataIndex, lUInt32 parentIndex, const lString8 & text )
2842 {
2843     if ( !_activeChunk ) {
2844         _activeChunk = new ldomTextStorageChunk(this, _chunks.length());
2845         _chunks.add( _activeChunk );
2846         getChunk( (_chunks.length()-1)<<16 );
2847         compact( 0 );
2848     }
2849     int offset = _activeChunk->addText( dataIndex, parentIndex, text );
2850     if ( offset<0 ) {
2851         // no space in current chunk, add one more chunk
2852         //_activeChunk->compact();
2853         _activeChunk = new ldomTextStorageChunk(this, _chunks.length());
2854         _chunks.add( _activeChunk );
2855         getChunk( (_chunks.length()-1)<<16 );
2856         compact( 0 );
2857         offset = _activeChunk->addText( dataIndex, parentIndex, text );
2858         if ( offset<0 )
2859             crFatalError(1001, "Unexpected error while allocation of text");
2860     }
2861     return offset | (_activeChunk->getIndex()<<16);
2862 }
2863 
allocElem(lUInt32 dataIndex,lUInt32 parentIndex,int childCount,int attrCount)2864 lUInt32 ldomDataStorageManager::allocElem( lUInt32 dataIndex, lUInt32 parentIndex, int childCount, int attrCount )
2865 {
2866     if ( !_activeChunk ) {
2867         _activeChunk = new ldomTextStorageChunk(this, _chunks.length());
2868         _chunks.add( _activeChunk );
2869         getChunk( (_chunks.length()-1)<<16 );
2870         compact( 0 );
2871     }
2872     int offset = _activeChunk->addElem( dataIndex, parentIndex, childCount, attrCount );
2873     if ( offset<0 ) {
2874         // no space in current chunk, add one more chunk
2875         //_activeChunk->compact();
2876         _activeChunk = new ldomTextStorageChunk(this, _chunks.length());
2877         _chunks.add( _activeChunk );
2878         getChunk( (_chunks.length()-1)<<16 );
2879         compact( 0 );
2880         offset = _activeChunk->addElem( dataIndex, parentIndex, childCount, attrCount );
2881         if ( offset<0 )
2882             crFatalError(1002, "Unexpected error while allocation of element");
2883     }
2884     return offset | (_activeChunk->getIndex()<<16);
2885 }
2886 
2887 /// call to invalidate chunk if content is modified
modified(lUInt32 addr)2888 void ldomDataStorageManager::modified( lUInt32 addr )
2889 {
2890     ldomTextStorageChunk * chunk = getChunk(addr);
2891     chunk->modified();
2892 }
2893 
2894 /// change node's parent
setParent(lUInt32 address,lUInt32 parent)2895 bool ldomDataStorageManager::setParent( lUInt32 address, lUInt32 parent )
2896 {
2897     ldomTextStorageChunk * chunk = getChunk(address);
2898     return chunk->setParent(address&0xFFFF, parent);
2899 }
2900 
2901 /// free data item
freeNode(lUInt32 addr)2902 void ldomDataStorageManager::freeNode( lUInt32 addr )
2903 {
2904     ldomTextStorageChunk * chunk = getChunk(addr);
2905     chunk->freeNode(addr&0xFFFF);
2906 }
2907 
2908 
getText(lUInt32 address)2909 lString8 ldomDataStorageManager::getText( lUInt32 address )
2910 {
2911     ldomTextStorageChunk * chunk = getChunk(address);
2912     return chunk->getText(address&0xFFFF);
2913 }
2914 
2915 /// get pointer to element data
getElem(lUInt32 addr)2916 ElementDataStorageItem * ldomDataStorageManager::getElem( lUInt32 addr )
2917 {
2918     ldomTextStorageChunk * chunk = getChunk(addr);
2919     return chunk->getElem(addr&0xFFFF);
2920 }
2921 
2922 /// returns node's parent by address
getParent(lUInt32 addr)2923 lUInt32 ldomDataStorageManager::getParent( lUInt32 addr )
2924 {
2925     ldomTextStorageChunk * chunk = getChunk(addr);
2926     return chunk->getElem(addr&0xFFFF)->parentIndex;
2927 }
2928 #endif
2929 
compact(int reservedSpace,const ldomTextStorageChunk * excludedChunk)2930 void ldomDataStorageManager::compact( int reservedSpace, const ldomTextStorageChunk* excludedChunk )
2931 {
2932 #if BUILD_LITE!=1
2933     if ( _uncompressedSize + reservedSpace > _maxUncompressedSize + _maxUncompressedSize/10 ) { // allow +10% overflow
2934         if (!_maxSizeReachedWarned) {
2935             // Log once to stdout that we reached maxUncompressedSize, so we can know
2936             // of this fact and consider it as a possible cause for crengine bugs
2937             CRLog::warn("Storage for %s reached max allowed uncompressed size (%u > %u)",
2938                         (_type == 't' ? "TEXT NODES" : (_type == 'e' ? "ELEMENTS" : (_type == 'r' ? "RENDERED RECTS" : (_type == 's' ? "ELEMENTS' STYLE DATA" : "OTHER")))),
2939                         _uncompressedSize, _maxUncompressedSize);
2940             CRLog::warn(" -> check settings.");
2941             _maxSizeReachedWarned = true; // warn only once
2942         }
2943         _owner->setCacheFileStale(true); // we may write: consider cache file stale
2944         // do compacting
2945         int sumsize = reservedSpace;
2946         for ( ldomTextStorageChunk * p = _recentChunk; p; p = p->_nextRecent ) {
2947             if ( (int)p->_bufsize + sumsize < _maxUncompressedSize ||
2948                  (p==_activeChunk && reservedSpace<0xFFFFFFF) ||
2949                  p == excludedChunk) {
2950 				// fits
2951 				sumsize += p->_bufsize;
2952 			} else {
2953 				if ( !_cache )
2954 					_owner->createCacheFile();
2955 				if ( _cache ) {
2956 					if ( !p->swapToCache(true) ) {
2957 						crFatalError(111, "Swap file writing error!");
2958 					}
2959 				}
2960 			}
2961         }
2962 
2963     }
2964 #endif
2965 }
2966 
2967 // max 512K of uncompressed data (~8 chunks)
2968 #define DEF_MAX_UNCOMPRESSED_SIZE 0x80000
ldomDataStorageManager(tinyNodeCollection * owner,char type,lUInt32 maxUnpackedSize,lUInt32 chunkSize)2969 ldomDataStorageManager::ldomDataStorageManager( tinyNodeCollection * owner, char type, lUInt32 maxUnpackedSize, lUInt32 chunkSize )
2970 : _owner( owner )
2971 , _activeChunk(NULL)
2972 , _recentChunk(NULL)
2973 , _cache(NULL)
2974 , _uncompressedSize(0)
2975 , _maxUncompressedSize(maxUnpackedSize)
2976 , _chunkSize(chunkSize)
2977 , _type(type)
2978 , _maxSizeReachedWarned(false)
2979 {
2980 }
2981 
~ldomDataStorageManager()2982 ldomDataStorageManager::~ldomDataStorageManager()
2983 {
2984 }
2985 
2986 /// create chunk to be read from cache file
ldomTextStorageChunk(ldomDataStorageManager * manager,lUInt16 index,lUInt32 compsize,lUInt32 uncompsize)2987 ldomTextStorageChunk::ldomTextStorageChunk(ldomDataStorageManager * manager, lUInt16 index, lUInt32 compsize, lUInt32 uncompsize)
2988 	: _manager(manager)
2989 	, _nextRecent(NULL)
2990 	, _prevRecent(NULL)
2991 	, _buf(NULL)   /// buffer for uncompressed data
2992 	, _bufsize(0)    /// _buf (uncompressed) area size, bytes
2993 	, _bufpos(uncompsize)     /// _buf (uncompressed) data write position (for appending of new data)
2994 	, _index(index)      /// ? index of chunk in storage
2995 	, _type( manager->_type )
2996 	, _saved(true)
2997 {
2998     CR_UNUSED(compsize);
2999 }
3000 
ldomTextStorageChunk(lUInt32 preAllocSize,ldomDataStorageManager * manager,lUInt16 index)3001 ldomTextStorageChunk::ldomTextStorageChunk(lUInt32 preAllocSize, ldomDataStorageManager * manager, lUInt16 index)
3002 	: _manager(manager)
3003 	, _nextRecent(NULL)
3004 	, _prevRecent(NULL)
3005 	, _buf(NULL)   /// buffer for uncompressed data
3006 	, _bufsize(preAllocSize)    /// _buf (uncompressed) area size, bytes
3007 	, _bufpos(preAllocSize)     /// _buf (uncompressed) data write position (for appending of new data)
3008 	, _index(index)      /// ? index of chunk in storage
3009 	, _type( manager->_type )
3010 	, _saved(false)
3011 {
3012     _buf = (lUInt8*)calloc(preAllocSize, sizeof(*_buf));
3013     _manager->_uncompressedSize += _bufsize;
3014 }
3015 
ldomTextStorageChunk(ldomDataStorageManager * manager,lUInt16 index)3016 ldomTextStorageChunk::ldomTextStorageChunk(ldomDataStorageManager * manager, lUInt16 index)
3017 	: _manager(manager)
3018 	, _nextRecent(NULL)
3019 	, _prevRecent(NULL)
3020 	, _buf(NULL)   /// buffer for uncompressed data
3021 	, _bufsize(0)    /// _buf (uncompressed) area size, bytes
3022 	, _bufpos(0)     /// _buf (uncompressed) data write position (for appending of new data)
3023 	, _index(index)      /// ? index of chunk in storage
3024 	, _type( manager->_type )
3025 	, _saved(false)
3026 {
3027 }
3028 
3029 #if BUILD_LITE!=1
3030 /// saves data to cache file, if unsaved
save()3031 bool ldomTextStorageChunk::save()
3032 {
3033     if ( !_saved )
3034         return swapToCache(false);
3035     return true;
3036 }
3037 #endif
3038 
~ldomTextStorageChunk()3039 ldomTextStorageChunk::~ldomTextStorageChunk()
3040 {
3041     setunpacked(NULL, 0);
3042 }
3043 
3044 
3045 #if BUILD_LITE!=1
3046 /// pack data, and remove unpacked, put packed data to cache file
swapToCache(bool removeFromMemory)3047 bool ldomTextStorageChunk::swapToCache( bool removeFromMemory )
3048 {
3049     if ( !_manager->_cache )
3050         return true;
3051     if ( _buf ) {
3052         if ( !_saved && _manager->_cache) {
3053 #if DEBUG_DOM_STORAGE==1
3054             CRLog::debug("Writing %d bytes of chunk %c%d to cache", _bufpos, _type, _index);
3055 #endif
3056             if ( !_manager->_cache->write( _manager->cacheType(), _index, _buf, _bufpos, COMPRESS_NODE_STORAGE_DATA) ) {
3057                 CRLog::error("Error while swapping of chunk %c%d to cache file", _type, _index);
3058                 crFatalError(-1, "Error while swapping of chunk to cache file");
3059                 return false;
3060             }
3061             _saved = true;
3062         }
3063     }
3064     if ( removeFromMemory ) {
3065         setunpacked(NULL, 0);
3066     }
3067     return true;
3068 }
3069 
3070 /// read packed data from cache
restoreFromCache()3071 bool ldomTextStorageChunk::restoreFromCache()
3072 {
3073     if ( _buf )
3074         return true;
3075     if ( !_saved )
3076         return false;
3077     int size;
3078     if ( !_manager->_cache->read( _manager->cacheType(), _index, _buf, size ) )
3079         return false;
3080     _bufsize = size;
3081     _manager->_uncompressedSize += _bufsize;
3082 #if DEBUG_DOM_STORAGE==1
3083     CRLog::debug("Read %d bytes of chunk %c%d from cache", _bufsize, _type, _index);
3084 #endif
3085     return true;
3086 }
3087 #endif
3088 
3089 /// get raw data bytes
getRaw(int offset,int size,lUInt8 * buf)3090 void ldomTextStorageChunk::getRaw( int offset, int size, lUInt8 * buf )
3091 {
3092 #ifdef _DEBUG
3093     if ( !_buf || offset+size>(int)_bufpos || offset+size>(int)_bufsize )
3094         crFatalError(123, "ldomTextStorageChunk: Invalid raw data buffer position");
3095 #endif
3096     memcpy( buf, _buf+offset, size );
3097 }
3098 
3099 /// set raw data bytes
setRaw(int offset,int size,const lUInt8 * buf)3100 void ldomTextStorageChunk::setRaw( int offset, int size, const lUInt8 * buf )
3101 {
3102 #ifdef _DEBUG
3103     if ( !_buf || offset+size>(int)_bufpos || offset+size>(int)_bufsize )
3104         crFatalError(123, "ldomTextStorageChunk: Invalid raw data buffer position");
3105 #endif
3106     if (memcmp(_buf+offset, buf, size) != 0) {
3107         memcpy(_buf+offset, buf, size);
3108         modified();
3109     }
3110 }
3111 
3112 
3113 /// returns free space in buffer
space()3114 int ldomTextStorageChunk::space()
3115 {
3116     return _bufsize - _bufpos;
3117 }
3118 
3119 #if BUILD_LITE!=1
3120 /// returns free space in buffer
addText(lUInt32 dataIndex,lUInt32 parentIndex,const lString8 & text)3121 int ldomTextStorageChunk::addText( lUInt32 dataIndex, lUInt32 parentIndex, const lString8 & text )
3122 {
3123     int itemsize = (sizeof(TextDataStorageItem)+text.length()-2 + 15) & 0xFFFFFFF0;
3124     if ( !_buf ) {
3125         // create new buffer, if necessary
3126         _bufsize = _manager->_chunkSize > itemsize ? _manager->_chunkSize : itemsize;
3127         _buf = (lUInt8*)calloc(_bufsize, sizeof(*_buf));
3128         _bufpos = 0;
3129         _manager->_uncompressedSize += _bufsize;
3130     }
3131     if ( (int)_bufsize - (int)_bufpos < itemsize )
3132         return -1;
3133     TextDataStorageItem * p = (TextDataStorageItem*)(_buf + _bufpos);
3134     p->sizeDiv16 = (lUInt16)(itemsize >> 4);
3135     p->dataIndex = dataIndex;
3136     p->parentIndex = parentIndex;
3137     p->type = LXML_TEXT_NODE;
3138     p->length = (lUInt16)text.length();
3139     memcpy(p->text, text.c_str(), p->length);
3140     int res = _bufpos >> 4;
3141     _bufpos += itemsize;
3142     return res;
3143 }
3144 
3145 /// adds new element item to buffer, returns offset inside chunk of stored data
addElem(lUInt32 dataIndex,lUInt32 parentIndex,int childCount,int attrCount)3146 int ldomTextStorageChunk::addElem(lUInt32 dataIndex, lUInt32 parentIndex, int childCount, int attrCount)
3147 {
3148     int itemsize = (sizeof(ElementDataStorageItem) + attrCount*(sizeof(lUInt16)*2 + sizeof(lUInt32)) + childCount*sizeof(lUInt32) - sizeof(lUInt32) + 15) & 0xFFFFFFF0;
3149     if ( !_buf ) {
3150         // create new buffer, if necessary
3151         _bufsize = _manager->_chunkSize > itemsize ? _manager->_chunkSize : itemsize;
3152         _buf = (lUInt8*)calloc(_bufsize, sizeof(*_buf));
3153         _bufpos = 0;
3154         _manager->_uncompressedSize += _bufsize;
3155     }
3156     if ( _bufsize - _bufpos < (unsigned)itemsize )
3157         return -1;
3158     ElementDataStorageItem *item = (ElementDataStorageItem *)(_buf + _bufpos);
3159     if ( item ) {
3160         item->sizeDiv16 = (lUInt16)(itemsize >> 4);
3161         item->dataIndex = dataIndex;
3162         item->parentIndex = parentIndex;
3163         item->type = LXML_ELEMENT_NODE;
3164         item->parentIndex = parentIndex;
3165         item->attrCount = (lUInt16)attrCount;
3166         item->childCount = childCount;
3167     }
3168     int res = _bufpos >> 4;
3169     _bufpos += itemsize;
3170     return res;
3171 }
3172 
3173 /// set node parent by offset
setParent(int offset,lUInt32 parentIndex)3174 bool ldomTextStorageChunk::setParent( int offset, lUInt32 parentIndex )
3175 {
3176     offset <<= 4;
3177     if ( offset>=0 && offset<(int)_bufpos ) {
3178         TextDataStorageItem * item = (TextDataStorageItem *)(_buf+offset);
3179         if ( (int)parentIndex!=item->parentIndex ) {
3180             item->parentIndex = parentIndex;
3181             modified();
3182             return true;
3183         } else
3184             return false;
3185     }
3186     CRLog::error("Offset %d is out of bounds (%d) for storage chunk %c%d, chunkCount=%d", offset, this->_bufpos, this->_type, this->_index, _manager->_chunks.length() );
3187     return false;
3188 }
3189 
3190 
3191 /// get text node parent by offset
getParent(int offset)3192 lUInt32 ldomTextStorageChunk::getParent( int offset )
3193 {
3194     offset <<= 4;
3195     if ( offset>=0 && offset<(int)_bufpos ) {
3196         TextDataStorageItem * item = (TextDataStorageItem *)(_buf+offset);
3197         return item->parentIndex;
3198     }
3199     CRLog::error("Offset %d is out of bounds (%d) for storage chunk %c%d, chunkCount=%d", offset, this->_bufpos, this->_type, this->_index, _manager->_chunks.length() );
3200     return 0;
3201 }
3202 
3203 /// get pointer to element data
getElem(int offset)3204 ElementDataStorageItem * ldomTextStorageChunk::getElem( int offset  )
3205 {
3206     offset <<= 4;
3207     if ( offset>=0 && offset<(int)_bufpos ) {
3208         ElementDataStorageItem * item = (ElementDataStorageItem *)(_buf+offset);
3209         return item;
3210     }
3211     CRLog::error("Offset %d is out of bounds (%d) for storage chunk %c%d, chunkCount=%d", offset, this->_bufpos, this->_type, this->_index, _manager->_chunks.length() );
3212     return NULL;
3213 }
3214 #endif
3215 
3216 
3217 /// call to invalidate chunk if content is modified
modified()3218 void ldomTextStorageChunk::modified()
3219 {
3220     if ( !_buf ) {
3221         CRLog::error("Modified is called for node which is not in memory");
3222     }
3223     _saved = false;
3224 }
3225 
3226 #if BUILD_LITE!=1
3227 /// free data item
freeNode(int offset)3228 void ldomTextStorageChunk::freeNode( int offset )
3229 {
3230     offset <<= 4;
3231     if ( _buf && offset>=0 && offset<(int)_bufpos ) {
3232         TextDataStorageItem * item = (TextDataStorageItem *)(_buf+offset);
3233         if ( (item->type==LXML_TEXT_NODE || item->type==LXML_ELEMENT_NODE) && item->dataIndex ) {
3234             item->type = LXML_NO_DATA;
3235             item->dataIndex = 0;
3236             modified();
3237         }
3238     }
3239 }
3240 
3241 /// get text item from buffer by offset
getText(int offset)3242 lString8 ldomTextStorageChunk::getText( int offset )
3243 {
3244     offset <<= 4;
3245     if ( _buf && offset>=0 && offset<(int)_bufpos ) {
3246         TextDataStorageItem * item = (TextDataStorageItem *)(_buf+offset);
3247         return item->getText8();
3248     }
3249     return lString8::empty_str;
3250 }
3251 #endif
3252 
3253 
3254 /// pack data from _buf to _compbuf
ldomPack(const lUInt8 * buf,int bufsize,lUInt8 * & dstbuf,lUInt32 & dstsize)3255 bool ldomPack( const lUInt8 * buf, int bufsize, lUInt8 * &dstbuf, lUInt32 & dstsize )
3256 {
3257     lUInt8 tmp[PACK_BUF_SIZE]; // 64K buffer for compressed data
3258     int ret;
3259     z_stream z;
3260     z.zalloc = Z_NULL;
3261     z.zfree = Z_NULL;
3262     z.opaque = Z_NULL;
3263     ret = deflateInit( &z, DOC_DATA_COMPRESSION_LEVEL );
3264     if ( ret != Z_OK )
3265         return false;
3266     z.avail_in = bufsize;
3267     z.next_in = (unsigned char *)buf;
3268     int compressed_size = 0;
3269     lUInt8 *compressed_buf = NULL;
3270     while (true) {
3271         z.avail_out = PACK_BUF_SIZE;
3272         z.next_out = tmp;
3273         ret = deflate( &z, Z_FINISH );
3274         if (ret == Z_STREAM_ERROR) { // some error occured while packing
3275             deflateEnd(&z);
3276             if (compressed_buf)
3277                 free(compressed_buf);
3278             // printf("deflate() error: %d (%d > %d)\n", ret, bufsize, compressed_size);
3279             return false;
3280         }
3281         int have = PACK_BUF_SIZE - z.avail_out;
3282         compressed_buf = cr_realloc(compressed_buf, compressed_size + have);
3283         memcpy(compressed_buf + compressed_size, tmp, have );
3284         compressed_size += have;
3285         if (z.avail_out != 0) // buffer not fully filled = deflate is done
3286             break;
3287         // printf("deflate() additional call needed (%d > %d)\n", bufsize, compressed_size);
3288     }
3289     deflateEnd(&z);
3290     dstsize = compressed_size;
3291     dstbuf = compressed_buf;
3292     // printf("deflate() done: %d > %d\n", bufsize, compressed_size);
3293     return true;
3294 }
3295 
3296 /// unpack data from _compbuf to _buf
ldomUnpack(const lUInt8 * compbuf,int compsize,lUInt8 * & dstbuf,lUInt32 & dstsize)3297 bool ldomUnpack( const lUInt8 * compbuf, int compsize, lUInt8 * &dstbuf, lUInt32 & dstsize  )
3298 {
3299     lUInt8 tmp[UNPACK_BUF_SIZE]; // 256K buffer for uncompressed data
3300     int ret;
3301     z_stream z = { 0 };
3302     z.zalloc = Z_NULL;
3303     z.zfree = Z_NULL;
3304     z.opaque = Z_NULL;
3305     ret = inflateInit( &z );
3306     if ( ret != Z_OK )
3307         return false;
3308     z.avail_in = compsize;
3309     z.next_in = (unsigned char *)compbuf;
3310     lUInt32 uncompressed_size = 0;
3311     lUInt8 *uncompressed_buf = NULL;
3312     while (true) {
3313         z.avail_out = UNPACK_BUF_SIZE;
3314         z.next_out = tmp;
3315         ret = inflate( &z, Z_SYNC_FLUSH );
3316         if (ret != Z_OK && ret != Z_STREAM_END) { // some error occured while unpacking
3317             inflateEnd(&z);
3318             if (uncompressed_buf)
3319                 free(uncompressed_buf);
3320             // printf("inflate() error: %d (%d > %d)\n", ret, compsize, uncompressed_size);
3321             return false;
3322         }
3323         lUInt32 have = UNPACK_BUF_SIZE - z.avail_out;
3324         uncompressed_buf = cr_realloc(uncompressed_buf, uncompressed_size + have);
3325         memcpy(uncompressed_buf + uncompressed_size, tmp, have );
3326         uncompressed_size += have;
3327         if (ret == Z_STREAM_END) {
3328             break;
3329         }
3330         // printf("inflate() additional call needed (%d > %d)\n", compsize, uncompressed_size);
3331     }
3332     inflateEnd(&z);
3333     dstsize = uncompressed_size;
3334     dstbuf = uncompressed_buf;
3335     // printf("inflate() done %d > %d\n", compsize, uncompressed_size);
3336     return true;
3337 }
3338 
setunpacked(const lUInt8 * buf,int bufsize)3339 void ldomTextStorageChunk::setunpacked( const lUInt8 * buf, int bufsize )
3340 {
3341     if ( _buf ) {
3342         _manager->_uncompressedSize -= _bufsize;
3343         free(_buf);
3344         _buf = NULL;
3345         _bufsize = 0;
3346     }
3347     if ( buf && bufsize ) {
3348         _bufsize = bufsize;
3349         _bufpos = bufsize;
3350         _buf = (lUInt8 *)malloc( sizeof(lUInt8) * bufsize );
3351         _manager->_uncompressedSize += _bufsize;
3352         memcpy( _buf, buf, bufsize );
3353     }
3354 }
3355 
3356 /// unpacks chunk, if packed; checks storage space, compact if necessary
ensureUnpacked()3357 void ldomTextStorageChunk::ensureUnpacked()
3358 {
3359 #if BUILD_LITE!=1
3360     if ( !_buf ) {
3361         if ( _saved ) {
3362             if ( !restoreFromCache() ) {
3363                 CRTimerUtil timer;
3364                 timer.infinite();
3365                 _manager->_cache->flush(false,timer);
3366                 CRLog::warn( "restoreFromCache() failed for chunk %c%d,will try after flush", _type, _index);
3367             if ( !restoreFromCache() ) {
3368                 CRLog::error( "restoreFromCache() failed for chunk %c%d", _type, _index);
3369                 crFatalError( 111, "restoreFromCache() failed for chunk");
3370                 }
3371             }
3372             _manager->compact( 0, this );
3373         }
3374     } else {
3375         // compact
3376     }
3377 #endif
3378 }
3379 
3380 
3381 
3382 
3383 
3384 
3385 
3386 
3387 
3388 // moved to .cpp to hide implementation
3389 // fastDOM
3390 class ldomAttributeCollection
3391 {
3392 private:
3393     lUInt16 _len;
3394     lUInt16 _size;
3395     lxmlAttribute * _list;
3396 public:
ldomAttributeCollection()3397     ldomAttributeCollection()
3398     : _len(0), _size(0), _list(NULL)
3399     {
3400     }
~ldomAttributeCollection()3401     ~ldomAttributeCollection()
3402     {
3403         if (_list)
3404             free(_list);
3405     }
operator [](int index)3406     lxmlAttribute * operator [] (int index) { return &_list[index]; }
operator [](int index) const3407     const lxmlAttribute * operator [] (int index) const { return &_list[index]; }
length() const3408     lUInt16 length() const
3409     {
3410         return _len;
3411     }
get(lUInt16 nsId,lUInt16 attrId) const3412     lUInt32 get( lUInt16 nsId, lUInt16 attrId ) const
3413     {
3414         for (lUInt16 i=0; i<_len; i++)
3415         {
3416             if (_list[i].compare( nsId, attrId ))
3417                 return _list[i].index;
3418         }
3419         return LXML_ATTR_VALUE_NONE;
3420     }
set(lUInt16 nsId,lUInt16 attrId,lUInt32 valueIndex)3421     void set( lUInt16 nsId, lUInt16 attrId, lUInt32 valueIndex )
3422     {
3423         // find existing
3424         for (lUInt16 i=0; i<_len; i++)
3425         {
3426             if (_list[i].compare( nsId, attrId ))
3427             {
3428                 _list[i].index = valueIndex;
3429                 return;
3430             }
3431         }
3432         // add
3433         if (_len>=_size)
3434         {
3435             _size += 4;
3436             _list = cr_realloc( _list, _size );
3437         }
3438         _list[ _len++ ].setData(nsId, attrId, valueIndex);
3439     }
add(lUInt16 nsId,lUInt16 attrId,lUInt32 valueIndex)3440     void add( lUInt16 nsId, lUInt16 attrId, lUInt32 valueIndex )
3441     {
3442         // find existing
3443         if (_len>=_size)
3444         {
3445             _size += 4;
3446             _list = cr_realloc( _list, _size );
3447         }
3448         _list[ _len++ ].setData(nsId, attrId, valueIndex);
3449     }
add(const lxmlAttribute * v)3450     void add( const lxmlAttribute * v )
3451     {
3452         // find existing
3453         if (_len>=_size)
3454         {
3455             _size += 4;
3456             _list = cr_realloc( _list, _size );
3457         }
3458         _list[ _len++ ] = *v;
3459     }
3460 };
3461 
3462 
3463 /*
3464 class simpleLogFile
3465 {
3466 public:
3467     FILE * f;
3468     simpleLogFile(const char * fname) { f = fopen( fname, "wt" ); }
3469     ~simpleLogFile() { if (f) fclose(f); }
3470     simpleLogFile & operator << ( const char * str ) { fprintf( f, "%s", str ); fflush( f ); return *this; }
3471     simpleLogFile & operator << ( int d ) { fprintf( f, "%d(0x%X) ", d, d ); fflush( f ); return *this; }
3472     simpleLogFile & operator << ( const lChar32 * str )
3473     {
3474         if (str)
3475         {
3476             for (; *str; str++ )
3477             {
3478                 fputc( *str >= 32 && *str<127 ? *str : '?', f );
3479             }
3480         }
3481         fflush( f );
3482         return *this;
3483     }
3484 };
3485 
3486 simpleLogFile logfile("logfile.log");
3487 */
3488 
3489 
3490 
3491 /////////////////////////////////////////////////////////////////
3492 /// lxmlDocument
3493 
3494 
/// Builds an empty document base.
/// The element/attribute/namespace name tables are constructed with the
/// MAX_*_TYPE_ID constants, and the counters used to number names that are
/// not known yet start at the UNKNOWN_*_TYPE_ID constants.
/// The int parameter is unnamed (commented-out name: dataBufSize) and ignored.
lxmlDocBase::lxmlDocBase(int /*dataBufSize*/)
: tinyNodeCollection(),
_elementNameTable(MAX_ELEMENT_TYPE_ID)
, _attrNameTable(MAX_ATTRIBUTE_TYPE_ID)
, _nsNameTable(MAX_NAMESPACE_TYPE_ID)
, _nextUnknownElementId(UNKNOWN_ELEMENT_TYPE_ID)
, _nextUnknownAttrId(UNKNOWN_ATTRIBUTE_TYPE_ID)
, _nextUnknownNsId(UNKNOWN_NAMESPACE_TYPE_ID)
, _attrValueTable( DOC_STRING_HASH_SIZE )
,_idNodeMap(8192)
,_urlImageMap(1024)
,_idAttrId(0) // 0 = not looked up yet, see onAttributeSet()
,_nameAttrId(0) // 0 = not looked up yet, see onAttributeSet()
#if BUILD_LITE!=1
//,_keepData(false)
//,_mapped(false)
#endif
#if BUILD_LITE!=1
,_pagesData(8192)
#endif
{
    // let the document stylesheet know which document it belongs to
    _stylesheet.setDocument( this );
}
3519 
/// Destructor: has no explicit cleanup of its own (empty body)
lxmlDocBase::~lxmlDocBase()
{
}
3524 
onAttributeSet(lUInt16 attrId,lUInt32 valueId,ldomNode * node)3525 void lxmlDocBase::onAttributeSet( lUInt16 attrId, lUInt32 valueId, ldomNode * node )
3526 {
3527     if ( _idAttrId==0 )
3528         _idAttrId = _attrNameTable.idByName("id");
3529     if ( _nameAttrId==0 )
3530         _nameAttrId = _attrNameTable.idByName("name");
3531     if (attrId == _idAttrId) {
3532         _idNodeMap.set( valueId, node->getDataIndex() );
3533     } else if ( attrId==_nameAttrId ) {
3534         lString32 nodeName = node->getNodeName();
3535         if (nodeName == "a")
3536             _idNodeMap.set( valueId, node->getDataIndex() );
3537     }
3538 }
3539 
getNsNameIndex(const lChar32 * name)3540 lUInt16 lxmlDocBase::getNsNameIndex( const lChar32 * name )
3541 {
3542     const LDOMNameIdMapItem * item = _nsNameTable.findItem( name );
3543     if (item)
3544         return item->id;
3545     _nsNameTable.AddItem( _nextUnknownNsId, lString32(name), NULL );
3546     return _nextUnknownNsId++;
3547 }
3548 
getNsNameIndex(const lChar8 * name)3549 lUInt16 lxmlDocBase::getNsNameIndex( const lChar8 * name )
3550 {
3551     const LDOMNameIdMapItem * item = _nsNameTable.findItem( name );
3552     if (item)
3553         return item->id;
3554     _nsNameTable.AddItem( _nextUnknownNsId, lString32(name), NULL );
3555     return _nextUnknownNsId++;
3556 }
3557 
getAttrNameIndex(const lChar32 * name)3558 lUInt16 lxmlDocBase::getAttrNameIndex( const lChar32 * name )
3559 {
3560     const LDOMNameIdMapItem * item = _attrNameTable.findItem( name );
3561     if (item)
3562         return item->id;
3563     _attrNameTable.AddItem( _nextUnknownAttrId, lString32(name), NULL );
3564     return _nextUnknownAttrId++;
3565 }
3566 
getAttrNameIndex(const lChar8 * name)3567 lUInt16 lxmlDocBase::getAttrNameIndex( const lChar8 * name )
3568 {
3569     const LDOMNameIdMapItem * item = _attrNameTable.findItem( name );
3570     if (item)
3571         return item->id;
3572     _attrNameTable.AddItem( _nextUnknownAttrId, lString32(name), NULL );
3573     return _nextUnknownAttrId++;
3574 }
3575 
getElementNameIndex(const lChar32 * name)3576 lUInt16 lxmlDocBase::getElementNameIndex( const lChar32 * name )
3577 {
3578     const LDOMNameIdMapItem * item = _elementNameTable.findItem( name );
3579     if (item)
3580         return item->id;
3581     _elementNameTable.AddItem( _nextUnknownElementId, lString32(name), NULL );
3582     return _nextUnknownElementId++;
3583 }
3584 
findElementNameIndex(const lChar8 * name)3585 lUInt16 lxmlDocBase::findElementNameIndex( const lChar8 * name )
3586 {
3587     const LDOMNameIdMapItem * item = _elementNameTable.findItem( name );
3588     if (item)
3589         return item->id;
3590     return 0;
3591 }
3592 
getElementNameIndex(const lChar8 * name)3593 lUInt16 lxmlDocBase::getElementNameIndex( const lChar8 * name )
3594 {
3595     const LDOMNameIdMapItem * item = _elementNameTable.findItem( name );
3596     if (item)
3597         return item->id;
3598     _elementNameTable.AddItem( _nextUnknownElementId, lString32(name), NULL );
3599     return _nextUnknownElementId++;
3600 }
3601 
3602 /// create formatted text object with options set
createFormattedText()3603 LFormattedText * lxmlDocBase::createFormattedText()
3604 {
3605     LFormattedText * p = new LFormattedText();
3606     p->setImageScalingOptions(&_imgScalingOptions);
3607     p->setSpaceWidthScalePercent(_spaceWidthScalePercent);
3608     p->setMinSpaceCondensingPercent(_minSpaceCondensingPercent);
3609     p->setUnusedSpaceThresholdPercent(_unusedSpaceThresholdPercent);
3610     p->setMaxAddedLetterSpacingPercent(_maxAddedLetterSpacingPercent);
3611     p->setHighlightOptions(&_highlightOptions);
3612     return p;
3613 }
3614 
3615 /// returns main element (i.e. FictionBook for FB2)
getRootNode()3616 ldomNode * lxmlDocBase::getRootNode()
3617 {
3618     return getTinyNode(17);
3619 }
3620 
/// Creates an empty document: the base is built with DEF_DOC_DATA_BUFFER_SIZE,
/// the rendering state (doc flags, page size, rendered/cache flags, warnings
/// bitmap) is zeroed when BUILD_LITE!=1, the document is registered with
/// ldomNode::registerDocument() and a first tiny element is allocated.
ldomDocument::ldomDocument()
: lxmlDocBase(DEF_DOC_DATA_BUFFER_SIZE),
  m_toc(this)
, m_pagemap(this)
#if BUILD_LITE!=1
, _last_docflags(0)
, _page_height(0)
, _page_width(0)
, _rendered(false)
, _just_rendered_from_cache(false)
, _toc_from_cache_valid(false)
, _warnings_seen_bitmap(0)
#endif
, lists(100)
{
    // Register this document and remember the index it was given
    _docIndex = ldomNode::registerDocument(this);
    // Allocate a first element with no parent (NULL) and zero ids
    allocTinyElement(NULL, 0, 0);
    // Note: valgrind reports (sometimes, when some document is opened or closed,
    // with metadataOnly or not) a memory leak (64 bytes in 1 blocks are definitely
    // lost), about this, created in allocTinyElement():
    //    tinyElement * elem = new tinyElement(...)
    // possibly because it's not anchored anywhere.
    // Attempts at anchoring it into a _nullNode, and calling ->destroy()
    // in ~ldomDocument(), did not prevent this report, and caused other ones...

    //new ldomElement( this, NULL, 0, 0, 0 );
    //assert( _instanceMapCount==2 );
}
3649 
3650 /// Copy constructor - copies ID tables contents
lxmlDocBase(lxmlDocBase & doc)3651 lxmlDocBase::lxmlDocBase( lxmlDocBase & doc )
3652 :    tinyNodeCollection(doc)
3653 ,   _elementNameTable(doc._elementNameTable)    // Element Name<->Id map
3654 ,   _attrNameTable(doc._attrNameTable)       // Attribute Name<->Id map
3655 ,   _nsNameTable(doc._nsNameTable)           // Namespace Name<->Id map
3656 ,   _nextUnknownElementId(doc._nextUnknownElementId) // Next Id for unknown element
3657 ,   _nextUnknownAttrId(doc._nextUnknownAttrId)    // Next Id for unknown attribute
3658 ,   _nextUnknownNsId(doc._nextUnknownNsId)      // Next Id for unknown namespace
3659     //lvdomStyleCache _styleCache;         // Style cache
3660 ,   _attrValueTable(doc._attrValueTable)
3661 ,   _idNodeMap(doc._idNodeMap)
3662 ,   _urlImageMap(1024)
3663 ,   _idAttrId(doc._idAttrId) // Id for "id" attribute name
3664 //,   _docFlags(doc._docFlags)
3665 #if BUILD_LITE!=1
3666 ,   _pagesData(8192)
3667 #endif
3668 {
3669 }
3670 
3671 /// creates empty document which is ready to be copy target of doc partial contents
ldomDocument(ldomDocument & doc)3672 ldomDocument::ldomDocument( ldomDocument & doc )
3673 : lxmlDocBase(doc)
3674 , m_toc(this)
3675 , m_pagemap(this)
3676 #if BUILD_LITE!=1
3677 , _def_font(doc._def_font) // default font
3678 , _def_style(doc._def_style)
3679 , _last_docflags(doc._last_docflags)
3680 , _page_height(doc._page_height)
3681 , _page_width(doc._page_width)
3682 #endif
3683 , _container(doc._container)
3684 , lists(100)
3685 {
3686     _docIndex = ldomNode::registerDocument(this);
3687 }
3688 
writeNode(LVStream * stream,ldomNode * node,bool treeLayout)3689 static void writeNode( LVStream * stream, ldomNode * node, bool treeLayout )
3690 {
3691     int level = 0;
3692     if ( treeLayout ) {
3693         level = node->getNodeLevel();
3694         for (int i=0; i<level; i++ )
3695             *stream << "  ";
3696     }
3697     if ( node->isText() )
3698     {
3699         lString8 txt = node->getText8();
3700         *stream << txt;
3701         if ( treeLayout )
3702             *stream << "\n";
3703     }
3704     else if (  node->isElement() )
3705     {
3706         lString8 elemName = UnicodeToUtf8(node->getNodeName());
3707         lString8 elemNsName = UnicodeToUtf8(node->getNodeNsName());
3708         if (!elemNsName.empty())
3709             elemName = elemNsName + ":" + elemName;
3710         if (!elemName.empty())
3711             *stream << "<" << elemName;
3712         int i;
3713         for (i=0; i<(int)node->getAttrCount(); i++)
3714         {
3715             const lxmlAttribute * attr = node->getAttribute(i);
3716             if (attr)
3717             {
3718                 lString8 attrName( UnicodeToUtf8(node->getDocument()->getAttrName(attr->id)) );
3719                 lString8 nsName( UnicodeToUtf8(node->getDocument()->getNsName(attr->nsid)) );
3720                 lString8 attrValue( UnicodeToUtf8(node->getDocument()->getAttrValue(attr->index)) );
3721                 *stream << " ";
3722                 if ( nsName.length() > 0 )
3723                     *stream << nsName << ":";
3724                 *stream << attrName << "=\"" << attrValue << "\"";
3725             }
3726         }
3727 
3728 #if 0
3729             if (!elemName.empty())
3730             {
3731                 ldomNode * elem = node;
3732                 lvdomElementFormatRec * fmt = elem->getRenderData();
3733                 css_style_ref_t style = elem->getStyle();
3734                 if ( fmt ) {
3735                     lvRect rect;
3736                     elem->getAbsRect( rect );
3737                     *stream << U" fmt=\"";
3738                     *stream << U"rm:" << lString32::itoa( (int)elem->getRendMethod() ) << U" ";
3739                     if ( style.isNull() )
3740                         *stream << U"style: NULL ";
3741                     else {
3742                         *stream << U"disp:" << lString32::itoa( (int)style->display ) << U" ";
3743                     }
3744                     *stream << U"y:" << lString32::itoa( (int)fmt->getY() ) << U" ";
3745                     *stream << U"h:" << lString32::itoa( (int)fmt->getHeight() ) << U" ";
3746                     *stream << U"ay:" << lString32::itoa( (int)rect.top ) << U" ";
3747                     *stream << U"ah:" << lString32::itoa( (int)rect.height() ) << U" ";
3748                     *stream << U"\"";
3749                 }
3750             }
3751 #endif
3752 
3753         if ( node->getChildCount() == 0 ) {
3754             if (!elemName.empty())
3755             {
3756                 if ( elemName[0] == '?' )
3757                     *stream << "?>";
3758                 else
3759                     *stream << "/>";
3760             }
3761             if ( treeLayout )
3762                 *stream << "\n";
3763         } else {
3764             if (!elemName.empty())
3765                 *stream << ">";
3766             if ( treeLayout )
3767                 *stream << "\n";
3768             for (i=0; i<(int)node->getChildCount(); i++)
3769             {
3770                 writeNode( stream, node->getChildNode(i), treeLayout );
3771             }
3772             if ( treeLayout ) {
3773                 for (int i=0; i<level; i++ )
3774                     *stream << "  ";
3775             }
3776             if (!elemName.empty())
3777                 *stream << "</" << elemName << ">";
3778             if ( treeLayout )
3779                 *stream << "\n";
3780         }
3781     }
3782 }
3783 
3784 // Extended version of previous function for displaying selection HTML, with tunable output
3785 #define WRITENODEEX_TEXT_HYPHENATE               0x0001 ///< add soft-hyphens where hyphenation is allowed
3786 #define WRITENODEEX_TEXT_MARK_NODE_BOUNDARIES    0x0002 ///< mark start and end of text nodes (useful when indented)
3787 #define WRITENODEEX_TEXT_SHOW_UNICODE_CODEPOINT  0x0004 ///< show unicode codepoint after char
3788 #define WRITENODEEX_TEXT_UNESCAPED               0x0008 ///< let &, < and > unescaped in text nodes (makes HTML invalid)
3789 #define WRITENODEEX_INDENT_NEWLINE               0x0010 ///< indent newlines according to node level
3790 #define WRITENODEEX_NEWLINE_BLOCK_NODES          0x0020 ///< start only nodes rendered as block/final on a new line,
3791                                                         ///  so inline elements and text nodes are stuck together
3792 #define WRITENODEEX_NEWLINE_ALL_NODES            0x0040 ///< start all nodes on a new line
3793 #define WRITENODEEX_UNUSED_1                     0x0080 ///<
3794 #define WRITENODEEX_NB_SKIPPED_CHARS             0x0100 ///< show number of skipped chars in text nodes: (...43...)
3795 #define WRITENODEEX_NB_SKIPPED_NODES             0x0200 ///< show number of skipped sibling nodes: [...17...]
3796 #define WRITENODEEX_SHOW_REND_METHOD             0x0400 ///< show rendering method at end of tag (<div ~F> =Final, <b ~i>=Inline...)
3797 #define WRITENODEEX_SHOW_MISC_INFO               0x0800 ///< show additional info (depend on context)
3798 #define WRITENODEEX_ADD_UPPER_DIR_LANG_ATTR      0x1000 ///< add dir= and lang= grabbed from upper nodes
3799 #define WRITENODEEX_GET_CSS_FILES                0x2000 ///< ensure css files that apply to initial node are returned
3800                                                         ///  in &cssFiles (needed when not starting from root node)
3801 #define WRITENODEEX_INCLUDE_STYLESHEET_ELEMENT   0x4000 ///< includes crengine <stylesheet> element in HTML
3802                                                         ///  (not done if outside of sub-tree)
3803 #define WRITENODEEX_COMPUTED_STYLES_AS_ATTR      0x8000 ///< set style='' from computed styles (not implemented)
3804 
3805 
3806 #define WNEFLAG(x) ( wflags & WRITENODEEX_##x )
3807 
writeNodeEx(LVStream * stream,ldomNode * node,lString32Collection & cssFiles,int wflags=0,ldomXPointerEx startXP=ldomXPointerEx (),ldomXPointerEx endXP=ldomXPointerEx (),int indentBaseLevel=-1)3808 static void writeNodeEx( LVStream * stream, ldomNode * node, lString32Collection & cssFiles, int wflags=0,
3809     ldomXPointerEx startXP=ldomXPointerEx(), ldomXPointerEx endXP=ldomXPointerEx(), int indentBaseLevel=-1)
3810 {
3811     bool isStartNode = false;
3812     bool isEndNode = false;
3813     bool isAfterStart = false;
3814     bool isBeforeEnd = false;
3815     bool containsStart = false;
3816     bool containsEnd = false;
3817 
3818     if ( !startXP.isNull() && !endXP.isNull() ) {
3819         ldomXPointerEx currentEXP = ldomXPointerEx(node, 0);
3820         // Use start (offset=0) of text node for comparisons, but keep original XPointers
3821         ldomXPointerEx startEXP = ldomXPointerEx( startXP );
3822         startEXP.setOffset(0);
3823         ldomXPointerEx endEXP = ldomXPointerEx( endXP );
3824         endEXP.setOffset(0);
3825         if (currentEXP == startEXP)
3826             isStartNode = true;
3827         if (currentEXP == endEXP)
3828             isEndNode = true;
3829         if ( currentEXP.compare( startEXP ) >= 0 ) {
3830             isAfterStart = true;
3831         }
3832         if ( currentEXP.compare( endEXP ) <= 0 ) {
3833             isBeforeEnd = true;
3834         }
3835         ldomNode *tmp;
3836         tmp = startXP.getNode();
3837         while (tmp) {
3838             if (tmp == node) {
3839                 containsStart = true;
3840                 break;
3841             }
3842             tmp = tmp->getParentNode();
3843         }
3844         tmp = endXP.getNode();
3845         while (tmp) {
3846             if (tmp == node) {
3847                 containsEnd = true;
3848                 break;
3849             }
3850             tmp = tmp->getParentNode();
3851         }
3852     }
3853     else {
3854         containsStart = true;
3855         containsEnd = true;
3856         isAfterStart = true;
3857         isBeforeEnd = true;
3858         // but not isStartNode nor isEndNode, as these use startXP and endXP
3859     }
3860 
3861     bool isInitialNode = false;
3862     lString32 initialDirAttribute = lString32::empty_str;
3863     lString32 initialLangAttribute = lString32::empty_str;
3864     if (indentBaseLevel < 0) { // initial call (recursive ones will have it >=0)
3865         indentBaseLevel = node->getNodeLevel();
3866         isInitialNode = true;
3867         if ( WNEFLAG(ADD_UPPER_DIR_LANG_ATTR) && !node->isRoot() ) {
3868             // Grab any dir="rtl" and lang="ar_AA" attributes from some parent node
3869             if ( !node->hasAttribute( attr_dir ) ) {
3870                 ldomNode *pnode = node->getParentNode();
3871                 for ( ; pnode && !pnode->isNull() && !pnode->isRoot(); pnode = pnode->getParentNode() ) {
3872                     if ( pnode->hasAttribute(attr_dir) ) {
3873                         initialDirAttribute = pnode->getAttributeValue(attr_dir);
3874                         break;
3875                     }
3876                 }
3877             }
3878             if ( !node->hasAttribute( attr_lang ) ) {
3879                 ldomNode *pnode = node->getParentNode();
3880                 for ( ; pnode && !pnode->isNull() && !pnode->isRoot(); pnode = pnode->getParentNode() ) {
3881                     if ( pnode->hasAttribute(attr_lang) ) {
3882                         initialLangAttribute = pnode->getAttributeValue(attr_lang);
3883                         break;
3884                     }
3885                 }
3886             }
3887         }
3888     }
3889     int level = node->getNodeLevel();
3890     if ( node->isText() && isAfterStart && isBeforeEnd ) {
3891         bool doNewLine =  WNEFLAG(NEWLINE_ALL_NODES);
3892         bool doIndent = doNewLine && WNEFLAG(INDENT_NEWLINE);
3893         lString32 txt = node->getText();
3894         lString8 prefix = lString8::empty_str;
3895         lString8 suffix = lString8::empty_str;
3896 
3897         if ( isEndNode ) {
3898             // show the number of chars not written after selection "(...n...)"
3899             int nodeLength = endXP.getText().length();
3900             int endOffset = endXP.getOffset();
3901             if (endOffset < nodeLength) {
3902                 txt = txt.substr(0, endOffset);
3903                 if ( WNEFLAG(NB_SKIPPED_CHARS) )
3904                     suffix << "(…" << lString8().appendDecimal(nodeLength-endOffset) << "…)";
3905             }
3906         }
3907         if ( WNEFLAG(TEXT_MARK_NODE_BOUNDARIES) ) {
3908             // We use non-ordinary chars to mark start and end of text
3909             // node, which can help noticing spaces at start or end
3910             // when NEWLINE_ALL_NODES and INDENT_NEWLINE are used.
3911             // Some candidates chars are:
3912             //   Greyish, discreet, but may be confused with parenthesis:
3913             //     prefix << "⟨"; // U+27E8 Mathematical Left Angle Bracket
3914             //     suffix << "⟩"; // U+27E9 Mathematical Right Angle Bracket
3915             //   Greyish, a bit less discreet, but won't be confused with any other casual char:
3916             //     prefix << "⟪"; // U+27EA Mathematical Left Double Angle Bracket
3917             //     suffix << "⟫"; // U+27EB Mathematical Right Double Angle Bracket
3918             //   A bit too dark, but won't be confused with any other casual char:
3919             //     prefix << "⎛"; // U+239B Left Parenthesis Upper Hook
3920             //     suffix << "⎠"; // U+23A0 Right Parenthesis Lower Hook (may have too much leading space)
3921             prefix << "⟪"; // U+27EA Mathematical Left Double Angle Bracket
3922             suffix << "⟫"; // U+27EB Mathematical Right Double Angle Bracket
3923         }
3924         if ( isStartNode ) {
3925             // show the number of chars not written before selection "(...n...)"
3926             int offset = startXP.getOffset();
3927             if (offset > 0) {
3928                 txt = txt.substr(offset);
3929                 if ( WNEFLAG(NB_SKIPPED_CHARS) )
3930                     prefix << "(…" << lString8().appendDecimal(offset) << "…)";
3931             }
3932             if ( WNEFLAG(NB_SKIPPED_NODES) ) {
3933                 // show the number of sibling nodes not written before selection "[...n..]"
3934                 int nbIgnoredPrecedingSiblings = node->getNodeIndex();
3935                 if (nbIgnoredPrecedingSiblings) {
3936                     if (doIndent)
3937                         for ( int i=indentBaseLevel; i<level; i++ )
3938                             *stream << "  ";
3939                     *stream << "[…" << lString8().appendDecimal(nbIgnoredPrecedingSiblings) << "…]";
3940                     if (doNewLine)
3941                         *stream << "\n";
3942                 }
3943             }
3944         }
3945         if (doIndent)
3946             for ( int i=indentBaseLevel; i<level; i++ )
3947                 *stream << "  ";
3948         if ( ! WNEFLAG(TEXT_UNESCAPED) ) {
3949             // Use a temporary char we're not likely to find in the DOM
3950             // (see https://en.wikipedia.org/wiki/Specials_(Unicode_block) )
3951             // for 2-steps '&' replacement (to avoid infinite loop or the
3952             // need for more complicated code)
3953             while ( txt.replace( cs32("&"), cs32(U"\xFFFF") ) ) ;
3954             while ( txt.replace( cs32(U"\xFFFF"), cs32("&amp;") ) ) ;
3955             while ( txt.replace( cs32("<"), cs32("&lt;") ) ) ;
3956             while ( txt.replace( cs32(">"), cs32("&gt;") ) ) ;
3957         }
3958         #define HYPH_MIN_WORD_LEN_TO_HYPHENATE 4
3959         #define HYPH_MAX_WORD_SIZE 64
3960         // (No hyphenation if we are showing unicode codepoint)
3961         if ( WNEFLAG(TEXT_SHOW_UNICODE_CODEPOINT) ) {
3962             *stream << prefix;
3963             for ( int i=0; i<txt.length(); i++ )
3964                 *stream << UnicodeToUtf8(txt.substr(i, 1)) << "⟨U+" << lString8().appendHex(txt[i]) << "⟩";
3965             *stream << suffix;
3966         }
3967         else if ( WNEFLAG(TEXT_HYPHENATE) && HyphMan::isEnabled() && txt.length() >= HYPH_MIN_WORD_LEN_TO_HYPHENATE ) {
3968             // Add soft-hyphens where HyphMan (with the user or language current hyphenation
3969             // settings) says hyphenation is allowed.
3970             // We do that here while we output the text to avoid the need
3971             // for temporary storage of a string with soft-hyphens added.
3972             const lChar32 * text32 = txt.c_str();
3973             int txtlen = txt.length();
3974             lUInt8 * flags = (lUInt8*)calloc(txtlen, sizeof(*flags));
3975             lUInt16 widths[HYPH_MAX_WORD_SIZE] = { 0 }; // array needed by hyphenate()
3976             // Lookup words starting from the end, just because lStr_findWordBounds()
3977             // will ensure the iteration that way.
3978             int wordpos = txtlen;
3979             while ( wordpos > 0 ) {
3980                 // lStr_findWordBounds() will find the word contained at wordpos
3981                 // (or the previous word if wordpos happens to be a space or some
3982                 // punctuation) by looking only for alpha chars in m_text.
3983                 int start, end;
3984                 lStr_findWordBounds( text32, txtlen, wordpos, start, end );
3985                 if ( end <= HYPH_MIN_WORD_LEN_TO_HYPHENATE ) {
3986                     // Too short word at start, we're done
3987                     break;
3988                 }
3989                 int len = end - start;
3990                 if ( len < HYPH_MIN_WORD_LEN_TO_HYPHENATE ) {
3991                     // Too short word found, skip it
3992                     wordpos = start - 1;
3993                     continue;
3994                 }
3995                 if ( start >= wordpos ) {
3996                     // Shouldn't happen, but let's be sure we don't get stuck
3997                     wordpos = wordpos - HYPH_MIN_WORD_LEN_TO_HYPHENATE;
3998                     continue;
3999                 }
4000                 // We have a valid word to look for hyphenation
4001                 if ( len > HYPH_MAX_WORD_SIZE ) // hyphenate() stops/truncates at 64 chars
4002                     len = HYPH_MAX_WORD_SIZE;
4003                 // Have hyphenate() set flags inside 'flags'
4004                 // (Fetching the lang_cfg for each text node is not really cheap, but
4005                 // it's easier than having to pass it to each writeNodeEx())
4006                 TextLangMan::getTextLangCfg(node)->getHyphMethod()->hyphenate(text32+start, len, widths, flags+start, 0, 0xFFFF, 1);
4007                 // Continue with previous word
4008                 wordpos = start - 1;
4009             }
4010             // Output text, and add a soft-hyphen where there are flags
4011             *stream << prefix;
4012             for ( int i=0; i<txt.length(); i++ ) {
4013                 *stream << UnicodeToUtf8(txt.substr(i, 1));
4014                 if ( flags[i] & LCHAR_ALLOW_HYPH_WRAP_AFTER )
4015                     *stream << "­";
4016             }
4017             *stream << suffix;
4018             free(flags);
4019         }
4020         else {
4021             *stream << prefix << UnicodeToUtf8(txt) << suffix;
4022         }
4023         if (doNewLine)
4024             *stream << "\n";
4025         if ( isEndNode && WNEFLAG(NB_SKIPPED_NODES) ) {
4026             // show the number of sibling nodes not written after selection "[...n..]"
4027             ldomNode * parent = node->getParentNode();
4028             int nbIgnoredFollowingSiblings = parent ? (parent->getChildCount() - 1 - node->getNodeIndex()) : 0;
4029             if (nbIgnoredFollowingSiblings) {
4030                 if (doIndent)
4031                     for ( int i=indentBaseLevel; i<level; i++ )
4032                         *stream << "  ";
4033                 *stream << "[…" << lString8().appendDecimal(nbIgnoredFollowingSiblings) << "…]";
4034                 if (doNewLine)
4035                     *stream << "\n";
4036             }
4037         }
4038     }
4039     else if ( node->isElement() ) {
4040         lString8 elemName = UnicodeToUtf8(node->getNodeName());
4041         lString8 elemNsName = UnicodeToUtf8(node->getNodeNsName());
4042         // Write elements that are between start and end, but also those that
4043         // are parents of start and end nodes
4044         bool toWrite = (isAfterStart && isBeforeEnd) || containsStart || containsEnd;
4045         bool isStylesheetTag = false;
4046         if ( node->getNodeId() == el_stylesheet ) {
4047             toWrite = false;
4048             if ( WNEFLAG(INCLUDE_STYLESHEET_ELEMENT) ) {
4049                 // We may meet a <stylesheet> tag that is not between startXP and endXP and
4050                 // does not contain any of them, but its parent (body or DocFragment) does.
4051                 // Write it if requested, as it's useful when inspecting HTML.
4052                 toWrite = true;
4053                 isStylesheetTag = true; // for specific parsing and writting
4054             }
4055         }
4056         if ( ! toWrite )
4057             return;
4058 
4059         // In case we're called (when debugging) while styles have been reset,
4060         // avoid crash on stuff like isBoxingInlineBox()/isFloatingBox() that
4061         // do check styles
4062         bool has_styles_set = !node->getStyle().isNull();
4063 
4064         bool doNewLineBeforeStartTag = false;
4065         bool doNewLineAfterStartTag = false;
4066         bool doNewLineBeforeEndTag = false; // always stays false, newline done by child elements
4067         bool doNewLineAfterEndTag = false;
4068         bool doIndentBeforeStartTag = false;
4069         bool doIndentBeforeEndTag = false;
4070         // Specific for floats and inline-blocks among inlines inside final, that
4071         // we want to show on their own lines:
4072         bool doNewlineBeforeIndentBeforeStartTag = false;
4073         bool doIndentAfterNewLineAfterEndTag = false;
4074         bool doIndentOneLevelLessAfterNewLineAfterEndTag = false;
4075         if ( WNEFLAG(NEWLINE_ALL_NODES) ) {
4076             doNewLineBeforeStartTag = true;
4077             doNewLineAfterStartTag = true;
4078             // doNewLineBeforeEndTag = false; // done by child elements
4079             doNewLineAfterEndTag = true;
4080             doIndentBeforeStartTag = WNEFLAG(INDENT_NEWLINE);
4081             doIndentBeforeEndTag = WNEFLAG(INDENT_NEWLINE);
4082         }
4083         else if ( WNEFLAG(NEWLINE_BLOCK_NODES) ) {
4084             // We consider block elements according to crengine decision for their
4085             // rendering method, which gives us a visual hint of it.
4086             lvdom_element_render_method rm = node->getRendMethod();
4087             // Text and inline nodes stay stuck together, but not all others
4088             if (rm == erm_invisible) {
4089                 // We don't know how invisible nodes would be displayed if
4090                 // they were visible. Make the invisible tree like inline
4091                 // among finals, so they don't take too much height.
4092                 if (node->getParentNode()) {
4093                     rm = node->getParentNode()->getRendMethod();
4094                     if (rm == erm_invisible || rm == erm_inline || rm == erm_final)
4095                         rm = erm_inline;
4096                     else
4097                         rm = erm_final;
4098                 }
4099             }
4100             if ( rm != erm_inline || (has_styles_set && node->isBoxingInlineBox()) ) {
4101                 doNewLineBeforeStartTag = true;
4102                 doNewLineAfterStartTag = true;
4103                 // doNewLineBeforeEndTag = false; // done by child elements
4104                 doNewLineAfterEndTag = true;
4105                 doIndentBeforeStartTag = WNEFLAG(INDENT_NEWLINE);
4106                 doIndentBeforeEndTag = WNEFLAG(INDENT_NEWLINE);
4107                 if (rm == erm_final) {
4108                     // Nodes with rend method erm_final contain only text and inline nodes.
4109                     // We want these erm_final indented, but not their content
4110                     doNewLineAfterStartTag = false;
4111                     doIndentBeforeEndTag = false;
4112                 }
4113                 else if (has_styles_set && node->isFloatingBox()) {
4114                     lvdom_element_render_method prm = node->getParentNode()->getRendMethod();
4115                     if (prm == erm_final || prm == erm_inline) {
4116                         doNewlineBeforeIndentBeforeStartTag = true;
4117                         doIndentAfterNewLineAfterEndTag = WNEFLAG(INDENT_NEWLINE);
4118                         // If we're the last node in parent collection, indent one level less,
4119                         // so that next node (the parent) is not at this node level
4120                         ldomNode * parent = node->getParentNode();
4121                         if ( parent && (node->getNodeIndex() == parent->getChildCount()-1) )
4122                             doIndentOneLevelLessAfterNewLineAfterEndTag = true;
4123                         else if ( parent && (node->getNodeIndex() == parent->getChildCount()-2)
4124                                          && parent->getChildNode(parent->getChildCount()-1)->isText() )
4125                             doIndentOneLevelLessAfterNewLineAfterEndTag = true;
4126                         else if ( containsEnd ) // same if next siblings won't be shown
4127                             doIndentOneLevelLessAfterNewLineAfterEndTag = true;
4128                         // But if previous sibling node is a floating or boxing inline node
4129                         // that have done what we just did, cancel some of what we did
4130                         if ( node->getNodeIndex() > 0 ) {
4131                             ldomNode * prevsibling = parent->getChildNode(node->getNodeIndex()-1);
4132                             if ( prevsibling->isFloatingBox() || prevsibling->isBoxingInlineBox() ) {
4133                                 doNewlineBeforeIndentBeforeStartTag = false;
4134                                 doIndentBeforeStartTag = false;
4135                             }
4136                         }
4137                     }
4138                 }
4139                 else if (has_styles_set && node->isBoxingInlineBox()) {
4140                     doNewlineBeforeIndentBeforeStartTag = true;
4141                     doIndentAfterNewLineAfterEndTag = WNEFLAG(INDENT_NEWLINE);
4142                     // Same as above
4143                     ldomNode * parent = node->getParentNode();
4144                     if ( parent && (node->getNodeIndex() == parent->getChildCount()-1) )
4145                         doIndentOneLevelLessAfterNewLineAfterEndTag = true;
4146                     else if ( parent && (node->getNodeIndex() == parent->getChildCount()-2)
4147                                      && parent->getChildNode(parent->getChildCount()-1)->isText() )
4148                         doIndentOneLevelLessAfterNewLineAfterEndTag = true;
4149                     else if ( containsEnd )
4150                         doIndentOneLevelLessAfterNewLineAfterEndTag = true;
4151                     if ( node->getNodeIndex() > 0 ) {
4152                         ldomNode * prevsibling = parent->getChildNode(node->getNodeIndex()-1);
4153                         if ( prevsibling->isFloatingBox() || prevsibling->isBoxingInlineBox() ) {
4154                             doNewlineBeforeIndentBeforeStartTag = false;
4155                             doIndentBeforeStartTag = false;
4156                         }
4157                     }
4158                 }
4159             }
4160         }
4161 
4162         if ( containsStart && WNEFLAG(NB_SKIPPED_NODES) ) {
4163             // Previous siblings did not contain startXP: show how many they are
4164             int nbIgnoredPrecedingSiblings = node->getNodeIndex();
4165             if (nbIgnoredPrecedingSiblings && WNEFLAG(INCLUDE_STYLESHEET_ELEMENT) &&
4166                     node->getParentNode()->getFirstChild()->isElement() &&
4167                     node->getParentNode()->getFirstChild()->getNodeId() == el_stylesheet) {
4168                 nbIgnoredPrecedingSiblings--; // we have written the <stylesheet> tag
4169             }
4170             if (nbIgnoredPrecedingSiblings) {
4171                 if (doIndentBeforeStartTag)
4172                     for ( int i=indentBaseLevel; i<level; i++ )
4173                         *stream << "  ";
4174                 *stream << "[…" << lString8().appendDecimal(nbIgnoredPrecedingSiblings) << "…]";
4175                 if (doNewLineBeforeStartTag)
4176                     *stream << "\n";
4177             }
4178         }
4179         if (doNewlineBeforeIndentBeforeStartTag)
4180             *stream << "\n";
4181         if (doIndentBeforeStartTag)
4182             for ( int i=indentBaseLevel; i<level; i++ )
4183                 *stream << "  ";
4184         if ( elemName.empty() ) {
4185             // should not happen (except for the root node, that we might have skipped)
4186             elemName = node->isRoot() ? lString8("RootNode") : (elemNsName + "???");
4187         }
4188         if ( !elemNsName.empty() )
4189             elemName = elemNsName + ":" + elemName;
4190         *stream << "<" << elemName;
4191         if ( isInitialNode ) {
4192             // Add any dir="rtl" and lang="ar_AA" attributes grabbed from some parent node
4193             if ( !initialDirAttribute.empty() ) {
4194                 *stream << " dir=\"" << UnicodeToUtf8(initialDirAttribute) << "\"";
4195             }
4196             if ( !initialLangAttribute.empty() ) {
4197                 *stream << " lang=\"" << UnicodeToUtf8(initialLangAttribute) << "\"";
4198             }
4199         }
4200         for ( int i=0; i<(int)node->getAttrCount(); i++ ) {
4201             const lxmlAttribute * attr = node->getAttribute(i);
4202             if (attr) {
4203                 lString8 attrName( UnicodeToUtf8(node->getDocument()->getAttrName(attr->id)) );
4204                 lString8 nsName( UnicodeToUtf8(node->getDocument()->getNsName(attr->nsid)) );
4205                 lString8 attrValue( UnicodeToUtf8(node->getDocument()->getAttrValue(attr->index)) );
4206                 if ( WNEFLAG(SHOW_MISC_INFO) && has_styles_set ) {
4207                     if ( node->getNodeId() == el_pseudoElem && (attr->id == attr_Before || attr->id == attr_After) ) {
4208                         // Show the rendered content as the otherwise empty Before/After attribute value
4209                         if ( WNEFLAG(TEXT_SHOW_UNICODE_CODEPOINT) ) {
4210                             lString32 content = get_applied_content_property(node);
4211                             attrValue.empty();
4212                             for ( int i=0; i<content.length(); i++ ) {
4213                                 attrValue << UnicodeToUtf8(content.substr(i, 1)) << "⟨U+" << lString8().appendHex(content[i]) << "⟩";
4214                             }
4215                         }
4216                         else {
4217                             attrValue = UnicodeToUtf8(get_applied_content_property(node));
4218                         }
4219                     }
4220                 }
4221                 *stream << " ";
4222                 if ( nsName.length() > 0 )
4223                     *stream << nsName << ":";
4224                 *stream << attrName;
4225                 if ( !attrValue.empty() ) // don't show ="" if empty
4226                     *stream << "=\"" << attrValue << "\"";
4227                 if ( attrName == "StyleSheet" ) { // gather linked css files
4228                     lString32 cssFile = node->getDocument()->getAttrValue(attr->index);
4229                     if (!cssFiles.contains(cssFile))
4230                         cssFiles.add(cssFile);
4231                 }
4232             }
4233         }
4234         if ( WNEFLAG(SHOW_REND_METHOD) ) {
4235             *stream << " ~";
4236             switch ( node->getRendMethod() ) {
4237                 case erm_invisible:          *stream << "X";     break;
4238                 case erm_killed:             *stream << "K";     break;
4239                 case erm_block:              *stream << "B";     break;
4240                 case erm_final:              *stream << "F";     break;
4241                 case erm_inline:             *stream << "i";     break;
4242                 case erm_table:              *stream << "T";     break;
4243                 case erm_table_row_group:    *stream << "TRG";   break;
4244                 case erm_table_header_group: *stream << "THG";   break;
4245                 case erm_table_footer_group: *stream << "TFG";   break;
4246                 case erm_table_row:          *stream << "TR";    break;
4247                 case erm_table_column_group: *stream << "TCG";   break;
4248                 case erm_table_column:       *stream << "TC";    break;
4249                 default:                     *stream << "?";     break;
4250             }
4251         }
4252         if ( node->getChildCount() == 0 ) {
4253             if ( elemName[0] == '?' )
4254                 *stream << "?>";
4255             else
4256                 *stream << "/>";
4257         }
4258         else {
4259             *stream << ">";
4260             if (doNewLineAfterStartTag)
4261                 *stream << "\n";
4262             if ( ! isStylesheetTag ) {
4263                 for ( int i=0; i<(int)node->getChildCount(); i++ ) {
4264                     writeNodeEx( stream, node->getChildNode(i), cssFiles, wflags, startXP, endXP, indentBaseLevel );
4265                 }
4266             }
4267             else {
4268                 // We need to parse the stylesheet tag text to extract css files path.
4269                 // We write its content without indentation and add a \n for readability.
4270                 lString8 txt = node->getText8();
4271                 int txtlen = txt.length();
4272                 if (txtlen && txt.substr(txtlen-1) != "\n") {
4273                     txt << "\n";
4274                 }
4275                 *stream << txt;
4276                 // Parse @import'ed files to gather linked css files (we don't really need to
4277                 // do recursive parsing of @import, which are very rare, we just want to get
4278                 // the 2nd++ linked css files that were put there by crengine).
4279                 const char * s = txt.c_str();
4280                 while (true) {
4281                     lString8 import_file;
4282                     if ( ! LVProcessStyleSheetImport( s, import_file ) ) {
4283                         break;
4284                     }
4285                     lString32 cssFile = LVCombinePaths( node->getAttributeValue(attr_href), Utf8ToUnicode(import_file) );
4286                     if ( !cssFile.empty() && !cssFiles.contains(cssFile) ) {
4287                         cssFiles.add(cssFile);
4288                     }
4289                 }
4290             }
4291             if (doNewLineBeforeEndTag)
4292                 *stream << "\n";
4293             if (doIndentBeforeEndTag)
4294                 for ( int i=indentBaseLevel; i<level; i++ )
4295                     *stream << "  ";
4296             *stream << "</" << elemName << ">";
4297             if ( WNEFLAG(TEXT_HYPHENATE) ) {
4298                 // Additional minor formatting tweaks for when this is going to be fed
4299                 // to some other renderer, which is usually when we request HYPHENATE.
4300                 if ( has_styles_set && node->getStyle()->display == css_d_run_in ) {
4301                     // For FB2 footnotes, add a space between the number and text,
4302                     // as none might be present in the source. If there were some,
4303                     // the other renderer will probably collapse them.
4304                     *stream << " ";
4305                 }
4306             }
4307         }
4308         if (doNewLineAfterEndTag)
4309             *stream << "\n";
4310         if (doIndentAfterNewLineAfterEndTag) {
4311             int ilevel = doIndentOneLevelLessAfterNewLineAfterEndTag ? level-1 : level;
4312             for ( int i=indentBaseLevel; i<ilevel; i++ )
4313                 *stream << "  ";
4314         }
4315         if ( containsEnd && WNEFLAG(NB_SKIPPED_NODES) ) {
4316             // Next siblings will not contain endXP and won't be written: show how many they are
4317             ldomNode * parent = node->getParentNode();
4318             int nbIgnoredFollowingSiblings = parent ? (parent->getChildCount() - 1 - node->getNodeIndex()) : 0;
4319             if (nbIgnoredFollowingSiblings) {
4320                 if (doIndentBeforeEndTag)
4321                     for ( int i=indentBaseLevel; i<level; i++ )
4322                         *stream << "  ";
4323                 *stream << "[…" << lString8().appendDecimal(nbIgnoredFollowingSiblings) << "…]";
4324                 if (doNewLineAfterEndTag)
4325                     *stream << "\n";
4326             }
4327         }
4328         if ( isInitialNode && cssFiles.length()==0 && WNEFLAG(GET_CSS_FILES) && !node->isRoot() ) {
4329             // We have gathered CSS files as we walked the DOM, which we usually
4330             // do from the root node if we want CSS files.
4331             // In case we started from an inner node, and we are requested for
4332             // CSS files - but we have none - walk the DOM back to gather them.
4333             ldomNode *pnode = node->getParentNode();
4334             for ( ; pnode && !pnode->isNull() && !pnode->isRoot(); pnode = pnode->getParentNode() ) {
4335                 if ( pnode->getNodeId() == el_DocFragment || pnode->getNodeId() == el_body ) {
4336                     // The CSS file in StyleSheet="" attribute was the first one seen by
4337                     // crengine, so add it first to cssFiles
4338                     if (pnode->hasAttribute(attr_StyleSheet) ) {
4339                         lString32 cssFile = pnode->getAttributeValue(attr_StyleSheet);
4340                         if (!cssFiles.contains(cssFile))
4341                             cssFiles.add(cssFile);
4342                     }
4343                     // And then the CSS files in @import in the <stylesheet> element
4344                     if ( pnode->getChildCount() > 0 ) {
4345                         ldomNode *styleNode = pnode->getFirstChild();
4346                         if ( styleNode && styleNode->getNodeId()==el_stylesheet ) {
4347                             // Do as done above
4348                             lString8 txt = pnode->getText8();
4349                             const char * s = txt.c_str();
4350                             while (true) {
4351                                 lString8 import_file;
4352                                 if ( ! LVProcessStyleSheetImport( s, import_file ) ) {
4353                                     break;
4354                                 }
4355                                 lString32 cssFile = LVCombinePaths( pnode->getAttributeValue(attr_href), Utf8ToUnicode(import_file) );
4356                                 if ( !cssFile.empty() && !cssFiles.contains(cssFile) ) {
4357                                     cssFiles.add(cssFile);
4358                                 }
4359                             }
4360                         }
4361                     }
4362                 }
4363             }
4364         }
4365     }
4366 }
4367 
saveToStream(LVStreamRef stream,const char *,bool treeLayout)4368 bool ldomDocument::saveToStream( LVStreamRef stream, const char *, bool treeLayout )
4369 {
4370     //CRLog::trace("ldomDocument::saveToStream()");
4371     if (!stream || !getRootNode()->getChildCount())
4372         return false;
4373 
4374     *stream.get() << UnicodeToLocal(cs32(U"\xFEFF"));
4375     writeNode( stream.get(), getRootNode(), treeLayout );
4376     return true;
4377 }
4378 
printWarning(const char * msg,int warning_id)4379 void ldomDocument::printWarning(const char * msg, int warning_id) {
4380     // Provide a warning_id from 1 to 32 to have this warning emited only once
4381     // Provide 0 to have it printed it every time
4382     lUInt32 warning_bit = 0;
4383     if ( warning_id > 0 && warning_id <= 32 ) {
4384         warning_bit = 1 << (warning_id-1);
4385     }
4386     if ( !( warning_bit & _warnings_seen_bitmap) ) {
4387         printf("CRE WARNING: %s\n", msg);
4388         _warnings_seen_bitmap |= warning_bit;
4389     }
4390 }
4391 
/// Destructor: runs a last updateMap() (non-LITE builds only), then drops the
/// registrations made for this document with the font manager and ldomNode.
ldomDocument::~ldomDocument()
{
#if BUILD_LITE!=1
    // NOTE(review): presumably brings the document map/cache up to date
    // before the document goes away - confirm against updateMap().
    updateMap(); // NOLINT: Call to virtual function during destruction
#endif
    // Fonts are registered per document index: release those of this document
    fontMan->UnregisterDocumentFonts(_docIndex);
    // Must come last: the statements above still use this document
    // NOTE(review): assumed ordering constraint - confirm.
    ldomNode::unregisterDocument(this);
}
4400 
4401 #if BUILD_LITE!=1
4402 
4403 class LVImportStylesheetParser
4404 {
4405 public:
LVImportStylesheetParser(ldomDocument * document)4406     LVImportStylesheetParser(ldomDocument *document) :
4407         _document(document), _nestingLevel(0)
4408     {
4409     }
4410 
~LVImportStylesheetParser()4411     ~LVImportStylesheetParser()
4412     {
4413         _inProgress.clear();
4414     }
4415 
Parse(lString32 cssFile)4416     bool Parse(lString32 cssFile)
4417     {
4418         bool ret = false;
4419         if ( cssFile.empty() )
4420             return ret;
4421 
4422         lString32 codeBase = cssFile;
4423         LVExtractLastPathElement(codeBase);
4424         LVContainerRef container = _document->getContainer();
4425         if (!container.isNull()) {
4426             LVStreamRef cssStream = container->OpenStream(cssFile.c_str(), LVOM_READ);
4427             if (!cssStream.isNull()) {
4428                 lString32 css;
4429                 css << LVReadTextFile(cssStream);
4430                 int offset = _inProgress.add(cssFile);
4431                 ret = Parse(codeBase, css) || ret;
4432                 _inProgress.erase(offset, 1);
4433             }
4434         }
4435         return ret;
4436     }
4437 
Parse(lString32 codeBase,lString32 css)4438     bool Parse(lString32 codeBase, lString32 css)
4439     {
4440         bool ret = false;
4441         if ( css.empty() )
4442             return ret;
4443         lString8 css8 = UnicodeToUtf8(css);
4444         const char * s = css8.c_str();
4445 
4446         _nestingLevel += 1;
4447         while (_nestingLevel < 11) { //arbitrary limit
4448             lString8 import_file;
4449 
4450             if ( LVProcessStyleSheetImport( s, import_file ) ) {
4451                 lString32 importFilename = LVCombinePaths( codeBase, Utf8ToUnicode(import_file) );
4452                 if ( !importFilename.empty() && !_inProgress.contains(importFilename) ) {
4453                     ret = Parse(importFilename) || ret;
4454                 }
4455             } else {
4456                 break;
4457             }
4458         }
4459         _nestingLevel -= 1;
4460         return (_document->getStyleSheet()->parse(s, false, codeBase) || ret);
4461     }
4462 private:
4463     ldomDocument  *_document;
4464     lString32Collection _inProgress;
4465     int _nestingLevel;
4466 };
4467 
4468 /// renders (formats) document in memory
setRenderProps(int width,int dy,bool,int,font_ref_t def_font,int def_interline_space,CRPropRef props)4469 bool ldomDocument::setRenderProps( int width, int dy, bool /*showCover*/, int /*y0*/, font_ref_t def_font, int def_interline_space, CRPropRef props )
4470 {
4471     // Note: def_interline_space is no more used here
4472     bool changed = false;
4473     // Don't clear this cache of LFormattedText if
4474     // render props don't change.
4475     //   _renderedBlockCache.clear();
4476     changed = _imgScalingOptions.update(props, def_font->getSize()) || changed;
4477     css_style_ref_t s( new css_style_rec_t );
4478     s->display = css_d_block;
4479     s->white_space = css_ws_normal;
4480     s->text_align = css_ta_start;
4481     s->text_align_last = css_ta_auto;
4482     s->text_decoration = css_td_none;
4483     s->text_transform = css_tt_none;
4484     s->hyphenate = css_hyph_auto;
4485     s->color.type = css_val_unspecified;
4486     s->color.value = 0x000000;
4487     s->background_color.type = css_val_unspecified;
4488     s->background_color.value = 0xFFFFFF;
4489     //_def_style->background_color.type = color;
4490     //_def_style->background_color.value = 0xFFFFFF;
4491     s->page_break_before = css_pb_auto;
4492     s->page_break_after = css_pb_auto;
4493     s->page_break_inside = css_pb_auto;
4494     s->list_style_type = css_lst_disc;
4495     s->list_style_position = css_lsp_outside;
4496     s->vertical_align.type = css_val_unspecified;
4497     s->vertical_align.value = css_va_baseline;
4498     s->font_family = def_font->getFontFamily();
4499     s->font_size.type = css_val_screen_px; // we use this type, as we got the real font size from FontManager
4500     s->font_size.value = def_font->getSize();
4501     s->font_name = def_font->getTypeFace();
4502     s->font_weight = css_fw_400;
4503     s->font_style = css_fs_normal;
4504     s->font_features.type = css_val_unspecified;
4505     s->font_features.value = 0;
4506     s->text_indent.type = css_val_px;
4507     s->text_indent.value = 0;
4508     // s->line_height.type = css_val_percent;
4509     // s->line_height.value = def_interline_space << 8;
4510     s->line_height.type = css_val_unspecified;
4511     s->line_height.value = css_generic_normal; // line-height: normal
4512     s->orphans = css_orphans_widows_1; // default to allow orphans and widows
4513     s->widows = css_orphans_widows_1;
4514     s->float_ = css_f_none;
4515     s->clear = css_c_none;
4516     s->direction = css_dir_inherit;
4517     s->cr_hint.type = css_val_unspecified;
4518     s->cr_hint.value = CSS_CR_HINT_NONE;
4519     //lUInt32 defStyleHash = (((_stylesheet.getHash() * 31) + calcHash(_def_style))*31 + calcHash(_def_font));
4520     //defStyleHash = defStyleHash * 31 + getDocFlags();
4521     if ( _last_docflags != getDocFlags() ) {
4522         CRLog::trace("ldomDocument::setRenderProps() - doc flags changed");
4523         _last_docflags = getDocFlags();
4524         changed = true;
4525     }
4526     if ( calcHash(_def_style) != calcHash(s) ) {
4527         CRLog::trace("ldomDocument::setRenderProps() - style is changed");
4528         _def_style = s;
4529         changed = true;
4530     }
4531     if ( calcHash(_def_font) != calcHash(def_font)) {
4532         CRLog::trace("ldomDocument::setRenderProps() - font is changed");
4533         _def_font = def_font;
4534         changed = true;
4535     }
4536     if ( _page_height != dy && dy > 0 ) {
4537         CRLog::trace("ldomDocument::setRenderProps() - page height is changed: %d != %d", _page_height, dy);
4538         _page_height = dy;
4539         changed = true;
4540     }
4541     if ( _page_width != width && width > 0 ) {
4542         CRLog::trace("ldomDocument::setRenderProps() - page width is changed");
4543         _page_width = width;
4544         changed = true;
4545     }
4546 //    {
4547 //        lUInt32 styleHash = calcStyleHash();
4548 //        styleHash = styleHash * 31 + calcGlobalSettingsHash();
4549 //        CRLog::debug("Style hash before set root style: %x", styleHash);
4550 //    }
4551 //    getRootNode()->setFont( _def_font );
4552 //    getRootNode()->setStyle( _def_style );
4553 //    {
4554 //        lUInt32 styleHash = calcStyleHash();
4555 //        styleHash = styleHash * 31 + calcGlobalSettingsHash();
4556 //        CRLog::debug("Style hash after set root style: %x", styleHash);
4557 //    }
4558     return changed;
4559 }
4560 
dropStyles()4561 void tinyNodeCollection::dropStyles()
4562 {
4563     _styles.clear(-1);
4564     _fonts.clear(-1);
4565     resetNodeNumberingProps();
4566 
4567     int count = ((_elemCount+TNC_PART_LEN-1) >> TNC_PART_SHIFT);
4568     for ( int i=0; i<count; i++ ) {
4569         int offs = i*TNC_PART_LEN;
4570         int sz = TNC_PART_LEN;
4571         if ( offs + sz > _elemCount+1 ) {
4572             sz = _elemCount+1 - offs;
4573         }
4574         ldomNode * buf = _elemList[i];
4575         for ( int j=0; j<sz; j++ ) {
4576             if ( buf[j].isElement() ) {
4577                 setNodeStyleIndex( buf[j]._handle._dataIndex, 0 );
4578                 setNodeFontIndex( buf[j]._handle._dataIndex, 0 );
4579             }
4580         }
4581     }
4582     _nodeStyleHash = 0;
4583 }
4584 
calcFinalBlocks()4585 int tinyNodeCollection::calcFinalBlocks()
4586 {
4587     int cnt = 0;
4588     int count = ((_elemCount+TNC_PART_LEN-1) >> TNC_PART_SHIFT);
4589     for ( int i=0; i<count; i++ ) {
4590         int offs = i*TNC_PART_LEN;
4591         int sz = TNC_PART_LEN;
4592         if ( offs + sz > _elemCount+1 ) {
4593             sz = _elemCount+1 - offs;
4594         }
4595         ldomNode * buf = _elemList[i];
4596         for ( int j=0; j<sz; j++ ) {
4597             if ( buf[j].isElement() ) {
4598                 int rm = buf[j].getRendMethod();
4599                 if ( rm==erm_final )
4600                     cnt++;
4601             }
4602         }
4603     }
4604     return cnt;
4605 }
4606 
4607 // This is mostly only useful for FB2 stylesheet, as we no more set
4608 // anything in _docStylesheetFileName
applyDocumentStyleSheet()4609 void ldomDocument::applyDocumentStyleSheet()
4610 {
4611     if ( !getDocFlag(DOC_FLAG_ENABLE_INTERNAL_STYLES) ) {
4612         CRLog::trace("applyDocumentStyleSheet() : DOC_FLAG_ENABLE_INTERNAL_STYLES is disabled");
4613         return;
4614     }
4615     if ( !_docStylesheetFileName.empty() ) {
4616         if ( getContainer().isNull() )
4617             return;
4618         if ( parseStyleSheet(_docStylesheetFileName) ) {
4619             CRLog::debug("applyDocumentStyleSheet() : Using document stylesheet from link/stylesheet from %s",
4620                          LCSTR(_docStylesheetFileName));
4621         }
4622     } else {
4623         ldomXPointer ss = createXPointer(cs32("/FictionBook/stylesheet"));
4624         if ( !ss.isNull() ) {
4625             lString32 css = ss.getText('\n');
4626             if ( !css.empty() ) {
4627                 CRLog::debug("applyDocumentStyleSheet() : Using internal FB2 document stylesheet:\n%s", LCSTR(css));
4628                 _stylesheet.parse(LCSTR(css));
4629             } else {
4630                 CRLog::trace("applyDocumentStyleSheet() : stylesheet under /FictionBook/stylesheet is empty");
4631             }
4632         } else {
4633             CRLog::trace("applyDocumentStyleSheet() : No internal FB2 stylesheet found under /FictionBook/stylesheet");
4634         }
4635     }
4636 }
4637 
parseStyleSheet(lString32 codeBase,lString32 css)4638 bool ldomDocument::parseStyleSheet(lString32 codeBase, lString32 css)
4639 {
4640     LVImportStylesheetParser parser(this);
4641     return parser.Parse(codeBase, css);
4642 }
4643 
parseStyleSheet(lString32 cssFile)4644 bool ldomDocument::parseStyleSheet(lString32 cssFile)
4645 {
4646     LVImportStylesheetParser parser(this);
4647     return parser.Parse(cssFile);
4648 }
4649 
/// Render the document into pages, or reuse the previous rendering.
///
/// The rendering properties are first applied with setRenderProps(). Then:
/// - if checkRenderContext() reports that the context has changed, all node
///   styles and render methods are dropped and rebuilt, and a full rendering
///   is forced;
/// - if the document is not (or no longer) rendered, it is rendered with
///   renderBlockElement() on the root node, and the resulting page list is
///   serialized into _pagesData;
/// - otherwise, the page list is restored from _pagesData.
///
/// @param pages     page list to fill (cleared and rebuilt on a full rendering,
///                  deserialized from _pagesData otherwise)
/// @param callback  progress/notification callback, may be NULL
/// @param width     rendering width, forwarded to setRenderProps() and renderBlockElement()
/// @param dy        page height, forwarded to setRenderProps()
/// @param showCover when true, a first page is added for the cover
/// @param y0        initial y, forwarded to setRenderProps() and renderBlockElement()
/// @param def_font  default font, forwarded to setRenderProps()
/// @param def_interline_space default interline space, forwarded to setRenderProps()
/// @param props     properties, forwarded to setRenderProps()
/// @param usable_left_overflow  forwarded to renderBlockElement()
/// @param usable_right_overflow forwarded to renderBlockElement()
/// @return true when a full (re-)rendering has been done, false when none was needed
bool ldomDocument::render( LVRendPageList * pages, LVDocViewCallback * callback, int width, int dy,
                           bool showCover, int y0, font_ref_t def_font, int def_interline_space,
                           CRPropRef props, int usable_left_overflow, int usable_right_overflow )
{
    CRLog::info("Render is called for width %d, pageHeight=%d, fontFace=%s, docFlags=%d", width, dy, def_font->getTypeFace().c_str(), getDocFlags() );
    CRLog::trace("initializing default style...");
    //persist();
//    {
//        lUInt32 styleHash = calcStyleHash();
//        styleHash = styleHash * 31 + calcGlobalSettingsHash();
//        CRLog::debug("Style hash before setRenderProps: %x", styleHash);
//    } //bool propsChanged =
    setRenderProps( width, dy, showCover, y0, def_font, def_interline_space, props );

    // update styles
//    if ( getRootNode()->getStyle().isNull() || getRootNode()->getFont().isNull()
//        || _docFlags != _hdr.render_docflags
//        || width!=_hdr.render_dx || dy!=_hdr.render_dy || defStyleHash!=_hdr.stylesheet_hash ) {
//        CRLog::trace("init format data...");
//        getRootNode()->recurseElements( initFormatData );
//    } else {
//        CRLog::trace("reusing existing format data...");
//    }

    // Remember this flag now, as it is needed at the end (to decide whether
    // to call OnDocumentReady() when no rendering was needed).
    bool was_just_rendered_from_cache = _just_rendered_from_cache; // cleared by checkRenderContext()
    if ( !checkRenderContext() ) {
        if ( _nodeDisplayStyleHashInitial == NODE_DISPLAY_STYLE_HASH_UNINITIALIZED ) { // happen when just loaded
            // For knowing/debugging cases when node styles set up during loading
            // is invalid (should happen now only when EPUB has embedded fonts
            // or some pseudoclass like :last-child has been met).
            printf("CRE: styles re-init needed after load, re-rendering\n");
        }
        CRLog::info("rendering context is changed - full render required...");
        // Clear LFormattedTextRef cache
        _renderedBlockCache.clear();
        CRLog::trace("init format data...");
        //CRLog::trace("validate 1...");
        //validateDocument();
        CRLog::trace("Dropping existing styles...");
        //CRLog::debug( "root style before drop style %d", getNodeStyleIndex(getRootNode()->getDataIndex()));
        dropStyles();
        //CRLog::debug( "root style after drop style %d", getNodeStyleIndex(getRootNode()->getDataIndex()));

        // After having dropped styles, which should have dropped most references
        // to fonts instances, we want to drop these fonts instances.
        // Mostly because some fallback fonts, possibly synthetized (fake bold and
        // italic) may have been instantiated in the late phase of text rendering.
        // We don't want such instances to be used for styles as it could cause some
        // cache check issues (perpetual "style hash mismatch", as these synthetised
        // fonts would not yet be there when loading from cache).
        // We need 2 gc() for a complete cleanup. The performance impact of
        // reinstantiating the fonts is minimal.
        gc(); // drop font instances that were only referenced by dropped styles
        gc(); // drop fallback font instances that were only referenced by dropped fonts

        //ldomNode * root = getRootNode();
        //css_style_ref_t roots = root->getStyle();
        //CRLog::trace("validate 2...");
        //validateDocument();

        // Reset counters (quotes nesting levels...)
        TextLangMan::resetCounters();

        // The stylesheet is saved before applying the document stylesheet and
        // initializing node styles, and restored right after.
        CRLog::trace("Save stylesheet...");
        _stylesheet.push();
        CRLog::trace("Init node styles...");
        applyDocumentStyleSheet();
        getRootNode()->initNodeStyleRecursive( callback );
        CRLog::trace("Restoring stylesheet...");
        _stylesheet.pop();

        CRLog::trace("init render method...");
        getRootNode()->initNodeRendMethodRecursive();

//        getRootNode()->setFont( _def_font );
//        getRootNode()->setStyle( _def_style );
        updateRenderContext();

        // DEBUG dump of render methods
        //dumpRendMethods( getRootNode(), cs32(" - ") );
//        lUInt32 styleHash = calcStyleHash();
//        styleHash = styleHash * 31 + calcGlobalSettingsHash();
//        CRLog::debug("Style hash: %x", styleHash);

        // Styles have been rebuilt: force the full rendering below
        _rendered = false;
    }
    if ( !_rendered ) {
        if ( callback ) {
            callback->OnFormatStart();
        }
        _renderedBlockCache.reduceSize(1); // Reduce size to save some checking and trashing time
        setCacheFileStale(true); // new rendering: cache file will be updated
        _toc_from_cache_valid = false;
        // force recalculation of page numbers (even if not computed in this
        // session, they will be when loaded from cache next session)
        m_toc.invalidatePageNumbers();
        m_pagemap.invalidatePageInfo();
        pages->clear();
        if ( showCover )
            pages->add( new LVRendPageInfo( _page_height ) );
        LVRendPageContext context( pages, _page_height );
        int numFinalBlocks = calcFinalBlocks();
        CRLog::info("Final block count: %d", numFinalBlocks);
        context.setCallback(callback, numFinalBlocks);
        //updateStyles();
        CRLog::trace("rendering...");
        renderBlockElement( context, getRootNode(), 0, y0, width, usable_left_overflow, usable_right_overflow );
        _rendered = true;
    #if 0 //def _DEBUG
        LVStreamRef ostream = LVOpenFileStream( "test_save_after_init_rend_method.xml", LVOM_WRITE );
        saveToStream( ostream, "utf-16" );
    #endif
        gc();
        CRLog::trace("finalizing... fonts.length=%d", _fonts.length());
        context.Finalize();
        updateRenderContext();
        // Keep a serialized copy of the page list, so it can be restored
        // by the "no render needed" branch below on a later call.
        _pagesData.reset();
        pages->serialize( _pagesData );
        _renderedBlockCache.restoreSize(); // Restore original cache size

        if ( _nodeDisplayStyleHashInitial == NODE_DISPLAY_STYLE_HASH_UNINITIALIZED ) {
            // If _nodeDisplayStyleHashInitial has not been initialized from its
            // former value from the cache file, we use the one computed (just
            // above in updateRenderContext()) after the first full rendering
            // (which has applied styles and created the needed autoBoxing nodes
            // in the DOM). It is coherent with the DOM built up to now.
            _nodeDisplayStyleHashInitial = _nodeDisplayStyleHash;
            CRLog::info("Initializing _nodeDisplayStyleHashInitial after first rendering: %x", _nodeDisplayStyleHashInitial);
            // We also save it directly into DocFileHeader _hdr (normally,
            // updateRenderContext() does this, but doing it here avoids
            // a call and an expensive CalcStyleHash)
            _hdr.node_displaystyle_hash = _nodeDisplayStyleHashInitial;
        }

        if ( callback ) {
            callback->OnFormatEnd();
            callback->OnDocumentReady();
        }

        //saveChanges();

        //persist();
        dumpStatistics();

        return true; // full (re-)rendering done

    } else {
        CRLog::info("rendering context is not changed - no render!");
        // Restore the page list from the serialized copy, if there is one
        if ( _pagesData.pos() ) {
            _pagesData.setPos(0);
            pages->deserialize( _pagesData );
        }
        CRLog::info("%d rendered pages found", pages->length() );

        if ( was_just_rendered_from_cache && callback )
            callback->OnDocumentReady();

        return false; // no (re-)rendering needed
    }

}
4811 #endif
4812 
setNodeTypes(const elem_def_t * node_scheme)4813 void lxmlDocBase::setNodeTypes( const elem_def_t * node_scheme )
4814 {
4815     if ( !node_scheme )
4816         return;
4817     for ( ; node_scheme && node_scheme->id != 0; ++node_scheme )
4818     {
4819         _elementNameTable.AddItem(
4820             node_scheme->id,               // ID
4821             lString32(node_scheme->name),  // Name
4822             &node_scheme->props );  // ptr
4823     }
4824 }
4825 
4826 // set attribute types from table
setAttributeTypes(const attr_def_t * attr_scheme)4827 void lxmlDocBase::setAttributeTypes( const attr_def_t * attr_scheme )
4828 {
4829     if ( !attr_scheme )
4830         return;
4831     for ( ; attr_scheme && attr_scheme->id != 0; ++attr_scheme )
4832     {
4833         _attrNameTable.AddItem(
4834             attr_scheme->id,               // ID
4835             lString32(attr_scheme->name),  // Name
4836             NULL);
4837     }
4838     _idAttrId = _attrNameTable.idByName("id");
4839 }
4840 
4841 // set namespace types from table
setNameSpaceTypes(const ns_def_t * ns_scheme)4842 void lxmlDocBase::setNameSpaceTypes( const ns_def_t * ns_scheme )
4843 {
4844     if ( !ns_scheme )
4845         return;
4846     for ( ; ns_scheme && ns_scheme->id != 0; ++ns_scheme )
4847     {
4848         _nsNameTable.AddItem(
4849             ns_scheme->id,                 // ID
4850             lString32(ns_scheme->name),    // Name
4851             NULL);
4852     }
4853 }
4854 
dumpUnknownEntities(const char * fname)4855 void lxmlDocBase::dumpUnknownEntities( const char * fname )
4856 {
4857     FILE * f = fopen( fname, "wt" );
4858     if ( !f )
4859         return;
4860     fprintf(f, "Unknown elements:\n");
4861     _elementNameTable.dumpUnknownItems(f, UNKNOWN_ELEMENT_TYPE_ID);
4862     fprintf(f, "-------------------------------\n");
4863     fprintf(f, "Unknown attributes:\n");
4864     _attrNameTable.dumpUnknownItems(f, UNKNOWN_ATTRIBUTE_TYPE_ID);
4865     fprintf(f, "-------------------------------\n");
4866     fprintf(f, "Unknown namespaces:\n");
4867     _nsNameTable.dumpUnknownItems(f, UNKNOWN_NAMESPACE_TYPE_ID);
4868     fprintf(f, "-------------------------------\n");
4869     fclose(f);
4870 }
4871 
getUnknownEntities()4872 lString32Collection lxmlDocBase::getUnknownEntities()
4873 {
4874     lString32Collection unknown_entities;
4875     unknown_entities.add( _elementNameTable.getUnknownItems(UNKNOWN_ELEMENT_TYPE_ID) );
4876     unknown_entities.add( _attrNameTable.getUnknownItems(UNKNOWN_ATTRIBUTE_TYPE_ID) );
4877     unknown_entities.add( _nsNameTable.getUnknownItems(UNKNOWN_NAMESPACE_TYPE_ID) );
4878     return unknown_entities;
4879 }
4880 
4881 
4882 #if BUILD_LITE!=1
// Section tags of the serialized ID maps: they are written with putMagic()
// by serializeMaps(), and verified with checkMagic() by deserializeMaps().
static const char * id_map_list_magic = "MAPS";    // start of the whole maps section
static const char * elem_id_map_magic = "ELEM";    // precedes _elementNameTable
static const char * attr_id_map_magic = "ATTR";    // precedes _attrNameTable
static const char * attr_value_map_magic = "ATTV"; // precedes _attrValueTable
static const char * ns_id_map_magic =   "NMSP";    // precedes _nsNameTable
static const char * node_by_id_map_magic = "NIDM"; // brackets (before and after) the _idNodeMap items
4889 
/// One (key, value) entry of _idNodeMap, copied into a plain array by
/// serializeMaps() so that the entries can be sorted with qsort() before
/// being written.
typedef struct {
    lUInt32 key;   // hash table key (this is what the entries are sorted by)
    lUInt32 value; // hash table value (a lInt32 in the table, cast to lUInt32)
} id_node_map_item;
4894 
compare_id_node_map_items(const void * item1,const void * item2)4895 int compare_id_node_map_items(const void * item1, const void * item2) {
4896     id_node_map_item * v1 = (id_node_map_item*)item1;
4897     id_node_map_item * v2 = (id_node_map_item*)item2;
4898     if (v1->key > v2->key)
4899         return 1;
4900     if (v1->key < v2->key)
4901         return -1;
4902     return 0;
4903 }
4904 
4905 /// serialize to byte array (pointer will be incremented by number of bytes written)
serializeMaps(SerialBuf & buf)4906 void lxmlDocBase::serializeMaps( SerialBuf & buf )
4907 {
4908     if ( buf.error() )
4909         return;
4910     int pos = buf.pos();
4911     buf.putMagic( id_map_list_magic );
4912     buf.putMagic( elem_id_map_magic );
4913     _elementNameTable.serialize( buf );
4914     buf << _nextUnknownElementId; // Next Id for unknown element
4915     buf.putMagic( attr_id_map_magic );
4916     _attrNameTable.serialize( buf );
4917     buf << _nextUnknownAttrId;    // Next Id for unknown attribute
4918     buf.putMagic( ns_id_map_magic );
4919     _nsNameTable.serialize( buf );
4920     buf << _nextUnknownNsId;      // Next Id for unknown namespace
4921     buf.putMagic( attr_value_map_magic );
4922     _attrValueTable.serialize( buf );
4923 
4924     int start = buf.pos();
4925     buf.putMagic( node_by_id_map_magic );
4926     lUInt32 cnt = 0;
4927     {
4928         LVHashTable<lUInt32,lInt32>::iterator ii = _idNodeMap.forwardIterator();
4929         for ( LVHashTable<lUInt32,lInt32>::pair * p = ii.next(); p!=NULL; p = ii.next() ) {
4930             cnt++;
4931         }
4932     }
4933     // TODO: investigate why length() doesn't work as count
4934     if ( (int)cnt!=_idNodeMap.length() )
4935         CRLog::error("_idNodeMap.length=%d doesn't match real item count %d", _idNodeMap.length(), cnt);
4936     buf << cnt;
4937     if (cnt > 0)
4938     {
4939         // sort items before serializing!
4940         id_node_map_item * array = new id_node_map_item[cnt];
4941         int i = 0;
4942         LVHashTable<lUInt32,lInt32>::iterator ii = _idNodeMap.forwardIterator();
4943         for ( LVHashTable<lUInt32,lInt32>::pair * p = ii.next(); p!=NULL; p = ii.next() ) {
4944             array[i].key = (lUInt32)p->key;
4945             array[i].value = (lUInt32)p->value;
4946             i++;
4947         }
4948         qsort(array, cnt, sizeof(id_node_map_item), &compare_id_node_map_items);
4949         for (i = 0; i < (int)cnt; i++)
4950             buf << array[i].key << array[i].value;
4951         delete[] array;
4952     }
4953     buf.putMagic( node_by_id_map_magic );
4954     buf.putCRC( buf.pos() - start );
4955 
4956     buf.putCRC( buf.pos() - pos );
4957 }
4958 
4959 /// deserialize from byte array (pointer will be incremented by number of bytes read)
deserializeMaps(SerialBuf & buf)4960 bool lxmlDocBase::deserializeMaps( SerialBuf & buf )
4961 {
4962     if ( buf.error() )
4963         return false;
4964     int pos = buf.pos();
4965     buf.checkMagic( id_map_list_magic );
4966     buf.checkMagic( elem_id_map_magic );
4967     _elementNameTable.deserialize( buf );
4968     buf >> _nextUnknownElementId; // Next Id for unknown element
4969 
4970     if ( buf.error() ) {
4971         CRLog::error("Error while deserialization of Element ID map");
4972         return false;
4973     }
4974 
4975     buf.checkMagic( attr_id_map_magic );
4976     _attrNameTable.deserialize( buf );
4977     buf >> _nextUnknownAttrId;    // Next Id for unknown attribute
4978 
4979     if ( buf.error() ) {
4980         CRLog::error("Error while deserialization of Attr ID map");
4981         return false;
4982     }
4983 
4984 
4985     buf.checkMagic( ns_id_map_magic );
4986     _nsNameTable.deserialize( buf );
4987     buf >> _nextUnknownNsId;      // Next Id for unknown namespace
4988 
4989     if ( buf.error() ) {
4990         CRLog::error("Error while deserialization of NS ID map");
4991         return false;
4992     }
4993 
4994     buf.checkMagic( attr_value_map_magic );
4995     _attrValueTable.deserialize( buf );
4996 
4997     if ( buf.error() ) {
4998         CRLog::error("Error while deserialization of AttrValue map");
4999         return false;
5000     }
5001 
5002     int start = buf.pos();
5003     buf.checkMagic( node_by_id_map_magic );
5004     lUInt32 idmsize;
5005     buf >> idmsize;
5006     _idNodeMap.clear();
5007     if ( idmsize < 20000 )
5008         _idNodeMap.resize( idmsize*2 );
5009     for ( unsigned i=0; i<idmsize; i++ ) {
5010         lUInt32 key;
5011         lUInt32 value;
5012         buf >> key;
5013         buf >> value;
5014         _idNodeMap.set( key, value );
5015         if ( buf.error() )
5016             return false;
5017     }
5018     buf.checkMagic( node_by_id_map_magic );
5019 
5020     if ( buf.error() ) {
5021         CRLog::error("Error while deserialization of ID->Node map");
5022         return false;
5023     }
5024 
5025     buf.checkCRC( buf.pos() - start );
5026 
5027     if ( buf.error() ) {
5028         CRLog::error("Error while deserialization of ID->Node map - CRC check failed");
5029         return false;
5030     }
5031 
5032     buf.checkCRC( buf.pos() - pos );
5033 
5034     return !buf.error();
5035 }
5036 #endif
5037 
/// Tell whether the first len chars of text are only whitespace, that is:
/// space, '\r', '\n' or '\t'.
/// Returns true too when len is zero or negative (no char is looked at).
bool IsEmptySpace( const lChar32 * text, int len )
{
    int pos = 0;
    while ( pos < len ) {
        switch ( text[pos] ) {
            case ' ':
            case '\r':
            case '\n':
            case '\t':
                pos++;
                break;
            default:
                return false; // met a non-whitespace char
        }
    }
    return true;
}
5045 
5046 
5047 /////////////////////////////////////////////////////////////////
/// ldomElementWriter
5049 
// When true, the next ldomElementWriter created for a <body> element gets
// the document TOC (_document->getToc()) as its _tocItem, and resets this
// flag to false. Other <body> elements are handled as secondary bodies.
// NOTE(review): nothing in this part of the file sets it to true -
// presumably done by the code that starts building a document; to be confirmed.
static bool IS_FIRST_BODY = false;
5051 
/// Create the writer for a new element, and the element itself in the DOM.
///
/// @param document  document being built
/// @param nsid      namespace id of the new element
/// @param id        element id of the new element
/// @param parent    writer of the parent element; when NULL, this writer
///                  is bound to the document root node and no element is created
/// @param insert_before_last_child when true, the new element is inserted
///                  before the current last child of the parent, instead of
///                  being appended.
///                  NOTE(review): this decrements the parent's child count without
///                  checking for zero - presumably the parent then always has a child; confirm with callers.
ldomElementWriter::ldomElementWriter(ldomDocument * document, lUInt16 nsid, lUInt16 id, ldomElementWriter * parent, bool insert_before_last_child)
    : _parent(parent), _document(document), _tocItem(NULL), _isBlock(true), _isSection(false),
      _stylesheetIsSet(false), _bodyEnterCalled(false), _pseudoElementAfterChildIndex(-1)
{
    //logfile << "{c";
    _typeDef = _document->getElementTypePtr( id );
    _flags = 0;
    // Parse text as PRE when the element type says so, or when the parent does
    if ( (_typeDef && _typeDef->white_space >= css_ws_pre_line) || (_parent && _parent->getFlags()&TXTFLG_PRE) )
        _flags |= TXTFLG_PRE; // Parse as PRE: pre-line, pre, pre-wrap and break-spaces
        // This will be updated in ldomElementWriter::onBodyEnter() after we have
        // set styles to this node, so we'll get the real white_space value to use.

    _isSection = (id==el_section);

    // Default (for elements not specified in fb2def.h) is to allow text
    // (except for the root node which must have children)
    _allowText = _typeDef ? _typeDef->allow_text : (_parent?true:false);
    if (_document->getDOMVersionRequested() < 20180528) { // revert what was changed 20180528
        // <hr>, <ul>, <ol>, <dl>, <output>, <section>, <svg> didn't allow text
        if ( id==el_hr || id==el_ul || id==el_ol || id==el_dl ||
                id==el_output || id==el_section || id==el_svg ) {
            _allowText = false;
        }
        // <code> was white-space: pre
        if ( id==el_code ) {
            _flags |= TXTFLG_PRE;
        }
    }

    if (_parent) {
        // Create the element as a child of the parent's element: appended,
        // or inserted just before the current last child
        lUInt32 index = _parent->getElement()->getChildCount();
        if ( insert_before_last_child )
            index--;
        _element = _parent->getElement()->insertChildElement( index, nsid, id );
    }
    else
        _element = _document->getRootNode(); //->insertChildElement( (lUInt32)-1, nsid, id );
    if ( id==el_body ) {
        if ( IS_FIRST_BODY ) {
            // First <body>: its sections will be added to the document TOC
            _tocItem = _document->getToc();
            //_tocItem->clear();
            IS_FIRST_BODY = false;
        }
        else {
            int fmt = _document->getProps()->getIntDef(DOC_PROP_FILE_FORMAT_ID, doc_format_none);
            if ( fmt == doc_format_fb2 || fmt == doc_format_fb3 ) {
                // Add FB2 2nd++ BODYs' titles (footnotes and endnotes) in the TOC
                // (but not their own children that are <section>)
                _isSection = true; // this is just to have updateTocItem() called
                // Also add the "NonLinear" attribute so these other BODYs are flagged
                // as non-linear and can be hidden by frontend code that handles this
                // (this is actually suggested by the FB2 specs: "... multiple
                // bodies are used for additional information, like footnotes,
                // that do not appear in the main book flow. The first body is
                // presented to the reader by default, and content in the other
                // bodies should be accessible by hyperlinks.")
                addAttribute( 0, attr_NonLinear, U"" );
            }
        }
    }
    //logfile << "}";
}
5114 
getFlags()5115 lUInt32 ldomElementWriter::getFlags()
5116 {
5117     return _flags;
5118 }
5119 
isBlockNode(ldomNode * node)5120 static bool isBlockNode( ldomNode * node )
5121 {
5122     if ( !node->isElement() )
5123         return false;
5124 #if BUILD_LITE!=1
5125     if ( node->getStyle()->display <= css_d_inline || node->getStyle()->display == css_d_none ) {
5126         return false;
5127     }
5128     return true;
5129 #else
5130     return true;
5131 #endif
5132 }
5133 
isInlineNode(ldomNode * node)5134 static bool isInlineNode( ldomNode * node )
5135 {
5136     if ( node->isText() )
5137         return true;
5138     //int d = node->getStyle()->display;
5139     //return ( d==css_d_inline || d==css_d_run_in );
5140     int m = node->getRendMethod();
5141     return m == erm_inline;
5142 }
5143 
isFloatingNode(ldomNode * node)5144 static bool isFloatingNode( ldomNode * node )
5145 {
5146     if ( node->isText() )
5147         return false;
5148     return node->getStyle()->float_ > css_f_none;
5149 }
5150 
isNotBoxWrappingNode(ldomNode * node)5151 static bool isNotBoxWrappingNode( ldomNode * node )
5152 {
5153     if ( BLOCK_RENDERING_N(node, PREPARE_FLOATBOXES) && node->getStyle()->float_ > css_f_none )
5154         return false; // floatBox
5155     // isBoxingInlineBox() already checks for BLOCK_RENDERING_BOX_INLINE_BLOCKS)
5156     return !node->isBoxingInlineBox();
5157 }
5158 
isNotBoxingInlineBoxNode(ldomNode * node)5159 static bool isNotBoxingInlineBoxNode( ldomNode * node )
5160 {
5161     return !node->isBoxingInlineBox();
5162 }
5163 
getSectionHeader(ldomNode * section)5164 static lString32 getSectionHeader( ldomNode * section )
5165 {
5166     lString32 header;
5167     if ( !section || section->getChildCount() == 0 )
5168         return header;
5169     ldomNode * child = section->getChildElementNode(0, U"title");
5170     if ( !child )
5171         return header;
5172     header = child->getText(U' ', 1024);
5173     return header;
5174 }
5175 
getPath()5176 lString32 ldomElementWriter::getPath()
5177 {
5178     if ( !_path.empty() || _element->isRoot() )
5179         return _path;
5180     _path = _parent->getPath() + "/" + _element->getXPathSegment();
5181     return _path;
5182 }
5183 
updateTocItem()5184 void ldomElementWriter::updateTocItem()
5185 {
5186     if ( !_isSection )
5187         return;
5188     if ( !_parent )
5189         return;
5190     if ( _parent->_tocItem ) { // <section> in the first <body>
5191         lString32 title = getSectionHeader( _element );
5192         //CRLog::trace("TOC ITEM: %s", LCSTR(title));
5193         _tocItem = _parent->_tocItem->addChild(title, ldomXPointer(_element,0), getPath() );
5194     }
5195     else if ( getElement()->getNodeId() == el_body ) { // 2nd, 3rd... <body>, in FB2 documents
5196         lString32 title = getSectionHeader( _element );
5197         _document->getToc()->addChild(title, ldomXPointer(_element,0), getPath() );
5198     }
5199     _isSection = false;
5200 }
5201 
onBodyEnter()5202 void ldomElementWriter::onBodyEnter()
5203 {
5204     _bodyEnterCalled = true;
5205 #if BUILD_LITE!=1
5206     //CRLog::trace("onBodyEnter() for node %04x %s", _element->getDataIndex(), LCSTR(_element->getNodeName()));
5207     if ( _document->isDefStyleSet() && _element ) {
5208         _element->initNodeStyle();
5209 //        if ( _element->getStyle().isNull() ) {
5210 //            CRLog::error("error while style initialization of element %x %s", _element->getNodeIndex(), LCSTR(_element->getNodeName()) );
5211 //            crFatalError();
5212 //        }
5213         int nb_children = _element->getChildCount();
5214         if ( nb_children > 0 ) {
5215             // The only possibility for this element being built to have children
5216             // is if the above initNodeStyle() has applied to this node some
5217             // matching selectors that had ::before or ::after, which have then
5218             // created one or two pseudoElem children. But let's be sure of that.
5219             for ( int i=0; i<nb_children; i++ ) {
5220                 ldomNode * child = _element->getChildNode(i);
5221                 if ( child->getNodeId() == el_pseudoElem ) {
5222                     if ( child->hasAttribute(attr_Before) ) {
5223                         // The "Before" pseudo element (not part of the XML)
5224                         // needs to have its style applied. As it has no
5225                         // children, we can also init its rend method.
5226                         child->initNodeStyle();
5227                         child->initNodeRendMethod();
5228                     }
5229                     else if ( child->hasAttribute(attr_After) ) {
5230                         // For the "After" pseudo element, we need to wait
5231                         // for all real children to be added, to move it
5232                         // as its right position (last), to init its style
5233                         // (because of "content:close-quote", whose nested
5234                         // level need to have seen all previous nodes to
5235                         // be accurate) and its rendering method.
5236                         // We'll do that in onBodyExit() when called for
5237                         // this node.
5238                         _pseudoElementAfterChildIndex = i;
5239                     }
5240                 }
5241             }
5242         }
5243         _isBlock = isBlockNode(_element);
5244         // If initNodeStyle() has set "white-space: pre" or alike, update _flags
5245         if ( _element->getStyle()->white_space >= css_ws_pre_line) {
5246             _flags |= TXTFLG_PRE;
5247         }
5248         else {
5249             _flags &= ~TXTFLG_PRE;
5250         }
5251     } else {
5252     }
5253     if ( _isSection ) {
5254         if ( _parent && _parent->_isSection ) {
5255             _parent->updateTocItem();
5256         }
5257 
5258     }
5259 #endif
5260 }
5261 
ensurePseudoElement(bool is_before)5262 void ldomNode::ensurePseudoElement( bool is_before ) {
5263 #if BUILD_LITE!=1
5264     // This node should have that pseudoElement, but it might already be there,
5265     // so check if there is already one, and if not, create it.
5266     // This happens usually in the initial loading phase, but it might in
5267     // a re-rendering if the pseudo element is introduced by a change in
5268     // styles (we won't be able to create a node if there's a cache file).
5269     int insertChildIndex = -1;
5270     int nb_children = getChildCount();
5271     if ( is_before ) { // ::before
5272         insertChildIndex = 0; // always to be inserted first, if not already there
5273         if ( nb_children > 0 ) {
5274             ldomNode * child = getChildNode(0); // should always be found as the first node
5275             // pseudoElem might have been wrapped by a inlineBox, autoBoxing, floatBox...
5276             while ( child && child->isBoxingNode() && child->getChildCount()>0 )
5277                 child = child->getChildNode(0);
5278             if ( child && child->getNodeId() == el_pseudoElem && child->hasAttribute(attr_Before) ) {
5279                 // Already there, no need to create it
5280                 insertChildIndex = -1;
5281             }
5282         }
5283     }
5284     else { // ::after
5285         // In the XML loading phase, this one might be either first,
5286         // or second if there's already a Before. In the re-rendering
5287         // phase, it would have been moved as the last node. In all these
5288         // cases, it is always the last at the moment we are checking.
5289         insertChildIndex = nb_children; // always to be inserted last, if not already there
5290         if ( nb_children > 0 ) {
5291             ldomNode * child = getChildNode(nb_children-1); // should always be found as the last node
5292             // pseudoElem might have been wrapped by a inlineBox, autoBoxing, floatBox...
5293             while ( child && child->isBoxingNode() && child->getChildCount()>0 )
5294                 child = child->getChildNode(0);
5295             if ( child && child->getNodeId() == el_pseudoElem && child->hasAttribute(attr_After) ) {
5296                 // Already there, no need to create it
5297                 insertChildIndex = -1;
5298             }
5299         }
5300     }
5301     if ( insertChildIndex >= 0 ) {
5302         ldomNode * pseudo = insertChildElement( insertChildIndex, LXML_NS_NONE, el_pseudoElem );
5303         lUInt16 attribute_id = is_before ? attr_Before : attr_After;
5304         pseudo->setAttributeValue(LXML_NS_NONE, attribute_id, U"");
5305         // We are called by lvrend.cpp setNodeStyle(), after the parent
5306         // style and font have been fully set up. We could set this pseudo
5307         // element style with pseudo->initNodeStyle(), as it can inherit
5308         // properly, but we should not:
5309         // - when re-rendering, initNodeStyleRecursive()/updateStyleDataRecursive()
5310         //   will iterate thru this node we just added as a child, and do it.
5311         // - when XML loading, we could do it for the "Before" pseudo element,
5312         //   but for the "After" one, we need to wait for all real children to be
5313         //   added and have their style applied - just because they can change
5314         //   open-quote/close-quote nesting levels - to be sure we get the
5315         //   proper nesting level quote char for the After node.
5316         // So, for the XML loading phase, we do that in onBodyEnter() and
5317         // onBodyExit() when called on the parent node.
5318     }
5319 
5320 #endif
5321 }
5322 
5323 #if BUILD_LITE!=1
resetRendMethodToInline(ldomNode * node)5324 static void resetRendMethodToInline( ldomNode * node )
5325 {
5326     // we shouldn't reset to inline (visible) if display: none
5327     // (using node->getRendMethod() != erm_invisible seems too greedy and may
5328     // hide other nodes)
5329     if (node->getStyle()->display != css_d_none)
5330         node->setRendMethod(erm_inline);
5331     else if (node->getDocument()->getDOMVersionRequested() < 20180528) // do that in all cases
5332         node->setRendMethod(erm_inline);
5333 }
5334 
resetRendMethodToInvisible(ldomNode * node)5335 static void resetRendMethodToInvisible( ldomNode * node )
5336 {
5337     node->setRendMethod(erm_invisible);
5338 }
5339 #endif
5340 
removeChildren(int startIndex,int endIndex)5341 void ldomNode::removeChildren( int startIndex, int endIndex )
5342 {
5343     for ( int i=endIndex; i>=startIndex; i-- ) {
5344         removeChild(i)->destroy();
5345     }
5346 }
5347 
autoboxChildren(int startIndex,int endIndex,bool handleFloating)5348 void ldomNode::autoboxChildren( int startIndex, int endIndex, bool handleFloating )
5349 {
5350 #if BUILD_LITE!=1
5351     if ( !isElement() )
5352         return;
5353     css_style_ref_t style = getStyle();
5354     bool pre = ( style->white_space >= css_ws_pre_line );
5355         // (css_ws_pre_line might need special care?)
5356     int firstNonEmpty = startIndex;
5357     int lastNonEmpty = endIndex;
5358 
5359     bool hasInline = pre;
5360     bool hasNonEmptyInline = pre;
5361     bool hasFloating = false;
5362     // (Note: did not check how floats inside <PRE> are supposed to work)
5363     if ( !pre ) {
5364         while ( firstNonEmpty<=endIndex && getChildNode(firstNonEmpty)->isText() ) {
5365             lString32 s = getChildNode(firstNonEmpty)->getText();
5366             if ( !IsEmptySpace(s.c_str(), s.length() ) )
5367                 break;
5368             firstNonEmpty++;
5369         }
5370         while ( lastNonEmpty>=endIndex && getChildNode(lastNonEmpty)->isText() ) {
5371             lString32 s = getChildNode(lastNonEmpty)->getText();
5372             if ( !IsEmptySpace(s.c_str(), s.length() ) )
5373                 break;
5374             lastNonEmpty--;
5375         }
5376 
5377         for ( int i=firstNonEmpty; i<=lastNonEmpty; i++ ) {
5378             ldomNode * node = getChildNode(i);
5379             if ( isInlineNode( node ) ) {
5380                 hasInline = true;
5381                 if ( !hasNonEmptyInline ) {
5382                     if (node->isText()) {
5383                         lString32 s = node->getText();
5384                         if ( !IsEmptySpace(s.c_str(), s.length() ) ) {
5385                             hasNonEmptyInline = true;
5386                         }
5387                     }
5388                     else {
5389                         if ( handleFloating && isFloatingNode(node) ) {
5390                             // Ignore floatings
5391                         }
5392                         else {
5393                             hasNonEmptyInline = true;
5394                             // Note: when not using DO_NOT_CLEAR_OWN_FLOATS, we might
5395                             // want to be more agressive in the removal of empty
5396                             // elements, including nested empty elements which would
5397                             // have no effect on the rendering (eg, some empty <link/>
5398                             // or <span id="PageNumber123"/>), to avoid having the float
5399                             // in an autoBox element with nothing else, which would
5400                             // then be cleared and leave some blank space.
5401                             // We initially did:
5402                             //    // For now, assume any inline node with some content
5403                             //    // (text or other inlines) is non empty.
5404                             //    if ( node->getChildCount() > 0 )
5405                             //        hasNonEmptyInline = true;
5406                             //    else if (node->getNodeId() == el_br) {
5407                             //        hasNonEmptyInline = true;
5408                             //    }
5409                             //    else {
5410                             //        const css_elem_def_props_t * ntype = node->getElementTypePtr();
5411                             //        if (ntype && ntype->is_object) // standalone image
5412                             //            hasNonEmptyInline = true;
5413                             //    }
5414                             // and we could even use hasNonEmptyInlineContent() to get
5415                             // rid of any nested empty elements and be sure to have our
5416                             // float standalone and be able to have it rendered as block
5417                             // instead of in an erm_final.
5418                             //
5419                             // But this was for edge cases (but really noticable), and it has
5420                             // become less critical now that we have/ DO_NOT_CLEAR_OWN_FLOATS,
5421                             // so let's not remove any element from our DOM (those with some
5422                             // id= attribute might be the target of a link).
5423                             //
5424                             // Sample test case in China.EN at the top of the "Politics" section:
5425                             //   "...</div> <link/> (or any text) <div float>...</div> <div>..."
5426                             // gets turned into:
5427                             //   "...</div>
5428                             //   <autoBoxing>
5429                             //     <link/> (or any text)
5430                             //     <floatBox>
5431                             //       <div float>...</div>
5432                             //     </floatBox>
5433                             //   </autoBoxing>
5434                             //   <div>..."
5435                             // If the floatbox would be let outside of the autobox, it would
5436                             // be fine when not DO_NOT_CLEAR_OWN_FLOATS too.
5437                         }
5438                     }
5439                 }
5440             }
5441             if ( handleFloating && isFloatingNode(node) )
5442                 hasFloating = true;
5443             if ( hasNonEmptyInline && hasFloating )
5444                 break; // We know, no need to look more
5445         }
5446     }
5447 
5448     if ( hasFloating && !hasNonEmptyInline) {
5449         // only multiple floats with empty spaces in between:
5450         // remove empty text nodes, and let the floats be blocks, don't autobox
5451         for ( int i=endIndex; i>=startIndex; i-- ) {
5452             if ( !isFloatingNode(getChildNode(i)) )
5453                 removeChildren(i, i);
5454         }
5455     }
5456     else if ( hasInline ) { //&& firstNonEmpty<=lastNonEmpty
5457 
5458 #ifdef TRACE_AUTOBOX
5459         CRLog::trace("Autobox children %d..%d of node <%s>  childCount=%d", firstNonEmpty, lastNonEmpty, LCSTR(getNodeName()), getChildCount());
5460 
5461         for ( int i=firstNonEmpty; i<=lastNonEmpty; i++ ) {
5462             ldomNode * node = getChildNode(i);
5463             if ( node->isText() )
5464                 CRLog::trace("    text: %d '%s'", node->getDataIndex(), LCSTR(node->getText()));
5465             else
5466                 CRLog::trace("    elem: %d <%s> rendMode=%d  display=%d", node->getDataIndex(), LCSTR(node->getNodeName()), node->getRendMethod(), node->getStyle()->display);
5467         }
5468 #endif
5469         // remove trailing empty
5470         removeChildren(lastNonEmpty+1, endIndex);
5471 
5472         // inner inline
5473         ldomNode * abox = insertChildElement( firstNonEmpty, LXML_NS_NONE, el_autoBoxing );
5474         moveItemsTo( abox, firstNonEmpty+1, lastNonEmpty+1 );
5475         // remove starting empty
5476         removeChildren(startIndex, firstNonEmpty-1);
5477         abox->initNodeStyle();
5478         if ( !BLOCK_RENDERING_N(this, FLOAT_FLOATBOXES) ) {
5479             // If we don't want floatBoxes floating, reset them to be
5480             // rendered inline among inlines
5481             abox->recurseMatchingElements( resetRendMethodToInline, isNotBoxingInlineBoxNode );
5482         }
5483         abox->setRendMethod( erm_final );
5484     }
5485     else if ( hasFloating) {
5486         // only floats, don't autobox them (otherwise the autobox wouldn't be floating)
5487         // remove trailing empty
5488         removeChildren(lastNonEmpty+1, endIndex);
5489         // remove starting empty
5490         removeChildren(startIndex, firstNonEmpty-1);
5491     }
5492     else {
5493         // only empty items: remove them instead of autoboxing
5494         removeChildren(startIndex, endIndex);
5495     }
5496 #endif
5497 }
5498 
cleanIfOnlyEmptyTextInline(bool handleFloating)5499 bool ldomNode::cleanIfOnlyEmptyTextInline( bool handleFloating )
5500 {
5501 #if BUILD_LITE!=1
5502     if ( !isElement() )
5503         return false;
5504     css_style_ref_t style = getStyle();
5505     if ( style->white_space >= css_ws_pre )
5506         return false; // Don't mess with PRE (css_ws_pre_line might need special care?)
5507     // We return false as soon as we find something non text, or text non empty
5508     int i = getChildCount()-1;
5509     for ( ; i>=0; i-- ) {
5510         ldomNode * node = getChildNode(i);
5511         if ( node->isText() ) {
5512             lString32 s = node->getText();
5513             if ( !IsEmptySpace(s.c_str(), s.length() ) ) {
5514                 return false;
5515             }
5516         }
5517         else if ( handleFloating && isFloatingNode(node) ) {
5518             // Ignore floatings
5519         }
5520         else { // non-text non-float element
5521             return false;
5522         }
5523     }
5524     // Ok, only empty text inlines, with possible floats
5525     i = getChildCount()-1;
5526     for ( ; i>=0; i-- ) {
5527         // With the tests done above, we just need to remove text nodes
5528         if ( getChildNode(i)->isText() ) {
5529             removeChildren(i, i);
5530         }
5531     }
5532     return true;
5533 #else
5534     return false;
5535 #endif
5536 }
5537 
5538 /// returns true if element has inline content (non empty text, images, <BR>)
hasNonEmptyInlineContent(bool ignoreFloats)5539 bool ldomNode::hasNonEmptyInlineContent( bool ignoreFloats )
5540 {
5541     if ( getRendMethod() == erm_invisible ) {
5542         return false;
5543     }
5544     if ( ignoreFloats && BLOCK_RENDERING_N(this, FLOAT_FLOATBOXES) && getStyle()->float_ > css_f_none ) {
5545         return false;
5546     }
5547     // With some other bool param, we might want to also check for
5548     // padding top/bottom (and height if check ENSURE_STYLE_HEIGHT)
5549     // if these will introduce some content.
5550     if ( isText() ) {
5551         lString32 s = getText();
5552         return !IsEmptySpace(s.c_str(), s.length() );
5553     }
5554     if (getNodeId() == el_br) {
5555         return true;
5556     }
5557     const css_elem_def_props_t * ntype = getElementTypePtr();
5558     if (ntype && ntype->is_object) { // standalone image
5559         return true;
5560     }
5561     for ( int i=0; i<(int)getChildCount(); i++ ) {
5562         if ( getChildNode(i)->hasNonEmptyInlineContent() ) {
5563             return true;
5564         }
5565     }
5566     return false;
5567 }
5568 
5569 #if BUILD_LITE!=1
detectChildTypes(ldomNode * parent,bool & hasBlockItems,bool & hasInline,bool & hasInternalTableItems,bool & hasFloating,bool detectFloating=false)5570 static void detectChildTypes( ldomNode * parent, bool & hasBlockItems, bool & hasInline,
5571                     bool & hasInternalTableItems, bool & hasFloating, bool detectFloating=false )
5572 {
5573     hasBlockItems = false;
5574     hasInline = false;
5575     hasFloating = false;
5576     if ( parent->getNodeId() == el_pseudoElem ) {
5577         // pseudoElem (generated from CSS ::before and ::after), will have
5578         // some (possibly empty) plain text content.
5579         hasInline = true;
5580         return; // and it has no children
5581     }
5582     int len = parent->getChildCount();
5583     for ( int i=len-1; i>=0; i-- ) {
5584         ldomNode * node = parent->getChildNode(i);
5585         if ( !node->isElement() ) {
5586             // text
5587             hasInline = true;
5588         }
5589         else if ( detectFloating && node->getStyle()->float_ > css_f_none ) {
5590             hasFloating = true;
5591         }
5592         else {
5593             // element
5594             int d = node->getStyle()->display;
5595             int m = node->getRendMethod();
5596             if ( d==css_d_none || m==erm_invisible )
5597                 continue;
5598             if ( m==erm_inline ) { //d==css_d_inline || d==css_d_run_in
5599                 hasInline = true;
5600             } else {
5601                 hasBlockItems = true;
5602                 // (Table internal elements are all block items in the context
5603                 // where hasBlockItems is used, so account for them in both)
5604                 if ( ( d > css_d_table && d <= css_d_table_caption ) || ( m > erm_table ) ) {
5605                     hasInternalTableItems = true;
5606                 }
5607             }
5608         }
5609     }
5610 }
5611 
5612 // Generic version of autoboxChildren() without any specific inline/block checking,
5613 // accepting any element id (from the enum el_*, like el_div, el_tabularBox) as
5614 // the wrapping element.
boxWrapChildren(int startIndex,int endIndex,lUInt16 elementId)5615 ldomNode * ldomNode::boxWrapChildren( int startIndex, int endIndex, lUInt16 elementId )
5616 {
5617     if ( !isElement() )
5618         return NULL;
5619     int firstNonEmpty = startIndex;
5620     int lastNonEmpty = endIndex;
5621 
5622     while ( firstNonEmpty<=endIndex && getChildNode(firstNonEmpty)->isText() ) {
5623         lString32 s = getChildNode(firstNonEmpty)->getText();
5624         if ( !IsEmptySpace(s.c_str(), s.length() ) )
5625             break;
5626         firstNonEmpty++;
5627     }
5628     while ( lastNonEmpty>=endIndex && getChildNode(lastNonEmpty)->isText() ) {
5629         lString32 s = getChildNode(lastNonEmpty)->getText();
5630         if ( !IsEmptySpace(s.c_str(), s.length() ) )
5631             break;
5632         lastNonEmpty--;
5633     }
5634 
5635     // printf("boxWrapChildren %d>%d | %d<%d\n", startIndex, firstNonEmpty, lastNonEmpty, endIndex);
5636     if ( firstNonEmpty<=lastNonEmpty ) {
5637         // remove trailing empty
5638         removeChildren(lastNonEmpty+1, endIndex);
5639         // create wrapping container
5640         ldomNode * box = insertChildElement( firstNonEmpty, LXML_NS_NONE, elementId );
5641         moveItemsTo( box, firstNonEmpty+1, lastNonEmpty+1 );
5642         // remove starting empty
5643         removeChildren(startIndex, firstNonEmpty-1);
5644         return box;
5645     }
5646     else {
5647         // Only empty items: remove them instead of box wrapping them
5648         removeChildren(startIndex, endIndex);
5649         return NULL;
5650     }
5651 }
5652 
5653 // Uncomment to debug COMPLETE_INCOMPLETE_TABLES tabularBox wrapping
5654 // #define DEBUG_INCOMPLETE_TABLE_COMPLETION
5655 
5656 // init table element render methods
5657 // states: 0=table, 1=colgroup, 2=rowgroup, 3=row, 4=cell
5658 // returns table cell count
5659 // When BLOCK_RENDERING_COMPLETE_INCOMPLETE_TABLES, we follow rules
5660 // from the "Generate missing child wrappers" section in:
5661 //   https://www.w3.org/TR/CSS22/tables.html#anonymous-boxes
5662 //   https://www.w3.org/TR/css-tables-3/#fixup (clearer than previous one)
5663 // and we wrap unproper children in a tabularBox element.
initTableRendMethods(ldomNode * enode,int state)5664 int initTableRendMethods( ldomNode * enode, int state )
5665 {
5666     //main node: table
5667     if ( state==0 && ( enode->getStyle()->display==css_d_table ||
5668                        enode->getStyle()->display==css_d_inline_table ||
5669                       (enode->getStyle()->display==css_d_inline_block && enode->getNodeId()==el_table) ) ) {
5670         enode->setRendMethod( erm_table );
5671     }
5672     int cellCount = 0; // (returned, but not used anywhere)
5673     int cnt = enode->getChildCount();
5674     int i;
5675     int first_unproper = -1; // keep track of consecutive unproper children that
5676     int last_unproper = -1;  // must all be wrapped in a single wrapper
5677     for (i=0; i<cnt; i++) {
5678         ldomNode * child = enode->getChildNode( i );
5679         css_display_t d;
5680         if ( child->isElement() ) {
5681             d = child->getStyle()->display;
5682         }
5683         else { // text node
5684             d = css_d_inline;
5685             // Not sure about what to do with whitespace only text nodes:
5686             // we shouldn't meet any alongside real elements (as whitespace
5687             // around and at start/end of block nodes are discarded), but
5688             // we may in case of style changes (inline > table) after
5689             // a book has been loaded.
5690             // Not sure if we should handle them differently when no unproper
5691             // elements yet (they will be discarded by the table render algo),
5692             // and when among unpropers (they could find their place in the
5693             // wrapped table cell).
5694             // Note that boxWrapChildren() called below will remove
5695             // them at start or end of an unproper elements sequence.
5696         }
5697         bool is_last = (i == cnt-1);
5698         bool is_proper = false;
5699         if ( state==0 ) { // in table
5700             if ( d==css_d_table_row ) {
5701                 child->setRendMethod( erm_table_row );
5702                 cellCount += initTableRendMethods( child, 3 ); // > row
5703                 is_proper = true;
5704             }
5705             else if ( d==css_d_table_row_group ) {
5706                 child->setRendMethod( erm_table_row_group );
5707                 cellCount += initTableRendMethods( child, 2 ); // > rowgroup
5708                 is_proper = true;
5709             }
5710             else if ( d==css_d_table_header_group ) {
5711                 child->setRendMethod( erm_table_header_group );
5712                 cellCount += initTableRendMethods( child, 2 ); // > rowgroup
5713                 is_proper = true;
5714             }
5715             else if ( d==css_d_table_footer_group ) {
5716                 child->setRendMethod( erm_table_footer_group );
5717                 cellCount += initTableRendMethods( child, 2 ); // > rowgroup
5718                 is_proper = true;
5719             }
5720             else if ( d==css_d_table_column_group ) {
5721                 child->setRendMethod( erm_table_column_group );
5722                 cellCount += initTableRendMethods( child, 1 ); // > colgroup
5723                 is_proper = true;
5724             }
5725             else if ( d==css_d_table_column ) {
5726                 child->setRendMethod( erm_table_column );
5727                 is_proper = true;
5728             }
5729             else if ( d==css_d_table_caption ) {
5730                 child->setRendMethod( erm_final );
5731                 is_proper = true;
5732             }
5733             else if ( d==css_d_none ) {
5734                 child->setRendMethod( erm_invisible );
5735                 is_proper = true;
5736             }
5737             else if ( child->getNodeId()==el_tabularBox ) {
5738                 // Most probably added by us in a previous rendering
5739                 #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
5740                     printf("initTableRendMethods(0): (reused)wrapping unproper > row\n");
5741                 #endif
5742                 child->setRendMethod( erm_table_row );
5743                 cellCount += initTableRendMethods( child, 3 ); // > row
5744                 is_proper = true;
5745             }
5746         }
5747         else if ( state==2 ) { // in rowgroup
5748             if ( d==css_d_table_row ) {
5749                 child->setRendMethod( erm_table_row );
5750                 cellCount += initTableRendMethods( child, 3 ); // > row
5751                 is_proper = true;
5752             }
5753             else if ( d==css_d_none ) {
5754                 child->setRendMethod( erm_invisible );
5755                 is_proper = true;
5756             }
5757             else if ( child->getNodeId()==el_tabularBox ) {
5758                 // Most probably added by us in a previous rendering
5759                 #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
5760                     printf("initTableRendMethods(2): (reused)wrapping unproper > row\n");
5761                 #endif
5762                 child->setRendMethod( erm_table_row );
5763                 cellCount += initTableRendMethods( child, 3 ); // > row
5764                 is_proper = true;
5765             }
5766         }
5767         else if ( state==3 ) { // in row
5768             if ( d==css_d_table_cell ) {
5769                 // This will set the rend method of the cell to either erm_block
5770                 // or erm_final, depending on its content.
5771                 child->initNodeRendMethodRecursive();
5772                 cellCount++;
5773                 is_proper = true;
5774             }
5775             else if ( d==css_d_none ) {
5776                 child->setRendMethod( erm_invisible );
5777                 is_proper = true;
5778             }
5779             else if ( child->getNodeId()==el_tabularBox ) {
5780                 // Most probably added by us in a previous rendering
5781                 #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
5782                     printf("initTableRendMethods(3): (reused)wrapping unproper > cell\n");
5783                 #endif
5784                 // This will set the rend method of the cell to either erm_block
5785                 // or erm_final, depending on its content.
5786                 child->initNodeRendMethodRecursive();
5787                 cellCount++;
5788                 is_proper = true;
5789             }
5790         }
5791         else if ( state==1 ) { // in colgroup
5792             if ( d==css_d_table_column ) {
5793                 child->setRendMethod( erm_table_column );
5794                 is_proper = true;
5795             }
5796             else {
5797                 // No need to tabularBox invalid colgroup children:
5798                 // they are not rendered, and should be considered
5799                 // as if display: none.
5800                 child->setRendMethod( erm_invisible );
5801                 is_proper = true;
5802             }
5803         }
5804         else { // shouldn't be reached
5805             crFatalError(151, "initTableRendMethods state unexpected");
5806             // child->setRendMethod( erm_final );
5807         }
5808 
5809         // Check and deal with unproper children
5810         if ( !is_proper ) { // Unproper child met
5811             // printf("initTableRendMethods(%d): child %d is unproper\n", state, i);
5812             lUInt32 rend_flags = enode->getDocument()->getRenderBlockRenderingFlags();
5813             if ( BLOCK_RENDERING(rend_flags, COMPLETE_INCOMPLETE_TABLES) ) {
5814                 // We can insert a tabularBox element to wrap unproper elements
5815                 last_unproper = i;
5816                 if (first_unproper < 0)
5817                     first_unproper = i;
5818             }
5819             else {
5820                 // Asked to not complete incomplete tables, or we can't insert
5821                 // tabularBox elements anymore
5822                 if ( !BLOCK_RENDERING(rend_flags, ENHANCED) ) {
5823                     // Legacy behaviour was to just make invisible internal-table
5824                     // elements that were not found in their proper internal-table
5825                     // container, but let other non-internal-table elements be
5826                     // (which might be rendered and drawn quite correctly when
5827                     // they are erm_final/erm_block, but won't be if erm_inline).
5828                     if ( d > css_d_table ) {
5829                         child->setRendMethod( erm_invisible );
5830                     }
5831                 }
5832                 else {
5833                     // When in enhanced mode, we let the ones that could
5834                     // be rendered and drawn quite correctly be. But we'll
5835                     // have the others drawn as erm_killed, showing a small
5836                     // symbol so users know some content is missing.
5837                     if ( d > css_d_table || d <= css_d_inline ) {
5838                         child->setRendMethod( erm_killed );
5839                     }
5840                     // Note that there are other situations where some content
5841                     // would not be shown when !COMPLETE_INCOMPLETE_TABLES, and
5842                     // for which we are not really able to set some node as
5843                     // erm_killed (for example, with TABLE > TABLE, the inner
5844                     // one will be rendered, but the outer one would have
5845                     // a height=0, and so the inner content will overflow
5846                     // its container and will not be drawn...)
5847                 }
5848             }
5849         }
5850         if ( first_unproper >= 0 && (is_proper || is_last) ) {
5851             // We met unproper children, but we now have a proper child, or we're done:
5852             // wrap all these consecutive unproper nodes inside a single tabularBox
5853             // element with the proper rendering method.
5854             #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
5855                 printf("initTableRendMethods(%d): wrapping unproper %d>%d\n",
5856                             state, first_unproper, last_unproper);
5857             #endif
5858             int elems_removed = last_unproper - first_unproper + 1;
5859             ldomNode * tbox = enode->boxWrapChildren(first_unproper, last_unproper, el_tabularBox);
5860             if ( tbox && !tbox->isNull() ) {
5861                 elems_removed -= 1; // tabularBox added
5862                 if ( state==0 || state==2 ) { // in table or rowgroup
5863                     // No real need to store the style as an attribute: it would
5864                     // be remembered and re-used when styles change, and just
5865                     // setting the appropriate rendering method is all that is
5866                     // needed for rendering after this.
5867                     // tbox->setAttributeValue(LXML_NS_NONE, enode->getDocument()->getAttrNameIndex(U"style"), U"display: table-row");
5868                     tbox->initNodeStyle();
5869                     tbox->setRendMethod( erm_table_row );
5870                     cellCount += initTableRendMethods( tbox, 3 ); // > row
5871                 }
5872                 else if ( state==3 ) {
5873                     tbox->initNodeStyle();
5874                     // This will set the rend method of the cell to either erm_block
5875                     // or erm_final, depending on its content.
5876                     tbox->initNodeRendMethodRecursive();
5877                     cellCount++;
5878                 }
5879                 else if ( state==1 ) { // should not happen, see above
5880                     tbox->initNodeStyle();
5881                     tbox->setRendMethod( erm_table_column );
5882                 }
5883             }
5884             // If tbox is NULL, all unproper have been removed, and no element added
5885             if (is_last)
5886                 break;
5887             // Account for what's been removed in our loop index and end
5888             i -= elems_removed;
5889             cnt -= elems_removed;
5890             first_unproper = -1;
5891             last_unproper = -1;
5892         }
5893     }
5894     // if ( state==0 ) {
5895     //     dumpRendMethods( enode, cs32("   ") );
5896     // }
5897     return cellCount;
5898 }
5899 
hasInvisibleParent(ldomNode * node)5900 bool hasInvisibleParent( ldomNode * node )
5901 {
5902     for ( ; !node->isRoot(); node = node->getParentNode() )
5903         if ( node->getStyle()->display==css_d_none )
5904             return true;
5905     return false;
5906 }
5907 
isFloatingBox() const5908 bool ldomNode::isFloatingBox() const
5909 {
5910     // BLOCK_RENDERING_FLOAT_FLOATBOXES is what triggers rendering
5911     // the floats floating. They are wrapped in a floatBox, possibly
5912     // not floating, when BLOCK_RENDERING_WRAP_FLOATS.
5913     if ( BLOCK_RENDERING_N(this, FLOAT_FLOATBOXES) && getNodeId() == el_floatBox
5914                 && getStyle()->float_ > css_f_none)
5915         return true;
5916     return false;
5917 }
5918 
5919 /// is node an inlineBox that has not been re-inlined by having
5920 /// its child no more inline-block/inline-table
isBoxingInlineBox() const5921 bool ldomNode::isBoxingInlineBox() const
5922 {
5923     // BLOCK_RENDERING_BOX_INLINE_BLOCKS) is what ensures inline-block
5924     // are boxed and rendered as an inline block, but we may have them
5925     // wrapping a node that is no more inline-block (when some style
5926     // tweaks have changed the display: property).
5927     if ( getNodeId() == el_inlineBox && BLOCK_RENDERING_N(this, BOX_INLINE_BLOCKS) ) {
5928         if (getChildCount() == 1) {
5929             css_display_t d = getChildNode(0)->getStyle()->display;
5930             if (d == css_d_inline_block || d == css_d_inline_table) {
5931                 return true;
5932             }
5933             // Also if this box parent is <ruby> and if what this inlineBox
5934             // contains (probably a rubyBox) is being rendered as erm_table
5935             if ( getChildNode(0)->getRendMethod() == erm_table && getParentNode()
5936                         && getParentNode()->getStyle()->display == css_d_ruby ) {
5937                 return true;
5938             }
5939             return isEmbeddedBlockBoxingInlineBox(true); // avoid rechecking what we just checked
5940         }
5941     }
5942     return false;
5943 }
5944 
5945 /// is node an inlineBox that wraps a bogus embedded block (not inline-block/inline-table)
5946 /// can be called with inline_box_checks_done=true when isBoxingInlineBox() has already
5947 /// been called to avoid rechecking what is known
isEmbeddedBlockBoxingInlineBox(bool inline_box_checks_done) const5948 bool ldomNode::isEmbeddedBlockBoxingInlineBox(bool inline_box_checks_done) const
5949 {
5950     if ( !inline_box_checks_done ) {
5951         if ( getNodeId() != el_inlineBox || !BLOCK_RENDERING_N(this, BOX_INLINE_BLOCKS) )
5952             return false;
5953         if (getChildCount() != 1)
5954             return false;
5955         css_display_t d = getChildNode(0)->getStyle()->display;
5956         if (d == css_d_inline_block || d == css_d_inline_table) {
5957             return false; // regular boxing inlineBox
5958         }
5959         if ( getChildNode(0)->getRendMethod() == erm_table && getParentNode()
5960                     && getParentNode()->getStyle()->display == css_d_ruby ) {
5961             return false; // inlineBox wrapping a rubyBox as a child of <ruby>
5962         }
5963     }
5964     if ( hasAttribute( attr_T ) ) { // T="EmbeddedBlock"
5965             // (no other possible value yet, no need to compare strings)
5966         int cm = getChildNode(0)->getRendMethod();
5967         if ( cm == erm_inline || cm == erm_invisible || cm == erm_killed )
5968             return false; // child has been reset to inline
5969         return true;
5970     }
5971     return false;
5972 }
5973 
initNodeRendMethod()5974 void ldomNode::initNodeRendMethod()
5975 {
5976     // This method is called when re-rendering, but also while
5977     // initially loading a document.
5978     // On initial loading:
5979     //   A node's style is defined when the node element XML tag
5980     //   opening is processed (by lvrend.cpp setNodeStyle() which
5981     //   applies inheritance from its parent, which has
5982     //   already been parsed).
5983     //   This method is called when the node element XML tag is
5984     //   closed, so all its children are known, have styles, and
5985     //   have had this method called on them.
5986     // On re-rendering:
5987     //   Styles are first applied recursively, parents first (because
5988     //   of inheritance).
5989     //   This method is then called thru recurseElementsDeepFirst, so
5990     //   from deepest children up to their parents up to the root node.
5991     // So, this method should decide how this node is going to be
5992     // rendered (inline, block containing other blocks, or final block
5993     // containing only inlines), only from the node's own style, and
5994     // from the styles and rendering methods of its children.
5995     if ( !isElement() )
5996         return;
5997     if ( isRoot() ) {
5998         setRendMethod(erm_block);
5999         return;
6000     }
6001 
6002     // DEBUG TEST
6003     // if ( getParentNode()->getChildIndex( getDataIndex() )<0 ) {
6004     //     CRLog::error("Invalid parent->child relation for nodes %d->%d", getParentNode()->getDataIndex(), getDataIndex() );
6005     // }
6006     // if ( getNodeName() == "image" ) {
6007     //     CRLog::trace("Init log for image");
6008     // }
6009 
6010     // Needed if COMPLETE_INCOMPLETE_TABLES, so have it updated along
6011     // the way to avoid an extra loop for checking if we have some.
6012     bool hasInternalTableItems = false;
6013 
6014     int d = getStyle()->display;
6015     lUInt32 rend_flags = getDocument()->getRenderBlockRenderingFlags();
6016 
6017     if ( hasInvisibleParent(this) ) { // (should be named isInvisibleOrHasInvisibleParent())
6018         // Note: we could avoid that up-to-root-node walk for each node
6019         // by inheriting css_d_none in setNodeStyle(), and just using
6020         // "if ( d==css_d_none )" instead of hasInvisibleParent(this).
6021         // But not certain this would have no side effect, and some
6022         // quick tests show no noticeable change in rendering timing.
6023         //
6024         //recurseElements( resetRendMethodToInvisible );
6025         setRendMethod(erm_invisible);
6026     } else if ( d==css_d_inline ) {
6027         // Used to be: an inline parent resets all its children to inline
6028         //   (so, if some block content is erroneously wrapped in a SPAN, all
6029         //   the content became inline...), except, depending on what's enabled:
6030         //   - nodes with float: which can stay block among inlines
6031         //   - the inner content of inlineBoxes (the inlineBox is already inline)
6032         //   recurseMatchingElements( resetRendMethodToInline, isNotBoxWrappingNode );
6033         //
6034         // But we don't want to "reset all its children to inline" when a bogus
6035         // spurious block element happens to be inside some inline one, as this
6036         // can be seen happening (<small> multiple <p>...</small>).
6037         // So, when BOX_INLINE_BLOCKS is enabled, we wrap such block elements inside
6038         // a <inlineBox> element, nearly just like if it were "display: inline-block",
6039         // with a few tweaks in its rendering (see below).
6040         // Or, if it contains only block elements, and empty text nodes, we can just
6041         // set this inline element to be erm_block.
6042         //
6043         // Some discussions about that "block inside inline" at:
6044         //   https://github.com/w3c/csswg-drafts/issues/1477
6045         //   https://stackoverflow.com/questions/1371307/displayblock-inside-displayinline
6046         //
6047         if ( !BLOCK_RENDERING(rend_flags, BOX_INLINE_BLOCKS) ) {
6048             // No support for anything but inline elements, and possibly embedded floats
6049             recurseMatchingElements( resetRendMethodToInline, isNotBoxWrappingNode );
6050         }
6051         else if ( !isNotBoxWrappingNode(this) ) {
6052             // If this node is already a box wrapping node (active floatBox or inlineBox,
6053             // possibly a <inlineBox T="EmbeddedBlock"> created here in a previous
6054             // rendering), just set it to erm_inline.
6055             setRendMethod(erm_inline);
6056         }
6057         else {
6058             // Set this inline element to be erm_inline, and look at its children
6059             setRendMethod(erm_inline);
6060             // Quick scan first, before going into more checks if needed
6061             bool has_block_nodes = false;
6062             bool has_inline_nodes = false;
6063             for ( int i=0; i < getChildCount(); i++ ) {
6064                 ldomNode * child = getChildNode( i );
6065                 if ( !child->isElement() ) // text node
6066                     continue;
6067                 int cm = child->getRendMethod();
6068                 if ( cm == erm_inline ) {
6069                     has_inline_nodes = true; // We won't be able to make it erm_block
6070                     continue;
6071                 }
6072                 if ( cm == erm_invisible || cm == erm_killed )
6073                     continue;
6074                 if ( !isNotBoxWrappingNode( child ) ) {
6075                     // This child is already wrapped by a floatBox or inlineBox
6076                     continue;
6077                 }
6078                 has_block_nodes = true;
6079                 if ( has_inline_nodes )
6080                     break; // we know enough
6081             }
6082             if ( has_block_nodes ) {
6083                 bool has_non_empty_text_nodes = false;
6084                 bool do_wrap_blocks = true;
6085                 if ( !has_inline_nodes ) {
6086                     // No real inline nodes. Inspect each text node to see if they
6087                     // are all empty text.
6088                     for ( int i=0; i < getChildCount(); i++ ) {
6089                         if ( getChildNode(i)->isText() ) {
6090                             lString32 s = getChildNode(i)->getText();
6091                             if ( !IsEmptySpace(s.c_str(), s.length() ) ) {
6092                                 has_non_empty_text_nodes = true;
6093                                 break;
6094                             }
6095                         }
6096                     }
6097                     if ( !has_non_empty_text_nodes ) {
6098                         // We can be a block wrapper (renderBlockElementEnhanced/Legacy will
6099                         // skip empty text nodes, no need to remove them)
6100                         setRendMethod(erm_block);
6101                         do_wrap_blocks = false;
6102                     }
6103                 }
6104                 if ( do_wrap_blocks ) {
6105                     // We have a mix of inline nodes or non-empty text, and block elements:
6106                     // wrap each block element in a <inlineBox T="EmbeddedBlock">.
6107                     for ( int i=getChildCount()-1; i >=0; i-- ) {
6108                         ldomNode * child = getChildNode( i );
6109                         if ( !child->isElement() ) // text node
6110                             continue;
6111                         int cm = child->getRendMethod();
6112                         if ( cm == erm_inline || cm == erm_invisible || cm == erm_killed )
6113                             continue;
6114                         if ( !isNotBoxWrappingNode( child ) )
6115                             continue;
6116                         // This child is erm_block or erm_final (or some other erm_table like rend method).
6117                         // It will be inside a upper erm_final
6118                         // Wrap this element into an inlineBox, just as if it was display:inline-block,
6119                         // with a few differences that will be handled by lvrend.cpp/lvtextfm.cpp:
6120                         // - it should behave as if it had width: 100%, so preceding
6121                         //   and following text/inlines element will be on their own line
6122                         // - the previous line should not be justified
6123                         // - in the matter of page splitting, lines (as they are 100%-width) should
6124                         //   be forwarded to the parent flow/context
6125                         // Remove any preceding or following empty text nodes (there can't
6126                         // be consecutive text nodes) so we don't get spurious empty lines.
6127                         if ( i < getChildCount()-1 && getChildNode(i+1)->isText() ) {
6128                             lString32 s = getChildNode(i+1)->getText();
6129                             if ( IsEmptySpace(s.c_str(), s.length() ) ) {
6130                                 removeChildren(i+1, i+1);
6131                             }
6132                         }
6133                         if ( i > 0 && getChildNode(i-1)->isText() ) {
6134                             lString32 s = getChildNode(i-1)->getText();
6135                             if ( IsEmptySpace(s.c_str(), s.length() ) ) {
6136                                 removeChildren(i-1, i-1);
6137                                 i--; // update our position
6138                             }
6139                         }
6140                         ldomNode * ibox = insertChildElement( i, LXML_NS_NONE, el_inlineBox );
6141                         moveItemsTo( ibox, i+1, i+1 ); // move this child from 'this' into ibox
6142                         // Mark this inlineBox so we can handle its peculiarities
6143                         ibox->setAttributeValue(LXML_NS_NONE, attr_T, U"EmbeddedBlock");
6144                         setNodeStyle( ibox, getStyle(), getFont() );
6145                         ibox->setRendMethod( erm_inline );
6146                     }
6147                 }
6148             }
6149         }
6150     } else if ( d==css_d_ruby ) {
6151         // This will be dealt in a big section below. For now, reset everything
6152         // to inline as ruby is only allowed to contain inline content.
6153         // We don't support the newer display: values like ruby-base, ruby-text...,
6154         // but only "display: ruby" which is just set on the <ruby> element
6155         // (which allows us to have it reset back to "display: inline" if we
6156         // don't want ruby support).
6157         //   recurseElements( resetRendMethodToInline );
6158         // Or may be not: looks like we can support <ruby> inside <ruby>,
6159         // so allow that; and probably anything nested, as we'll handle
6160         // that just like a table cell content.
6161         setRendMethod(erm_inline);
6162     } else if ( d==css_d_run_in ) {
6163         // runin
6164         //CRLog::trace("switch all children elements of <%s> to inline", LCSTR(getNodeName()));
6165         recurseElements( resetRendMethodToInline );
6166         setRendMethod(erm_inline);
6167     } else if ( d==css_d_list_item_legacy ) {
6168         // list item (no longer used, obsolete rendering method)
6169         setRendMethod(erm_final);
6170     } else if ( d==css_d_table ) {
6171         // table: this will "Generate missing child wrappers" if needed
6172         initTableRendMethods( this, 0 );
6173         // Not sure if we should do the same for the other css_d_table_* and
6174         // call initTableRendMethods(this, 1/2/3) so that the "Generate missing
6175         // child wrappers" step is done before the "Generate missing parents" step
6176         // we might be doing below - to conform to the order of steps in the specs.
6177     } else if ( d==css_d_inline_table && ( BLOCK_RENDERING(rend_flags, COMPLETE_INCOMPLETE_TABLES) || getNodeId()==el_table ) ) {
6178         // Only if we're able to complete incomplete tables, or if this
6179         // node is itself a <TABLE>. Otherwise, fallback to the following
6180         // catch-all 'else' and render its content as block.
6181         //   (Note that we should skip that if the node is an image, as
6182         //   initTableRendMethods() would not be able to do anything with
6183         //   it as it can't add children to an IMG. Hopefully, the specs
6184         //   say replaced elements like IMG should not have table-like
6185         //   display: values - which setNodeStyle() ensures.)
6186         // Any element can have "display: inline-table", and if it's not
6187         // a TABLE, initTableRendMethods() will complete/wrap it to make
6188         // it possibly the single cell of a TABLE. This should naturally
6189         // ensure all the differences between inline-block and inline-table.
6190         // https://stackoverflow.com/questions/19352072/what-is-the-difference-between-inline-block-and-inline-table/19352149#19352149
6191         initTableRendMethods( this, 0 );
6192         // Note: if (d==css_d_inline_block && getNodeId()==el_table), we
6193         // should NOT call initTableRendMethods()! It should be rendered
6194         // as a block, and if its children are actually TRs, they will be
6195         // wrapped in a "missing parent" tabularBox wrapper that will
6196         // have initTableRendMethods() called on it.
6197     } else {
6198         // block or final
6199         // remove last empty space text nodes
6200         bool hasBlockItems = false;
6201         bool hasInline = false;
6202         bool hasFloating = false;
6203         // Floating nodes, thus block, are accounted apart from inlines
6204         // and blocks, as their behaviour is quite specific.
6205         // - When !PREPARE_FLOATBOXES, we just don't deal specifically with
6206         //   floats, for a rendering more similar to legacy rendering: SPANs
6207         //   with float: will be considered as non-floating inline, while
6208         //   DIVs with float: will be considered as block elements, possibly
6209         //   causing autoBoxing of surrounding content with only inlines.
6210         // - When PREPARE_FLOATBOXES (even if !FLOAT_FLOATBOXES), we do prepare
6211         //   floats and floatBoxes to be consistent, ready to be floating, or
6212         //   not and flat (with a rendering possibly not similar to legacy),
6213         //   without any display hash mismatch (so that toggling does not
6214         //   require a full reloading). SPANs and DIVs with float: mixed with
6215         //   inlines will be considered as inline when !FLOAT_FLOATBOXES, to
6216         //   avoid having autoBoxing elements that would mess with a correct
6217         //   floating rendering.
6218         // Note that FLOAT_FLOATBOXES requires having PREPARE_FLOATBOXES.
6219         bool handleFloating = BLOCK_RENDERING(rend_flags, PREPARE_FLOATBOXES);
6220 
6221         detectChildTypes( this, hasBlockItems, hasInline, hasInternalTableItems, hasFloating, handleFloating );
6222         const css_elem_def_props_t * ntype = getElementTypePtr();
6223         if (ntype && ntype->is_object) { // image
6224             // No reason to erm_invisible an image !
6225             // And it has to be erm_final to be drawn (or set to erm_inline
6226             // by some upper node).
6227             // (Note that setNodeStyle() made sure an image can't be
6228             // css_d_inline_table/css_d_table*, as per specs.)
6229             setRendMethod( erm_final );
6230             /* used to be:
6231             switch ( d )
6232             {
6233             case css_d_block:
6234             case css_d_list_item_block:
6235             case css_d_inline:
6236             case css_d_inline_block:
6237             case css_d_inline_table:
6238             case css_d_run_in:
6239                 setRendMethod( erm_final );
6240                 break;
6241             default:
6242                 //setRendMethod( erm_invisible );
6243                 recurseElements( resetRendMethodToInvisible );
6244                 break;
6245             }
6246             */
6247         } else if ( hasBlockItems && !hasInline ) {
6248             // only blocks (or floating blocks) inside
6249             setRendMethod( erm_block );
6250         } else if ( !hasBlockItems && hasInline ) {
6251             // only inline (with possibly floating blocks that will
6252             // be dealt with by renderFinalBlock)
6253             if ( hasFloating ) {
6254                 // If all the inline elements are empty space, we may as well
6255                 // remove them and have our floats contained in a erm_block
6256                 if ( cleanIfOnlyEmptyTextInline(true) ) {
6257                     setRendMethod( erm_block );
6258                 }
6259                 else {
6260                     if ( !BLOCK_RENDERING(rend_flags, FLOAT_FLOATBOXES) ) {
6261                         // If we don't want floatBoxes floating, reset them to be
6262                         // rendered inline among inlines
6263                         recurseMatchingElements( resetRendMethodToInline, isNotBoxingInlineBoxNode );
6264                     }
6265                     setRendMethod( erm_final );
6266                 }
6267             }
6268             else {
6269                 setRendMethod( erm_final );
6270             }
6271         } else if ( !hasBlockItems && !hasInline ) {
6272             // nothing (or only floating blocks)
6273             // (don't ignore it as it might be some HR with borders/padding,
6274             // even if no content)
6275             setRendMethod( erm_block );
6276         } else if ( hasBlockItems && hasInline ) {
6277             // Mixed content of blocks and inline elements:
6278             // the consecutive inline elements should be considered part
6279             // of an anonymous block element - non-anonymous for crengine,
6280             // as we create a <autoBoxing> element and add it to the DOM),
6281             // taking care of ignoring unvaluable inline elements consisting
6282             // of only spaces.
6283             //   Note: when there are blocks, inlines and floats mixed, we could
6284             //   choose to let the floats be blocks, or include them with the
6285             //   surrounding inlines into an autoBoxing:
6286             //   - blocks: they will just be footprints (so, only 2 squares at
6287             //   top left and right) over the inline/final content, and when
6288             //   there are many, the text may not wrap fully around the floats...
6289             //   - with inlines: they will wrap fully, but if the text is short,
6290             //   the floats will be cleared, and there will be blank vertical
6291             //   filling space...
6292             //   The rendering can be really different, and there's no real way
6293             //   of knowing which will be the best.
6294             //   So, for now, go with including them with inlines into the
6295             //   erm_final autoBoxing.
6296             // The above has become less critical after we added DO_NOT_CLEAR_OWN_FLOATS
6297             // and ALLOW_EXACT_FLOATS_FOOTPRINTS, and both options should render
6298             // similarly.
6299             // But still going with including them with inlines is best, as we
6300             // don't need to include them in the footprint (so, the limit of
6301             // 5 outer block float IDs is still available for real outer floats).
6302             if ( getParentNode()->getNodeId()==el_autoBoxing ) {
6303                 // already autoboxed
6304                 setRendMethod( erm_final );
6305                 // This looks wrong: no reason to force child of autoBoxing to be
6306                 // erm_final: most often, the autoBoxing has been created to contain
6307                 // only inlines and set itself to be erm_final. So, it would have been
6308                 // caught by the 'else if ( !hasBlockItems && hasInline )' above and
6309                 // set to erm_final. If not, styles have changed, and it may contain
6310                 // a mess of styles: it might be better to proceed with the following
6311                 // cleanup (and have autoBoxing re-autoboxed... or not at all when
6312                 // a cache file is used, and we'll end up being erm_final anyway).
6313                 // But let's keep it, in case it handles some edge cases.
6314             } else {
6315                 // cleanup or autobox
6316                 int i=getChildCount()-1;
6317                 for ( ; i>=0; i-- ) {
6318                     ldomNode * node = getChildNode(i);
6319 
6320                     // DEBUG TEST
6321                     // if ( getParentNode()->getChildIndex( getDataIndex() )<0 ) {
6322                     //    CRLog::error("Invalid parent->child relation for nodes %d->%d",
6323                     //              getParentNode()->getDataIndex(), getDataIndex() );
6324                     // }
6325 
6326                     // We want to keep float:'ing nodes with inline nodes, so they stick with their
6327                     // siblings inline nodes in an autoBox: the erm_final autoBox will deal
6328                     // with rendering the floating node, and the inline text around it
6329                     if ( isInlineNode(node) || (handleFloating && isFloatingNode(node)) ) {
6330                         int j = i-1;
6331                         for ( ; j>=0; j-- ) {
6332                             node = getChildNode(j);
6333                             if ( !isInlineNode(node) && !(handleFloating && isFloatingNode(node)) )
6334                                 break;
6335                         }
6336                         j++;
6337                         // j..i are inline
6338                         if ( j>0 || i<(int)getChildCount()-1 )
6339                             autoboxChildren( j, i, handleFloating );
6340                         i = j;
6341                     }
6342                     else if ( i>0 && node->getRendMethod() == erm_final ) {
6343                         // (We skip the following if the current node is not erm_final, as
6344                         // if it is erm_block, we would break the block layout by making
6345                         // it all inline in an erm_final autoBoxing.)
6346                         // This node is not inline, but might be preceded by a css_d_run_in node:
6347                         // https://css-tricks.com/run-in/
6348                         // https://developer.mozilla.org/en-US/docs/Web/CSS/display
6349                         //   "If the adjacent sibling of the element defined as "display: run-in" box
6350                         //   is a block box, the run-in box becomes the first inline box of the block
6351                         //   box that follows it. "
6352                         // Hopefully only used for footnotes in fb2 where the footnote number
6353                         // is in a block element, and the footnote text in another.
6354                         // fb2.css sets the first block to be "display: run-in" as an
6355                         // attempt to render both on the same line:
6356                         //   <section id="n1">
6357                         //     <title style="display: run-in; font-weight: bold;">
6358                         //       <p>1</p>
6359                         //     </title>
6360                         //     <p>Text footnote</p>
6361                         //   </section>
6362                         //
6363                         // This node might be that second block: check whether the preceding node
6364                         // is "run-in", and if it is, bring them both in an autoBoxing.
6365                         ldomNode * prev = getChildNode(i-1);
6366                         ldomNode * inBetweenTextNode = NULL;
6367                         if ( prev->isText() && i-1>0 ) { // some possible empty text in between
6368                             inBetweenTextNode = prev;
6369                             prev = getChildNode(i-2);
6370                         }
6371                         if ( prev->isElement() && prev->getStyle()->display == css_d_run_in ) {
6372                             bool do_autoboxing = true;
6373                             int run_in_idx = inBetweenTextNode ? i-2 : i-1;
6374                             int block_idx = i;
6375                             if ( inBetweenTextNode ) {
6376                                 lString32 text = inBetweenTextNode->getText();
6377                                 if ( IsEmptySpace(text.c_str(), text.length() ) ) {
6378                                     removeChildren(i-1, i-1);
6379                                     block_idx = i-1;
6380                                 }
6381                                 else {
6382                                     do_autoboxing = false;
6383                                 }
6384                             }
6385                             if ( do_autoboxing ) {
6386                                 CRLog::debug("Autoboxing run-in items");
6387                                 // Sadly, to avoid having an erm_final inside another erm_final,
6388                                 // we need to reset the block node to be inline (but that second
6389                                 // erm_final would have been handled as inline anyway, except
6390                                 // for possibly updating the strut height/baseline).
6391                                 node->recurseMatchingElements( resetRendMethodToInline, isNotBoxingInlineBoxNode );
6392                                 // No need to autobox if there are only 2 children (the run-in and this box)
6393                                 if ( getChildCount()!=2 ) { // autobox run-in
6394                                     autoboxChildren( run_in_idx, block_idx, handleFloating );
6395                                 }
6396                             }
6397                             i = run_in_idx;
6398                         }
6399                     }
6400                 }
6401                 // check types after autobox
6402                 detectChildTypes( this, hasBlockItems, hasInline, hasInternalTableItems, hasFloating, handleFloating );
6403                 if ( hasInline ) {
6404                     // Should not happen when autoboxing has been done above - but
6405                     // if we couldn't, fallback to erm_final that will render all
6406                     // children as inline
6407                     setRendMethod( erm_final );
6408                 } else {
6409                     // All inlines have been wrapped into block autoBoxing elements
6410                     // (themselves erm_final): we can be erm_block
6411                     setRendMethod( erm_block );
6412                 }
6413             }
6414         }
6415     }
6416 
6417     if ( hasInternalTableItems && BLOCK_RENDERING(rend_flags, COMPLETE_INCOMPLETE_TABLES) && getRendMethod() == erm_block ) {
6418         // We have only block items, whether the original ones or the
6419         // autoBoxing nodes we created to wrap inlines, and all empty
6420         // inlines have been removed.
6421         // Some of these block items are css_d_table_cell, css_d_table_row...:
6422         // if this node (their parent) has not the expected css_d_table_row
6423         // or css_d_table display style, we are an unproper parent: we want
6424         // to add the missing parent(s) as wrapper(s) between this node and
6425         // these children.
6426         // (If we ended up not being erm_block, and we contain css_d_table_*
6427         // elements, everything is already messed up.)
6428         // Note: we first used the same <autoBoxing> element used to box
6429         // inlines as the table wrapper, which was fine, except in some edge
6430         // cases where some real autoBoxing were wrongly re-used as the tabular
6431         // wrapper (and we ended up having erm_final containing other erm_final
6432         // which were handled just like erm_inline with ugly side effects...)
6433         // So, best to introduce a dedicated element: <tabularBox>.
6434         //
6435         // We follow rules from section "Generate missing parents" in:
6436         //   https://www.w3.org/TR/CSS22/tables.html#anonymous-boxes
6437         //   https://www.w3.org/TR/css-tables-3/#fixup (clearer than previous one)
6438         // Note: we do that not in the order given by the specs... As we walk
6439         // nodes deep first, we are here first "generating missing parents".
6440         // When walking up, and meeting a real css_d_table element, or
6441         // below when adding a generated erm_table tabularBox, we call
6442         // initTableRendMethods(0), which will "generate missing child wrappers".
6443         // Not really sure both orderings are equivalent, but let's hope it's ok...
6444 
6445         // So, let's generate missing parents:
6446 
6447         // "An anonymous table-row box must be generated around each sequence
6448         // of consecutive table-cell boxes whose parent is not a table-row."
6449         if ( d != css_d_table_row ) { // We're not a table row
6450             // Look if we have css_d_table_cell that we must wrap in a proper erm_table_row
6451             int last_table_cell = -1;
6452             int first_table_cell = -1;
6453             int last_visible_child = -1;
6454             bool did_wrap = false;
6455             int len = getChildCount();
6456             for ( int i=len-1; i>=0; i-- ) {
6457                 ldomNode * child = getChildNode(i);
6458                 int cd = child->getStyle()->display;
6459                 int cm = child->getRendMethod();
6460                 if ( cd == css_d_table_cell ) {
6461                     if ( last_table_cell < 0 ) {
6462                         last_table_cell = i;
6463                         // We've met a css_d_table_cell, see if it is followed by
6464                         // tabularBox siblings we might have passed by: they might
6465                         // have been added by initTableRendMethods as a missing
6466                         // children of a css_d_table_row: make them part of the row.
6467                         for (int j=i+1; j<getChildCount(); j++) {
6468                             if ( getChildNode(j)->getNodeId()==el_tabularBox )
6469                                 last_table_cell = j;
6470                             else
6471                                 break;
6472                         }
6473                     }
6474                     if ( i == 0 )
6475                         first_table_cell = 0;
6476                     if ( last_visible_child < 0 )
6477                         last_visible_child = i;
6478                 }
6479                 else if ( last_table_cell >= 0 && child->getNodeId()==el_tabularBox ) {
6480                     // We've seen a css_d_table_cell and we're seeing a tabularBox:
6481                     // it might have been added by initTableRendMethods as a missing
6482                     // children of a css_d_table_row: make it part of the row
6483                     if ( i == 0 )
6484                         first_table_cell = 0;
6485                     if ( last_visible_child < 0 )
6486                         last_visible_child = i;
6487                 }
6488                 else if ( cd == css_d_none || cm == erm_invisible ) {
6489                     // Can be left inside or outside the wrap
6490                     if ( i == 0 && last_table_cell >= 0 ) {
6491                         // Include it if first and we're wrapping
6492                         first_table_cell = 0;
6493                     }
6494                 }
6495                 else {
6496                     if ( last_table_cell >= 0)
6497                         first_table_cell = i+1;
6498                     if ( last_visible_child < 0 )
6499                         last_visible_child = i;
6500                 }
6501                 if ( first_table_cell >= 0 ) {
6502                     if ( first_table_cell == 0 && last_table_cell == last_visible_child
6503                                 && getNodeId()==el_tabularBox && !did_wrap ) {
6504                         // All children are table cells, and we're not css_d_table_row,
6505                         // but we are a tabularBox!
6506                         // We were most probably created here in a previous rendering,
6507                         // so just set us to be the anonymous table row.
6508                         #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
6509                             printf("initNodeRendMethod: (reused)wrapping unproper table cells %d>%d\n",
6510                                         first_table_cell, last_table_cell);
6511                         #endif
6512                         setRendMethod( erm_table_row );
6513                     }
6514                     else {
6515                         #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
6516                             printf("initNodeRendMethod: wrapping unproper table cells %d>%d\n",
6517                                         first_table_cell, last_table_cell);
6518                         #endif
6519                         ldomNode * tbox = boxWrapChildren(first_table_cell, last_table_cell, el_tabularBox);
6520                         if ( tbox && !tbox->isNull() ) {
6521                             tbox->initNodeStyle();
6522                             tbox->setRendMethod( erm_table_row );
6523                         }
6524                         did_wrap = true;
6525                     }
6526                     last_table_cell = -1;
6527                     first_table_cell = -1;
6528                 }
6529             }
6530         }
6531 
6532         // "An anonymous table or inline-table box must be generated around each
6533         // sequence of consecutive proper table child boxes which are misparented."
6534         // Not sure if we should skip that for some values of this node's
6535         // style->display among css_d_table*. Let's follow the specs literally.
6536         int last_misparented = -1;
6537         int first_misparented = -1;
6538         int last_visible_child = -1;
6539         bool did_wrap = false;
6540         int len = getChildCount();
6541         for ( int i=len-1; i>=0; i-- ) {
6542             ldomNode * child = getChildNode(i);
6543             int cd = child->getStyle()->display;
6544             int cm = child->getRendMethod();
6545             bool is_misparented = false;
6546             if ( (cd == css_d_table_row || cm == erm_table_row)
6547                             && d != css_d_table && d != css_d_table_row_group
6548                             && d != css_d_table_header_group && d != css_d_table_footer_group ) {
6549                 // A table-row is misparented if its parent is neither a table-row-group
6550                 // nor a table-root box (we include by checking cm==erm_table_row any
6551                 // anonymous table row created just above).
6552                 is_misparented = true;
6553             }
6554             else if ( cd == css_d_table_column && d != css_d_table && d != css_d_table_column_group ) {
6555                 // A table-column box is misparented if its parent is neither
6556                 // a table-column-group box nor a table-root box.
6557                 is_misparented = true;
6558             }
6559             else if ( d != css_d_table && (cd == css_d_table_row_group || cd == css_d_table_header_group
6560                                             || cd == css_d_table_footer_group || cd == css_d_table_column_group
6561                                             || cd == css_d_table_caption ) ) {
6562                 // A table-row-group, table-column-group, or table-caption box is misparented
6563                 // if its parent is not a table-root box.
6564                 is_misparented = true;
6565             }
6566             if ( is_misparented ) {
6567                 if ( last_misparented < 0 ) {
6568                     last_misparented = i;
6569                     // As above for table cells: grab passed-by tabularBox siblings
6570                     // to include them in the wrap
6571                     for (int j=i+1; j<getChildCount(); j++) {
6572                         if ( getChildNode(j)->getNodeId()==el_tabularBox )
6573                             last_misparented = j;
6574                         else
6575                             break;
6576                     }
6577                 }
6578                 if (i == 0)
6579                     first_misparented = 0;
6580                 if ( last_visible_child < 0 )
6581                     last_visible_child = i;
6582             }
6583             else if ( last_misparented >= 0 && child->getNodeId()==el_tabularBox ) {
6584                 // As above for table cells: include tabularBox siblings in the wrap
6585                 if (i == 0)
6586                     first_misparented = 0;
6587                 if ( last_visible_child < 0 )
6588                     last_visible_child = i;
6589             }
6590             else if ( cd == css_d_none || cm == erm_invisible ) {
6591                 // Can be left inside or outside the wrap
6592                 if ( i == 0 && last_misparented >= 0 ) {
6593                     // Include it if first and we're wrapping
6594                     first_misparented = 0;
6595                 }
6596             }
6597             else {
6598                 if ( last_misparented >= 0 )
6599                     first_misparented = i+1;
6600                 if ( last_visible_child < 0 )
6601                     last_visible_child = i;
6602             }
6603             if ( first_misparented >= 0 ) {
6604                 if ( first_misparented == 0 && last_misparented == last_visible_child
6605                             && getNodeId()==el_tabularBox && !did_wrap ) {
6606                     // All children are misparented, and we're not css_d_table,
6607                     // but we are a tabularBox!
6608                     // We were most probably created here in a previous rendering,
6609                     // so just set us to be the anonymous table.
6610                     #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
6611                         printf("initNodeRendMethod: (reused)wrapping unproper table children %d>%d\n",
6612                                     first_misparented, last_misparented);
6613                     #endif
6614                     setRendMethod( erm_table );
6615                     initTableRendMethods( this, 0 );
6616                 }
6617                 else {
6618                     #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
6619                         printf("initNodeRendMethod: wrapping unproper table children %d>%d\n",
6620                                     first_misparented, last_misparented);
6621                     #endif
6622                     ldomNode * tbox = boxWrapChildren(first_misparented, last_misparented, el_tabularBox);
6623                     if ( tbox && !tbox->isNull() ) {
6624                         tbox->initNodeStyle();
6625                         tbox->setRendMethod( erm_table );
6626                         initTableRendMethods( tbox, 0 );
6627                     }
6628                     did_wrap = true;
6629                 }
6630                 last_misparented = -1;
6631                 first_misparented = -1;
6632                 // Note:
6633                 //   https://www.w3.org/TR/css-tables-3/#fixup
6634                 //   "An anonymous table or inline-table box must be generated
6635                 //    around [...] If the box's parent is an inline, run-in, or
6636                 //    ruby box (or any box that would perform inlinification of
6637                 //    its children), then an inline-table box must be generated;
6638                 //    otherwise it must be a table box."
6639                 // We don't handle the "inline parent > inline-table" rule,
6640                 // because of one of the first checks at top of this function:
6641                 // if this node (the parent) is css_d_inline, we didn't have
6642                 // any detectChildTypes() and autoBoxing happening, stayed erm_inline
6643                 // and didn't enter this section to do the tabularBox wrapping.
6644                 // Changing this (incorrect) rule for css_d_inline opens many
6645                 // bigger issues, so let's not support this (rare) case here.
6646                 // So:
6647                 //   <div>Some text <span style="display: table-cell">table-cell</span> and more text.</div>
6648                 // will properly have the cell tabularBoxes'ed, which will be
6649                 // inserted between 2 autoBoxing (the text nodes), because their
6650                 // container is css_d_block DIV.
6651                 // While:
6652                 //   <div><span>Some text <span style="display: table-cell">table-cell</span> and more text.</span></div>
6653                 // as the container is a css_d_inline SPAN, nothing will happen
6654                 // and everything will be reset to erm_inline. The parent DIV
6655                 // will just see that it contains a single erm_inline SPAN,
6656                 // and won't do any boxing.
6657             }
6658         }
6659     }
6660 
6661     if ( d == css_d_ruby && BLOCK_RENDERING(rend_flags, ENHANCED) ) {
6662         // Ruby input can be quite loose and have various tag strategies (mono/group,
6663         // interleaved/tabular, double sided). Moreover, the specs have evolved between
        // 2001 and 2020 (<rbc> tag no longer mentioned in 2020; <rtc> being just another
6665         // semantic container for Mozilla, and can be preceded by a bunch of <rt> which
6666         // are pronunciation containers, that don't have to be in an <rtc>...)
6667         // Moreover, various samples on the following pages don't close tags, and expect
6668         // the HTML parser to do that. We do that only when parsing .html files, but
6669         // we don't when parsing .epub files as they are expected to be balanced XHTML.
6670         //
6671         // References:
6672         //  https://www.w3.org/International/articles/ruby/markup
6673         //  https://www.w3.org/TR/ruby-use-cases/ differences between XHTML, HTML5 & HTML Extensions
6674         //  https://www.w3.org/TR/ruby/ Ruby Annotation, 2001
6675         //  http://darobin.github.io/html-ruby/ HTML Ruby Markup Extensions, 2015
6676         //  https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-ruby-element HTML Living standard
6677         //  https://drafts.csswg.org/css-ruby/ CSS Ruby Layout, 2020
6678         //  https://developer.mozilla.org/en-US/docs/Web/HTML/Element/rtc
6679         //  https://chenhuijing.com/blog/html-ruby/ All about the HTML <ruby> element (in 2016)
6680         //  https://github.com/w3c/html/issues/291 How to handle legacy Ruby content that may use <rbc>?
6681         //  https://w3c.github.io/i18n-tests/results/ruby-html Browsers support
6682         //
6683         // We can handle quite a few of these variations with the following strategy.
6684         //
6685         // We want a <ruby> (which will stay inline) to only contain inlineBox>rubyBox elements
6686         // that will be set up to be rendered just as an inline-table:
6687         //   <ruby, "display: ruby", erm_inline>
6688         //     <inlineBox, erm_inline>  [1 or more, 1 per ruby segment]
6689         //       <rubyBox, erm_table>   [1]
6690         //         <rbc or rubyBox, erm_table_row>  [1]
6691         //           <rb or rubyBox, erm_final> base text </rb or /rubyBox>  [1 or more]
6692         //         </rbc or /rubyBox>
6693         //         <rtc or rubyBox, erm_table_row>  [1 or more, usually 1 or 2]
6694         //           <rt or rubyBox, erm_final> annotation text </rt or /rubyBox>  [1 or more]
6695         //         </rtc or /rubyBox>
6696         //       </rubyBox>
6697         //     </inlineBox>
6698         //     [some possible empty space text nodes between ruby segments]
6699         //   </ruby>
6700         //
6701         // (The re-ordering of the table rows, putting the first "rtc" above the "rbc",
6702         // will be done in renderTable(), as it is just needed there in its own internal
6703         // table data structures. The DOM will stay in its original order: the "rbc"
6704         // staying before followup "rtc", which will give us the correct baseline to use
        // for the whole structure: the baseline of the "rbc".)
6706         //
6707         // We need to build all this when we meet a simple:
6708         //   <ruby>text1<rt>annot1</rt>text2<rt>annot2</rt> </ruby>
6709         // The only element we'll nearly always find inside a <ruby> is <rt>,
6710         // (but we can find sometimes a single <rtc> with no <rt>).
6711         //
6712         // One thing we might not handle well is white-space, which, depending on where
6713         // it happens, should be dropped or not. We drop some by putting it between table
6714         // elements, we keep some by putting it between the inlineBoxes, but not really
6715         // according to the complex rules in https://drafts.csswg.org/css-ruby/#box-fixup
6716         //
6717         // Some other notes:
6718         // - We can style some ruby elements, including some of the rubyBox we add, with:
6719         //     rt, rubyBox[T=rt] { font-size: 50%; font-variant-east-asian: ruby; }
6720         //     rubyBox { border: 1px solid green; }
        // - Note that on initial loading (HTML parsing, and this boxing here happening),
        //   the real ruby sub-elements present in the HTML will already be there in the
        //   DOM and have their style set, possibly inherited from their parent (the <ruby>
        //   element) *before* this boxing is happening. If we add a rubyBox, and it
        //   becomes the parent of a rb or rt, these rb or rt won't inherit from the
        //   rubyBox (that we may style). They also won't get styled by CSS selectors
        //   like "rubyBox > rt".
        //   But on subsequent re-renderings, as the DOM is kept, all this will happen.
6729         //   So: avoid such rules, and avoid setting inherit'able properties to
6730         //   the rubyBox elements; otherwise we may get different look on initial
6731         //   loading and on subsequent re-renderings.
6732         // - With some ruby constructs, the behaviour and rendering might be different
6733         //   whether we're parsing a HTML file or an EPUB file:
6734         //   - the HTML parser is able to auto-close tags, which is needed with most
6735         //     of the samples in the above URLs (but may fail on nested ruby with
6736         //     unbalanced tags, as auto-closing in one ruby might kill the other).
6737         //   - the EPUB XHTML parser expects balanced tags, and may work with nested
6738         //     ruby, but will not process ruby with unbalanced tags.
6739 
6740         // To make things easier to follow below (with the amount of nested rubyBoxes...),
6741         // we name the variables used to hold each of them:
6742         //   ibox1 : the inlineBox wrapping the 1st level rubyBox that will be erm_table (inline-table)
6743         //   rbox1 : the 1st level rubyBox that will be erm_table
6744         //   rbox2 : the 2nd level rubyBox that will be erm_table_row, like existing <rbc> and <rtc>
6745         //   rbox3 : the 3rd level rubyBox that will be a table cell (erm_final or erm_block), like existing <rb> and <rt>
6746 
6747         // Check if we have already wrapped: we should contain only <inlineBox>'ed <rubyBox>es
6748         // Note that <ruby style="display: ruby"> is all that is required to trigger this. When
6749         // wanting to disable ruby support, it's enough to just set <ruby> to "display: inline":
6750         // a change in "display:" value will cause a nodeDisplayStyleHash mismatch, and propose
6751         // a full reload with DOM rebuild, which will forget all the rubyBox we added.
6752         int len = getChildCount();
6753         bool needs_wrapping = len > 0;
6754         for ( int i=0; i<len; i++ ) {
6755             ldomNode * child = getChildNode(i);
6756             if ( child->isElement() && child->getNodeId() == el_inlineBox
6757                     && child->getChildCount() > 0 && child->getChildNode(0)->getNodeId() == el_rubyBox ) {
6758                 // If we find one <inlineBox><rubyBox>, we created that previously and we ensured
6759                 // there are only rubyBoxes, empty text nodes, or some trailing inline nodes
6760                 // not followed by a <rt>: no need for more checks and work.
6761                 needs_wrapping = false;
6762                 break;
6763             }
6764         }
6765         if ( needs_wrapping ) {
6766             // 1) Wrap everything up to (and including consecutive ones) <rt> <rtc> <rp>
6767             // into <inlineBox><rubyBox>, and continue doing it after that.
6768             int first_to_wrap = -1;
6769             int last_to_wrap = -1;
6770             for ( int i=0; i<=len; i++ ) {
6771                 ldomNode * child;
6772                 lInt16 elemId;
6773                 bool eoc = i == len; // end of children
6774                 if ( !eoc ) {
6775                     child = getChildNode(i);
6776                     if ( child->isElement() ) {
6777                         elemId = child->getNodeId();
6778                     }
6779                     else {
6780                         lString32 s = child->getText();
6781                         elemId = IsEmptySpace(s.c_str(), s.length()) ? -2 : -1;
6782                         // When meeting an empty space (elemId==-2), we'll delay wrapping
6783                         // decision to when we process the next node.
6784                         // We'll also not start a wrap with it.
6785                     }
6786                 }
6787                 if ( last_to_wrap >= 0 && (eoc || (elemId != el_rt && elemId != el_rtc && elemId != el_rp && elemId != -2) ) ) {
6788                     if ( first_to_wrap < 0 )
6789                         first_to_wrap = 0;
6790                     ldomNode * rbox1 = boxWrapChildren(first_to_wrap, last_to_wrap, el_rubyBox);
6791                     if ( rbox1 && !rbox1->isNull() ) {
6792                         // Set an attribute for the kind of container we made (Ruby Segment)
6793                         // so we can style it via CSS.
6794                         rbox1->setAttributeValue(LXML_NS_NONE, attr_T, U"rseg");
6795                         rbox1->initNodeStyle();
6796                         // Update loop index and end
6797                         int removed = last_to_wrap - first_to_wrap;
6798                         i = i - removed;
6799                         len = len - removed;
6800                         // And wrap this rubyBox in an inlineBox
6801                         ldomNode * ibox1 = insertChildElement( first_to_wrap, LXML_NS_NONE, el_inlineBox );
6802                         moveItemsTo( ibox1, first_to_wrap+1, first_to_wrap+1 );
6803                         ibox1->initNodeStyle();
6804                     }
6805                     first_to_wrap = -1;
6806                     last_to_wrap = -1;
6807                 }
6808                 if (eoc)
6809                     break;
6810                 if ( elemId == -1 ) { // isText(), non empty
6811                     if ( first_to_wrap < 0 ) {
6812                         first_to_wrap = i;
6813                     }
6814                 }
6815                 else if ( elemId == -2 ) { // isText(), empty
6816                     // Don't start a wrap on it
6817                 }
6818                 else {
6819                     if ( first_to_wrap < 0 ) {
6820                         first_to_wrap = i;
6821                     }
6822                     if ( elemId == el_rt || elemId == el_rtc || elemId == el_rp ) {
6823                         last_to_wrap = i;
6824                         // Don't wrap yet: there can be followup other RT/RTC
6825                     }
6826                 }
6827             }
6828             // 2) Enter each rubyBox we have created (they will be inline-table),
6829             // and wrap its content as needed to make rows (of rubyBox, rbc and rtc)
6830             // and cells (of rubyBox, rb and rt).
6831             len = getChildCount();
6832             for ( int i=0; i<len; i++ ) {
6833                 ldomNode * ibox1 = getChildNode(i);
6834                 if ( !ibox1->isElement() || ibox1->getNodeId() != el_inlineBox )
6835                     continue;
6836                 ldomNode * rbox1 = ibox1->getChildCount() > 0 ? ibox1->getChildNode(0) : NULL;
6837                 if ( !rbox1 || !rbox1->isElement() || rbox1->getNodeId() != el_rubyBox )
6838                     continue;
6839                 // (Each rbox1 will be set erm_table)
6840                 int len1 = rbox1->getChildCount();
6841                 int first_to_wrap = -1;
6842                 bool ruby_base_wrap_done = false;
6843                 bool ruby_base_present = false;
6844                 for ( int i1=0; i1<=len1; i1++ ) {
6845                     ldomNode * child;
6846                     lInt16 elemId;
6847                     bool eoc = i1 == len1; // end of children
6848                     if ( !eoc ) {
6849                         child = rbox1->getChildNode(i1);
6850                         if ( child->isElement() ) {
6851                             elemId = child->getNodeId();
6852                         }
6853                         else {
6854                             lString32 s = child->getText();
6855                             elemId = IsEmptySpace(s.c_str(), s.length()) ? -2 : -1;
6856                             // When meeting an empty space (elemId==-2), we'll delay wrapping
6857                             // decision to when we process the next node.
6858                             // We'll also not start a wrap with it.
6859                         }
6860                     }
6861                     if ( first_to_wrap >= 0 && (
6862                                     eoc
6863                                  || ( !ruby_base_wrap_done && (elemId == el_rtc || elemId == el_rt || elemId == el_rp) )
6864                                  || (  ruby_base_wrap_done && elemId == el_rtc )
6865                                 ) ) {
6866                         ldomNode * rbox2 = rbox1->boxWrapChildren(first_to_wrap, i1-1, el_rubyBox);
6867                         if ( rbox2 && !rbox2->isNull() ) {
6868                             // Set an attribute for the kind of container we made (<rbc> or <rtc>-like),
6869                             // so we can style it like real <rbc> and <rtc> via CSS.
6870                             rbox2->setAttributeValue(LXML_NS_NONE, attr_T, ruby_base_wrap_done ? U"rtc" : U"rbc");
6871                             rbox2->initNodeStyle();
6872                             // Update loop index and end
6873                             int removed = i1-1 - first_to_wrap;
6874                             i1 = i1 - removed;
6875                             len1 = len1 - removed;
6876                         }
6877                         first_to_wrap = -1;
6878                         if ( !eoc && !ruby_base_wrap_done ) {
6879                             ruby_base_present = true; // We did create it
6880                         }
6881                         if (eoc)
6882                             break;
6883                     }
6884                     if ( elemId == -1 ) { // isText(), non empty
6885                         if ( first_to_wrap < 0 ) {
6886                             first_to_wrap = i1;
6887                         }
6888                     }
6889                     else if ( elemId == -2 ) { // isText(), empty
6890                         // Don't start a wrap on it
6891                     }
6892                     else {
6893                         if ( elemId == el_rbc || elemId == el_rtc ) {
6894                             // These are fine containers at this level.
6895                             // (If el_rbc, we shouldn't have found anything before
6896                             // it; if we did, just ignore it.)
6897                             first_to_wrap = -1;
6898                             ruby_base_wrap_done = true;
6899                             if ( elemId == el_rbc )
6900                                 ruby_base_present = true;
6901                         }
6902                         else if ( first_to_wrap < 0 ) {
6903                             first_to_wrap = i1;
6904                             if ( elemId == el_rt || elemId == el_rp ) {
6905                                 ruby_base_wrap_done = true;
6906                             }
6907                         }
6908                     }
6909                 }
6910                 if ( !ruby_base_present ) {
6911                     // <ruby><rt>annotation</rt></ruby> : add rubyBox for empty base text
6912                     ldomNode * rbox2 = rbox1->insertChildElement( 0, LXML_NS_NONE, el_rubyBox );
6913                     rbox2->setAttributeValue(LXML_NS_NONE, attr_T, U"rbc");
6914                     rbox2->initNodeStyle();
6915                 }
6916                 // rbox1 now contains only <rbc>, <rtc> or <rubyBox> (which will be set erm_table_row)
6917                 // 3) for each, ensure its content is <rb>, <rt>, and if not, wrap it in
6918                 // a <rubyBox> (these will be all like table cells, set erm_final)
6919                 len1 = rbox1->getChildCount();
6920                 bool ruby_base_seen = false;
6921                 for ( int i1=0; i1<len1; i1++ ) {
6922                     ldomNode * rbox2 = rbox1->getChildNode(i1);
6923                     if ( !rbox2->isElement() )
6924                         continue;
6925                     lInt16 elemId = rbox2->getNodeId();
6926                     lInt16 expected_child_elem_id;
6927                     if ( elemId == el_rbc ) {
6928                         expected_child_elem_id = el_rb;
6929                     }
6930                     else if ( elemId == el_rtc ) {
6931                         expected_child_elem_id = el_rt;
6932                     }
6933                     else if ( elemId == el_rubyBox ) {
6934                         expected_child_elem_id = ruby_base_seen ? el_rt : el_rb;
6935                     }
6936                     else { // unexpected
6937                         continue;
6938                     }
6939                     ruby_base_seen = true; // We're passing by a container, the first one being the base
6940                     bool has_expected = false;
6941                     int len2 = rbox2->getChildCount();
6942                     for ( int i2=0; i2<len2; i2++ ) {
6943                         ldomNode * child = rbox2->getChildNode(i2);
6944                         lInt16 childElemId = child->isElement() ? child->getNodeId() : -1;
6945                         if ( childElemId == expected_child_elem_id ) {
6946                             // If a single expected is found, assume everything is fine
6947                             // (other badly wrapped elements will just be ignored and invisible)
6948                             has_expected = true;
6949                             break;
6950                         }
6951                     }
6952                     if ( !has_expected ) {
6953                         // Wrap everything into a rubyBox
6954                         if ( len2 > 0 ) { // some children to wrap
6955                             ldomNode * rbox3 = rbox2->boxWrapChildren(0, len2-1, el_rubyBox);
6956                             if ( rbox3 && !rbox3->isNull() ) {
6957                                 rbox3->setAttributeValue(LXML_NS_NONE, attr_T, expected_child_elem_id == el_rb ? U"rb" : U"rt");
6958                                 if ( elemId == el_rtc ) {
6959                                     // Firefox makes a <rtc>text</rtc> (without any <rt>) span the whole involved base
6960                                     rbox3->setAttributeValue(LXML_NS_NONE, attr_rbspan, U"99"); // (our max supported)
6961                                 }
6962                                 rbox3->initNodeStyle();
6963                             }
6964                         }
6965                         else { // no child to wrap
6966                             // We need to insert an empty element to play the role of a <td> for
6967                             // the table rendering code to work correctly.
6968                             ldomNode * rbox3 = rbox2->insertChildElement( 0, LXML_NS_NONE, el_rubyBox );
6969                             rbox3->setAttributeValue(LXML_NS_NONE, attr_T, expected_child_elem_id == el_rb ? U"rb" : U"rt");
6970                             rbox3->initNodeStyle();
6971                             // We need to add some text for the cell to ensure its height.
6972                             // We add a ZERO WIDTH SPACE, which will not collapse into nothing
6973                             rbox3->insertChildText(U"\x200B");
6974                         }
6975                     }
6976                 }
6977             }
6978         }
6979         // All wrapping done, or assumed to have already been done correctly.
6980         // We can set the rendering methods to make all this a table.
6981         // All unexpected elements will be erm_invisible
6982         len = getChildCount();
6983         for ( int i=0; i<len; i++ ) {
6984             ldomNode * ibox1 = getChildNode(i);
6985             if ( !ibox1->isElement() || ibox1->getNodeId() != el_inlineBox )
6986                 continue;
6987             ibox1->setRendMethod( erm_inline );
6988             ldomNode * rbox1 = ibox1->getChildCount() > 0 ? ibox1->getChildNode(0) : NULL;
6989             if ( rbox1 && rbox1->isElement() && rbox1->getNodeId() == el_rubyBox ) {
6990                 // First level rubyBox: each will be an inline table
6991                 rbox1->setRendMethod( erm_table );
6992                 int len1 = rbox1->getChildCount();
6993                 for ( int i1=0; i1<len1; i1++ ) {
6994                     ldomNode * rbox2 = rbox1->getChildNode(i1);
6995                     if ( rbox2->isElement() ) {
6996                         rbox2->setRendMethod( erm_invisible );
6997                         lInt16 rb2elemId = rbox2->getNodeId();
6998                         if ( rb2elemId == el_rubyBox || rb2elemId == el_rbc || rb2elemId == el_rtc ) {
6999                             // Second level rubyBox: each will be a table row
7000                             rbox2->setRendMethod( erm_table_row );
7001                             int len2 = rbox2->getChildCount();
7002                             for ( int i2=0; i2<len2; i2++ ) {
7003                                 ldomNode * rbox3 = rbox2->getChildNode(i2);
7004                                 if ( rbox3->isElement() ) {
7005                                     rbox3->setRendMethod( erm_invisible );
7006                                     lInt16 rb3elemId = rbox3->getNodeId();
7007                                     if ( rb3elemId == el_rubyBox || rb3elemId == el_rb || rb3elemId == el_rt ) {
7008                                         // Third level rubyBox: each will be a table cell.
                                        // (As all its content has previously been reset to erm_inline)
                                        //  /\ This is no longer true, but we expect to find inline
                                        //  content, with possibly some nested ruby.
7012                                         // We can have the cell erm_final.
7013                                         rbox3->setRendMethod( erm_final );
7014                                     }
7015                                     // We let <rp> be invisible like other unexpected elements
7016                                 }
7017                             }
7018                         }
7019                     }
7020                 }
7021             }
7022         }
7023     }
7024 
7025     bool handled_as_float = false;
7026     if (BLOCK_RENDERING(rend_flags, WRAP_FLOATS)) {
7027         // While loading the document, we want to put any element with float:left/right
7028         // inside an internal floatBox element with no margin in its style: this
7029         // floatBox's RenderRectAccessor will have the position and width/height
7030         // of the outer element (with margins inside), while the RenderRectAccessor
7031         // of the wrapped original element itself will have the w/h of the element,
7032         // including borders but excluding margins (as it is done for all elements
7033         // by crengine).
        // That gives the following rules:
7035         // - a floatBox has a single child: the original floating element.
7036         // - a non-floatBox element with style->float_ must be wrapped in a floatBox
7037         //   which will get the same style->float_ (happens in the initial document
7038         //   loading)
7039         // - if it already has a floatBox parent, no need to do it again, just ensure
7040         //   the style->float_ are the same (happens when re-rendering)
7041         // - if the element has lost its style->float_ (style tweak applied), or
7042         //   WRAP_FLOATS disabled, as we can't remove the floatBox (we can't
7043         //   modify the DOM once a cache has been made): update the floatBox's
7044         //   style->float_ and style->display and rendering method to be the same
        //   as the element: this will limit the display degradation when such a
        //   change happens (but a full re-loading will still be suggested to the
7047         //   user, and should probably be accepted).
7048         // So, to allow toggling FLOAT_FLOATBOXES with less chance of getting
7049         // a _nodeDisplayStyleHash change (and so, a need for document reloading),
7050         // it's best to use WRAP_FLOATS even when flat rendering is requested.
7051         //
7052         // Note that, when called in the XML loading phase, we can't update
7053         // a node style (with getStyle(), copystyle(), setStyle()) as, for some reason
7054         // not pinpointed, it could affect and mess with the upcoming node parsing.
7055         // We can just set the style of an element we add (and only once, setting it
7056         // twice would cause the same mess). But in the re-rendering phase, we can
7057         // update a node style as much as we want.
7058         bool isFloating = getStyle()->float_ > css_f_none;
7059         bool isFloatBox = (getNodeId() == el_floatBox);
7060         if ( isFloating || isFloatBox ) {
7061             handled_as_float = true;
7062             ldomNode * parent = getParentNode();
7063             bool isFloatBoxChild = (parent && (parent->getNodeId() == el_floatBox));
7064             if ( isFloatBox ) {
7065                 // Wrapping floatBox already made
7066                 if (getChildCount() != 1) {
7067                     CRLog::error("floatBox with zero or more than one child");
7068                     crFatalError();
7069                 }
7070                 // Update floatBox style according to child's one
7071                 ldomNode * child = getChildNode(0);
7072                 css_style_ref_t child_style = child->getStyle();
7073                 css_style_ref_t my_style = getStyle();
7074                 css_style_ref_t my_new_style( new css_style_rec_t );
7075                 copystyle(my_style, my_new_style);
7076                 my_new_style->float_ = child_style->float_;
7077                 if (child_style->display <= css_d_inline) { // when !PREPARE_FLOATBOXES
7078                     my_new_style->display = css_d_inline; // become an inline wrapper
7079                 }
7080                 else if (child_style->display == css_d_none) {
7081                     my_new_style->display = css_d_none; // stay invisible
7082                 }
7083                 else { // everything else (including tables) must be wrapped by a block
7084                     my_new_style->display = css_d_block;
7085                 }
7086                 setStyle(my_new_style);
7087                 // When re-rendering, setNodeStyle() has already been called to set
7088                 // our style and font, so no need for calling initNodeFont() here,
7089                 // as we didn't change anything related to font in the style (and
7090                 // calling it can cause a style hash mismatch for some reason).
7091 
7092                 // Update floatBox rendering method according to child's one
7093                 // It should be erm_block by default (the child can be erm_final
7094                 // if it contains text), except if the child has stayed inline
7095                 // when !PREPARE_FLOATBOXES
7096                 if (child->getRendMethod() == erm_inline)
7097                     setRendMethod( erm_inline );
7098                 else if (child->getRendMethod() == erm_invisible)
7099                     setRendMethod( erm_invisible );
7100                 else
7101                     setRendMethod( erm_block );
7102             }
7103             else if ( isFloatBoxChild ) {
7104                 // Already floatBox'ed, nothing special to do
7105             }
7106             else if ( parent ) { // !isFloatBox && !isFloatBoxChild
7107                 // Element with float:, that has not been yet wrapped in a floatBox.
7108                 // Replace this element with a floatBox in its parent children collection,
7109                 // and move it inside, as the single child of this floatBox.
7110                 int pos = getNodeIndex();
7111                 ldomNode * fbox = parent->insertChildElement( pos, LXML_NS_NONE, el_floatBox );
7112                 parent->moveItemsTo( fbox, pos+1, pos+1 ); // move this element from parent into fbox
7113 
7114                 // If we have float:, this just-created floatBox should be erm_block,
7115                 // unless the child has been kept inline
7116                 if ( !BLOCK_RENDERING(rend_flags, PREPARE_FLOATBOXES) && getRendMethod() == erm_inline)
7117                     fbox->setRendMethod( erm_inline );
7118                 else
7119                     fbox->setRendMethod( erm_block );
7120 
7121                 // We want this floatBox to have no real style (and it surely
7122                 // should not have the margins of the child), but it should probably
7123                 // have the inherited properties of the node parent, just like the child
7124                 // had them. We can't just copy the parent style into this floatBox, as
7125                 // we don't want its non-inherited properties like background-color which
7126                 // could be drawn over some other content if this float has some negative
7127                 // margins.
7128                 // So, we can't really do this:
7129                 //    // Move float and display from me into my new fbox parent
7130                 //    css_style_ref_t mystyle = getStyle();
7131                 //    css_style_ref_t parentstyle = parent->getStyle();
7132                 //    css_style_ref_t fboxstyle( new css_style_rec_t );
7133                 //    copystyle(parentstyle, fboxstyle);
7134                 //    fboxstyle->float_ = mystyle->float_;
7135                 //    fboxstyle->display = mystyle->display;
7136                 //    fbox->setStyle(fboxstyle);
7137                 //    fbox->initNodeFont();
7138                 //
7139                 // Best to use lvrend.cpp setNodeStyle(), which will properly set
7140                 // this new node style with inherited properties from its parent,
7141                 // and we made it do this specific propagation of float_ and
7142                 // display from its single children, only when it has styles
7143                 // defined (so, only on initial loading and not on re-renderings).
7144                 setNodeStyle( fbox, parent->getStyle(), parent->getFont() );
7145 
7146                 // We would have liked to reset style->float_ to none in the
7147                 // node we moved in the floatBox, for correctness sake.
7148                 //    css_style_ref_t mynewstyle( new css_style_rec_t );
7149                 //    copystyle(mystyle, mynewstyle);
7150                 //    mynewstyle->float_ = css_f_none;
7151                 //    mynewstyle->display = css_d_block;
7152                 //    setStyle(mynewstyle);
7153                 //    initNodeFont();
                // Unfortunately, we can't yet re-set a style while the DOM
7155                 // is still being built (as we may be called during the loading
7156                 // phase) without many font glitches.
7157                 // So, we'll have a floatBox with float: that contains a span
7158                 // or div with float: - the rendering code may have to check
7159                 // for that: ->isFloatingBox() was added for that.
7160             }
7161         }
7162     }
7163 
7164     // (If a node is both inline-block and float: left/right, float wins.)
7165     if (BLOCK_RENDERING(rend_flags, BOX_INLINE_BLOCKS) && !handled_as_float) {
7166         // (Similar to what we do above for floats, but simpler.)
7167         // While loading the document, we want to put any element with
7168         // display: inline-block or inline-table inside an internal inlineBox
7169         // element with no margin in its style: this inlineBox's RenderRectAccessor
7170         // will have the width/height of the outer element (with margins inside),
7171         // while the RenderRectAccessor of the wrapped original element itself
7172         // will have the w/h of the element, including borders but excluding
7173         // margins (as it is done for all elements by crengine).
7174         // That makes out the following rules:
7175         // - a inlineBox has a single child: the original inline-block element.
7176         // - an element with style->display: inline-block/inline-table must be
7177         //   wrapped in a inlineBox, which will get the same style->vertical_align
7178         //   (happens in the initial document loading)
7179         // - if it already has a inlineBox parent, no need to do it again, just ensure
7180         //   the style->vertical_align are the same (happens when re-rendering)
7181         // - if the element has lost its style->display: inline-block (style tweak
7182         //   applied), or BOX_INLINE_BLOCKS disabled, as we can't remove the
7183         //   inlineBox (we can't modify the DOM once a cache has been made):
7184         //   the inlineBox and its children will both be set to erm_inline
7185         //   (but as ->display has changed, a full re-loading will be suggested
7186         //   to the user, and should probably be accepted).
7187         // - a inlineBox has ALWAYS ->display=css_d_inline and erm_method=erm_inline
7188         // - a inlineBox child keeps its original ->display, and may have
7189         //   erm_method = erm_final or erm_block (depending on its content)
7190         bool isInlineBlock = (d == css_d_inline_block || d == css_d_inline_table);
7191         bool isInlineBox = (getNodeId() == el_inlineBox);
7192         if ( isInlineBlock || isInlineBox ) {
7193             ldomNode * parent = getParentNode();
7194             bool isInlineBoxChild = (parent && (parent->getNodeId() == el_inlineBox));
7195             if ( isInlineBox ) {
7196                 // Wrapping inlineBox already made
7197                 if (getChildCount() != 1) {
7198                     CRLog::error("inlineBox with zero or more than one child");
7199                     crFatalError();
7200                 }
7201                 // Update inlineBox style according to child's one
7202                 ldomNode * child = getChildNode(0);
7203                 css_style_ref_t child_style = child->getStyle();
7204                 css_style_ref_t my_style = getStyle();
7205                 css_style_ref_t my_new_style( new css_style_rec_t );
7206                 copystyle(my_style, my_new_style);
7207                 if (child_style->display == css_d_inline_block || child_style->display == css_d_inline_table) {
7208                     my_new_style->display = css_d_inline; // become an inline wrapper
7209                     // We need it to have the vertical_align from the child
7210                     // (it's the only style we need for proper inline layout).
7211                     my_new_style->vertical_align = child_style->vertical_align;
7212                     setRendMethod( erm_inline );
7213                 }
7214                 else if ( isEmbeddedBlockBoxingInlineBox(true) ) {
7215                     my_new_style->display = css_d_inline; // wrap bogus "block among inlines" in inline
7216                     setRendMethod( erm_inline );
7217                 }
7218                 else if (child_style->display <= css_d_inline) {
7219                     my_new_style->display = css_d_inline; // wrap inline in inline
7220                     setRendMethod( erm_inline );
7221                 }
7222                 else if (child_style->display == css_d_none) {
7223                     my_new_style->display = css_d_none; // stay invisible
7224                     setRendMethod( erm_invisible );
7225                 }
7226                 else { // everything else must be wrapped by a block
7227                     my_new_style->display = css_d_block;
7228                     setRendMethod( erm_block );
7229                 }
7230                 setStyle(my_new_style);
7231                 // When re-rendering, setNodeStyle() has already been called to set
7232                 // our style and font, so no need for calling initNodeFont() here,
7233                 // as we didn't change anything related to font in the style (and
7234                 // calling it can cause a style hash mismatch for some reason).
7235             }
7236             else if ( isInlineBoxChild ) {
7237                 // Already inlineBox'ed, nothing special to do
7238             }
7239             else if ( parent ) { // !isInlineBox && !isInlineBoxChild
7240                 // Element with display: inline-block/inline-table, that has not yet
7241                 // been wrapped in a inlineBox.
7242                 // Replace this element with a inlineBox in its parent children collection,
7243                 // and move it inside, as the single child of this inlineBox.
7244                 int pos = getNodeIndex();
7245                 ldomNode * ibox = parent->insertChildElement( pos, LXML_NS_NONE, el_inlineBox );
7246                 parent->moveItemsTo( ibox, pos+1, pos+1 ); // move this element from parent into ibox
7247                 ibox->setRendMethod( erm_inline );
7248 
7249                 // We want this inlineBox to have no real style (and it surely
7250                 // should not have the margins of the child), but it should probably
7251                 // have the inherited properties of the node parent, just like the child
7252                 // had them. We can't just copy the parent style into this inlineBox, as
7253                 // we don't want its non-inherited properties like background-color which
7254                 // could be drawn over some other content if this float has some negative
7255                 // margins.
7256                 // Best to use lvrend.cpp setNodeStyle(), which will properly set
7257                 // this new node style with inherited properties from its parent,
7258                 // and we made it do this specific propagation of vertical_align
7259                 // from its single child, only when it has styles defined (so,
7260                 // only on initial loading and not on re-renderings).
7261                 setNodeStyle( ibox, parent->getStyle(), parent->getFont() );
7262             }
7263         }
7264     }
7265 }
7266 #endif
7267 
onBodyExit()7268 void ldomElementWriter::onBodyExit()
7269 {
7270     if ( _isSection )
7271         updateTocItem();
7272 
7273 #if BUILD_LITE!=1
7274     if ( !_document->isDefStyleSet() )
7275         return;
7276     if ( !_bodyEnterCalled ) {
7277         onBodyEnter();
7278     }
7279     if ( _pseudoElementAfterChildIndex >= 0 ) {
7280         if ( _pseudoElementAfterChildIndex != _element->getChildCount()-1 ) {
7281             // Not the last child: move it there
7282             // (moveItemsTo() works just fine when the source node is also the
7283             // target node: remove it, and re-add it, so, adding it at the end)
7284             _element->moveItemsTo( _element, _pseudoElementAfterChildIndex, _pseudoElementAfterChildIndex);
7285         }
7286         // Now that all the real children of this node have had their
7287         // style set, we can init the style of the "After" pseudo
7288         // element, and its rend method as it has no children.
7289         ldomNode * child = _element->getChildNode(_element->getChildCount()-1);
7290         child->initNodeStyle();
7291         child->initNodeRendMethod();
7292     }
7293 //    if ( _element->getStyle().isNull() ) {
7294 //        lString32 path;
7295 //        ldomNode * p = _element->getParentNode();
7296 //        while (p) {
7297 //            path = p->getNodeName() + U"/" + path;
7298 //            p = p->getParentNode();
7299 //        }
7300 //        //CRLog::error("style not initialized for element 0x%04x %s path %s", _element->getDataIndex(), LCSTR(_element->getNodeName()), LCSTR(path));
7301 //        crFatalError();
7302 //    }
7303     _element->initNodeRendMethod();
7304 
7305     if ( _stylesheetIsSet )
7306         _document->getStyleSheet()->pop();
7307 #endif
7308 }
7309 
onText(const lChar32 * text,int len,lUInt32,bool insert_before_last_child)7310 void ldomElementWriter::onText( const lChar32 * text, int len, lUInt32, bool insert_before_last_child )
7311 {
7312     //logfile << "{t";
7313     {
7314         // normal mode: store text copy
7315         // add text node, if not first empty space string of block node
7316         if ( !_isBlock || _element->getChildCount()!=0 || !IsEmptySpace( text, len ) || (_flags&TXTFLG_PRE) ) {
7317             lString8 s8 = UnicodeToUtf8(text, len);
7318             _element->insertChildText(s8, insert_before_last_child);
7319         } else {
7320             //CRLog::trace("ldomElementWriter::onText: Ignoring first empty space of block item");
7321         }
7322     }
7323     //logfile << "}";
7324 }
7325 
7326 
7327 //#define DISABLE_STYLESHEET_REL
7328 #if BUILD_LITE!=1
7329 /// if stylesheet file name is set, and file is found, set stylesheet to its value
applyNodeStylesheet()7330 bool ldomNode::applyNodeStylesheet()
7331 {
7332 #ifndef DISABLE_STYLESHEET_REL
7333     CRLog::trace("ldomNode::applyNodeStylesheet()");
7334     if ( !getDocument()->getDocFlag(DOC_FLAG_ENABLE_INTERNAL_STYLES) ) // internal styles are disabled
7335         return false;
7336 
7337     if ( getNodeId() != el_DocFragment && getNodeId() != el_body )
7338         return false;
7339     if ( getNodeId() == el_DocFragment && getDocument()->getContainer().isNull() )
7340         return false;
7341 
7342     // Here, we apply internal stylesheets that have been saved as attribute or
7343     // child element by the HTML parser for EPUB or plain HTML documents.
7344 
7345     // For epub documents, for each included .html in the epub, the first css
7346     // file link may have been put as the value of an added attribute to
7347     // the <DocFragment> element:
7348     //     <DocFragment StyleSheet="path to css file">
7349     //
7350     // For epub and html documents, the content of one or more <head><style>
7351     // elements, as well as all (only the 2nd++ for epub) linked css files,
7352     // with @import url(), have been put into an added child element:
7353     //     <DocFragment><stylesheet>css content</stylesheet><body>...</body></DocFragment>
7354     //     <body><stylesheet>css content</stylesheet>...</body>
7355 
7356     bool stylesheetChanged = false;
7357 
7358     if ( getNodeId() == el_DocFragment && hasAttribute(attr_StyleSheet) ) {
7359         getDocument()->_stylesheet.push();
7360         stylesheetChanged = getDocument()->parseStyleSheet(getAttributeValue(attr_StyleSheet));
7361         if ( !stylesheetChanged )
7362             getDocument()->_stylesheet.pop();
7363     }
7364     if ( getChildCount() > 0 ) {
7365         ldomNode *styleNode = getChildNode(0);
7366 
7367         if ( styleNode && styleNode->getNodeId()==el_stylesheet ) {
7368             if ( false == stylesheetChanged) {
7369                 getDocument()->_stylesheet.push();
7370             }
7371             if ( getDocument()->parseStyleSheet(styleNode->getAttributeValue(attr_href),
7372                                                 styleNode->getText()) ) {
7373                 stylesheetChanged = true;
7374             } else if (false == stylesheetChanged) {
7375                 getDocument()->_stylesheet.pop();
7376             }
7377         }
7378     }
7379     return stylesheetChanged;
7380 #endif
7381     return false;
7382 }
7383 #endif
7384 
addAttribute(lUInt16 nsid,lUInt16 id,const lChar32 * value)7385 void ldomElementWriter::addAttribute( lUInt16 nsid, lUInt16 id, const lChar32 * value )
7386 {
7387     getElement()->setAttributeValue(nsid, id, value);
7388 #if BUILD_LITE!=1
7389     /* This is now done by ldomDocumentFragmentWriter::OnTagOpen() directly,
7390      * as we need to do it too for <DocFragment><stylesheet> tag, and not
7391      * only for <DocFragment StyleSheet="path_to_css_1st_file"> attribute.
7392     if ( id==attr_StyleSheet ) {
7393         _stylesheetIsSet = _element->applyNodeStylesheet();
7394     }
7395     */
7396 #endif
7397 }
7398 
pop(ldomElementWriter * obj,lUInt16 id)7399 ldomElementWriter * ldomDocumentWriter::pop( ldomElementWriter * obj, lUInt16 id )
7400 {
7401     // First check if there's an element with provided id in the stack
7402     //logfile << "{p";
7403     ldomElementWriter * tmp = obj;
7404     for ( ; tmp; tmp = tmp->_parent )
7405     {
7406         //logfile << "-";
7407         if (tmp->getElement()->getNodeId() == id)
7408             break;
7409     }
7410     //logfile << "1";
7411     if (!tmp)
7412     {
7413         // No element in the stack with provided id: nothing to close, stay at current element
7414         //logfile << "-err}";
7415         return obj; // error!!!
7416     }
7417     ldomElementWriter * tmp2 = NULL;
7418     //logfile << "2";
7419     for ( tmp = obj; tmp; tmp = tmp2 )
7420     {
7421         //logfile << "-";
7422         tmp2 = tmp->_parent;
7423         bool stop = (tmp->getElement()->getNodeId() == id);
7424         ElementCloseHandler( tmp->getElement() );
7425         delete tmp;
7426         if ( stop )
7427             return tmp2;
7428     }
7429     /*
7430     logfile << "3 * ";
7431     logfile << (int)tmp << " - " << (int)tmp2 << " | cnt=";
7432     logfile << (int)tmp->getElement()->childCount << " - "
7433             << (int)tmp2->getElement()->childCount;
7434     */
7435     //logfile << "}";
7436     return tmp2;
7437 }
7438 
~ldomElementWriter()7439 ldomElementWriter::~ldomElementWriter()
7440 {
7441     //CRLog::trace("~ldomElementWriter for element 0x%04x %s", _element->getDataIndex(), LCSTR(_element->getNodeName()));
7442     //getElement()->persist();
7443     onBodyExit();
7444 }
7445 
7446 
7447 
7448 
7449 /////////////////////////////////////////////////////////////////
7450 /// ldomDocumentWriter
// Used to parse expected XHTML (possibly made by crengine or helpers) for
// formats: FB2, RTF, WORD, plain text, PDB(txt).
// Also used for EPUB to build a single document, but driven by ldomDocumentFragmentWriter
// for each individual HTML file in the EPUB.
// For all these document formats, it is fed by HTMLParser, which converts
// the tag names and attributes to lowercase.
// ldomDocumentWriter does not do any auto-close of unbalanced tags and
// expects a fully correct and balanced XHTML.
7459 
7460 // overrides
OnStart(LVFileFormatParser * parser)7461 void ldomDocumentWriter::OnStart(LVFileFormatParser * parser)
7462 {
7463     //logfile << "ldomDocumentWriter::OnStart()\n";
7464     // add document root node
7465     //CRLog::trace("ldomDocumentWriter::OnStart()");
7466     if ( !_headerOnly )
7467         _stopTagId = 0xFFFE;
7468     else {
7469         _stopTagId = _document->getElementNameIndex(U"description");
7470         //CRLog::trace( "ldomDocumentWriter() : header only, tag id=%d", _stopTagId );
7471     }
7472     LVXMLParserCallback::OnStart( parser );
7473     _currNode = new ldomElementWriter(_document, 0, 0, NULL);
7474 }
7475 
OnStop()7476 void ldomDocumentWriter::OnStop()
7477 {
7478     //logfile << "ldomDocumentWriter::OnStop()\n";
7479     while (_currNode)
7480         _currNode = pop( _currNode, _currNode->getElement()->getNodeId() );
7481 }
7482 
7483 /// called after > of opening tag (when entering tag body)
7484 // Note to avoid confusion: all tags HAVE a body (their content), so this
7485 // is called on all tags.
7486 // But in this, we do some specifics for tags that ARE a <BODY> tag.
OnTagBody()7487 void ldomDocumentWriter::OnTagBody()
7488 {
7489     // Specific if we meet the <BODY> tag and we have styles to apply and
7490     // store in the DOM
7491     // (This can't happen with EPUBs: the ldomDocumentFragmentWriter that
7492     // drives this ldomDocumentWriter has parsed the HEAD STYLEs and LINKs
7493     // of each individual HTML file, and we see from them only their BODY:
7494     // _headStyleText and _stylesheetLinks are then empty. Styles for EPUB
7495     // are handled in :OnTagOpen() when being a DocFragment and meeting
7496     // the BODY.)
7497     if ( _currNode && _currNode->getElement() && _currNode->getElement()->isNodeName("body") &&
7498             ( !_headStyleText.empty() || _stylesheetLinks.length() > 0 ) ) {
7499         // If we're BODY, and we have meet styles in the previous HEAD
7500         // (links to css files or <STYLE> content), we need to save them
7501         // in an added <body><stylesheet> element so they are in the DOM
7502         // and saved in the cache, and found again when loading from cache
7503         // and applied again when a re-rendering is needed.
7504 
7505         // Make out an aggregated single stylesheet text.
7506         // @import's need to be first in the final stylesheet text
7507         lString32 imports;
7508         for (int i = 0; i < _stylesheetLinks.length(); i++) {
7509             lString32 import("@import url(\"");
7510             import << _stylesheetLinks.at(i);
7511             import << "\");\n";
7512             imports << import;
7513         }
7514         lString32 styleText = imports + _headStyleText.c_str();
7515         _stylesheetLinks.clear();
7516         _headStyleText.clear();
7517 
7518         // It's only at this point that we push() the previous stylesheet state
7519         // and apply the combined style text we made to the document:
7520         if ( _document->getDocFlag(DOC_FLAG_ENABLE_INTERNAL_STYLES) ) {
7521             _document->getStyleSheet()->push();
7522             _popStyleOnFinish = true; // superclass ~ldomDocumentWriter() will do the ->pop()
7523             _document->parseStyleSheet(lString32(), styleText);
7524             // printf("applied: %s\n", LCSTR(styleText));
7525             // apply any FB2 stylesheet too, so it's removed too when pop()
7526             _document->applyDocumentStyleSheet();
7527         }
7528         // We needed to add that /\ to the _document->_stylesheet before this
7529         // onBodyEnter \/, for any body {} css declaration to be available
7530         // as this onBodyEnter will apply the current _stylesheet to this BODY node.
7531         _currNode->onBodyEnter();
7532         _flags = _currNode->getFlags(); // _flags may have been updated (if white-space: pre)
7533         // And only after this we can add the <stylesheet> as a first child
7534         // element of this BODY node. It will not be displayed thanks to fb2def.h:
7535         //   XS_TAG1D( stylesheet, true, css_d_none, css_ws_inherit )
7536         OnTagOpen(U"", U"stylesheet");
7537         OnTagBody();
7538         OnText(styleText.c_str(), styleText.length(), 0);
7539         OnTagClose(U"", U"stylesheet");
7540         CRLog::trace("added BODY>stylesheet child element with HEAD>STYLE&LINKS content");
7541     }
7542     else if ( _currNode ) { // for all other tags (including BODY when no style)
7543         _currNode->onBodyEnter();
7544         _flags = _currNode->getFlags(); // _flags may have been updated (if white-space: pre)
7545     }
7546 }
7547 
OnTagOpen(const lChar32 * nsname,const lChar32 * tagname)7548 ldomNode * ldomDocumentWriter::OnTagOpen( const lChar32 * nsname, const lChar32 * tagname )
7549 {
7550     //logfile << "ldomDocumentWriter::OnTagOpen() [" << nsname << ":" << tagname << "]";
7551     //CRLog::trace("OnTagOpen(%s)", UnicodeToUtf8(lString32(tagname)).c_str());
7552     lUInt16 id = _document->getElementNameIndex(tagname);
7553     lUInt16 nsid = (nsname && nsname[0]) ? _document->getNsNameIndex(nsname) : 0;
7554 
7555     // Set a flag for OnText to accumulate the content of any <HEAD><STYLE>
7556     if ( id == el_style && _currNode && _currNode->getElement()->getNodeId() == el_head ) {
7557         _inHeadStyle = true;
7558     }
7559 
7560     // For EPUB, when ldomDocumentWriter is driven by ldomDocumentFragmentWriter:
7561     // if we see a BODY coming and we are a DocFragment, its time to apply the
7562     // styles set to the DocFragment before switching to BODY (so the styles can
7563     // be applied to BODY)
7564     if (id == el_body && _currNode && _currNode->_element->getNodeId() == el_DocFragment) {
7565         _currNode->_stylesheetIsSet = _currNode->getElement()->applyNodeStylesheet();
7566         // _stylesheetIsSet will be used to pop() the stylesheet when
7567         // leaving/destroying this DocFragment ldomElementWriter
7568     }
7569 
7570     //if ( id==_stopTagId ) {
7571         //CRLog::trace("stop tag found, stopping...");
7572     //    _parser->Stop();
7573     //}
7574     _currNode = new ldomElementWriter( _document, nsid, id, _currNode );
7575     _flags = _currNode->getFlags();
7576     //logfile << " !o!\n";
7577     //return _currNode->getElement();
7578     return _currNode->getElement();
7579 }
7580 
~ldomDocumentWriter()7581 ldomDocumentWriter::~ldomDocumentWriter()
7582 {
7583     while (_currNode)
7584         _currNode = pop( _currNode, _currNode->getElement()->getNodeId() );
7585 #if BUILD_LITE!=1
7586     if ( _document->isDefStyleSet() ) {
7587         if ( _popStyleOnFinish )
7588             // pop any added styles to the original stylesheet so we get
7589             // the original one back and avoid a stylesheet hash mismatch
7590             _document->getStyleSheet()->pop();
7591         // Not sure why we would do that at end of parsing, but ok: it's
7592         // not recursive, so not expensive:
7593         _document->getRootNode()->initNodeStyle();
7594         _document->getRootNode()->initNodeFont();
7595         //if ( !_document->validateDocument() )
7596         //    CRLog::error("*** document style validation failed!!!");
7597         _document->updateRenderContext();
7598         _document->dumpStatistics();
7599         if ( _document->_nodeStylesInvalidIfLoading ) {
7600             // Some pseudoclass like :last-child has been met which has set this flag
7601             // (or, with the HTML parser, foster parenting of invalid element in tables)
7602             printf("CRE: document loaded, but styles re-init needed (cause: peculiar CSS pseudoclasses met)\n");
7603             _document->_nodeStylesInvalidIfLoading = false; // show this message only once
7604             _document->forceReinitStyles();
7605         }
7606         if ( _document->hasRenderData() ) {
7607             // We have created some RenderRectAccessors, to cache some CSS check results
7608             // (i.e. :nth-child(), :last-of-type...): we should clean them.
7609             // (We do that here for after the initial loading phase - on re-renderings,
7610             // this is done in updateRendMethod() called by initNodeRendMethodRecursive()
7611             // on all nodes.)
7612             _document->getRootNode()->clearRenderDataRecursive();
7613         }
7614     }
7615 
7616 #endif
7617 }
7618 
OnTagClose(const lChar32 *,const lChar32 * tagname,bool self_closing_tag)7619 void ldomDocumentWriter::OnTagClose( const lChar32 *, const lChar32 * tagname, bool self_closing_tag )
7620 {
7621     //logfile << "ldomDocumentWriter::OnTagClose() [" << nsname << ":" << tagname << "]";
7622     if (!_currNode || !_currNode->getElement())
7623     {
7624         _errFlag = true;
7625         //logfile << " !c-err!\n";
7626         return;
7627     }
7628 
7629     //lUInt16 nsid = (nsname && nsname[0]) ? _document->getNsNameIndex(nsname) : 0;
7630     lUInt16 curNodeId = _currNode->getElement()->getNodeId();
7631     lUInt16 id = _document->getElementNameIndex(tagname);
7632     _errFlag |= (id != curNodeId); // (we seem to not do anything with _errFlag)
7633     // We should expect the tagname we got to be the same as curNode's element name,
7634     // but it looks like we may get an upper closing tag, that pop() below might
7635     // handle. So, here below, we check that both id and curNodeId match the
7636     // element id we check for.
7637 
7638     // Parse <link rel="stylesheet">, put the css file link in _stylesheetLinks.
7639     // They will be added to <body><stylesheet> when we meet <BODY>
7640     // (duplicated in ldomDocumentWriterFilter::OnTagClose)
7641     if ( id == el_link && curNodeId == el_link ) { // link node
7642         ldomNode * n = _currNode->getElement();
7643         if ( n->getParentNode() && n->getParentNode()->getNodeId() == el_head &&
7644                  lString32(n->getAttributeValue("rel")).lowercase() == U"stylesheet" &&
7645                  lString32(n->getAttributeValue("type")).lowercase() == U"text/css" ) {
7646             lString32 href = n->getAttributeValue("href");
7647             lString32 stylesheetFile = LVCombinePaths( _document->getCodeBase(), href );
7648             CRLog::debug("Internal stylesheet file: %s", LCSTR(stylesheetFile));
7649             // We no more apply it immediately: it will be when <BODY> is met
7650             // _document->setDocStylesheetFileName(stylesheetFile);
7651             // _document->applyDocumentStyleSheet();
7652             _stylesheetLinks.add(stylesheetFile);
7653         }
7654     }
7655 
7656     _currNode = pop( _currNode, id );
7657         // _currNode is now the parent
7658 
7659     if ( _currNode )
7660         _flags = _currNode->getFlags();
7661 
7662     if ( id==_stopTagId ) {
7663         //CRLog::trace("stop tag found, stopping...");
7664         _parser->Stop();
7665     }
7666 
7667     // For EPUB/HTML, this is now dealt with in :OnTagBody(), just before creating this <stylesheet> tag.
7668     // But for FB2, where we have:
7669     //   <FictionBook>
7670     //     <stylesheet type="text/css">
7671     //       some css
7672     //     </stylesheet>
7673     //     <p>...
7674     //     other content
7675     //   </FictionBook>
7676     // we need to apply the <stylesheet> content we have just left, so it applies
7677     // to the coming up content.
7678     // We check the parent we have just pop'ed is a <FictionBook>.
7679     // Caveat: any style set on the <FictionBook> element itself won't be applied now
7680     // in this loading phase (as we have already set its style) - but it will apply
7681     // on re-renderings.
7682     if ( id == el_stylesheet && _currNode && _currNode->getElement()->getNodeId() == el_FictionBook ) {
7683         //CRLog::trace("</stylesheet> found");
7684 #if BUILD_LITE!=1
7685         if ( !_popStyleOnFinish && _document->getDocFlag(DOC_FLAG_ENABLE_INTERNAL_STYLES) ) {
7686             //CRLog::trace("saving current stylesheet before applying of document stylesheet");
7687             _document->getStyleSheet()->push();
7688             _popStyleOnFinish = true;
7689             _document->applyDocumentStyleSheet();
7690         }
7691 #endif
7692     }
7693 
7694     //logfile << " !c!\n";
7695 }
7696 
OnAttribute(const lChar32 * nsname,const lChar32 * attrname,const lChar32 * attrvalue)7697 void ldomDocumentWriter::OnAttribute( const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue )
7698 {
7699     //logfile << "ldomDocumentWriter::OnAttribute() [" << nsname << ":" << attrname << "]";
7700     lUInt16 attr_ns = (nsname && nsname[0]) ? _document->getNsNameIndex( nsname ) : 0;
7701     lUInt16 attr_id = (attrname && attrname[0]) ? _document->getAttrNameIndex( attrname ) : 0;
7702     _currNode->addAttribute( attr_ns, attr_id, attrvalue );
7703 
7704     //logfile << " !a!\n";
7705 }
7706 
OnText(const lChar32 * text,int len,lUInt32 flags)7707 void ldomDocumentWriter::OnText( const lChar32 * text, int len, lUInt32 flags )
7708 {
7709     //logfile << "ldomDocumentWriter::OnText() fpos=" << fpos;
7710 
7711     // Accumulate <HEAD><STYLE> content
7712     if (_inHeadStyle) {
7713         _headStyleText << lString32(text, len);
7714         _inHeadStyle = false;
7715         return;
7716     }
7717 
7718     if (_currNode)
7719     {
7720         if ( (_flags & XML_FLAG_NO_SPACE_TEXT)
7721              && IsEmptySpace(text, len)  && !(flags & TXTFLG_PRE))
7722              return;
7723         if (_currNode->_allowText)
7724             _currNode->onText( text, len, flags );
7725     }
7726     //logfile << " !t!\n";
7727 }
7728 
/// Parser callback for encoding information: intentionally a no-op, this
/// writer ignores both arguments.
void ldomDocumentWriter::OnEncoding( const lChar32 *, const lChar32 *)
{
}
7732 
ldomDocumentWriter(ldomDocument * document,bool headerOnly)7733 ldomDocumentWriter::ldomDocumentWriter(ldomDocument * document, bool headerOnly)
7734     : _document(document), _currNode(NULL), _errFlag(false), _headerOnly(headerOnly), _popStyleOnFinish(false), _flags(0), _inHeadStyle(false)
7735 {
7736     _headStyleText.clear();
7737     _stylesheetLinks.clear();
7738     _stopTagId = 0xFFFE;
7739     IS_FIRST_BODY = true;
7740 
7741 #if BUILD_LITE!=1
7742     if ( _document->isDefStyleSet() ) {
7743         _document->getRootNode()->initNodeStyle();
7744         _document->getRootNode()->setRendMethod(erm_block);
7745     }
7746 #endif
7747 
7748     //CRLog::trace("ldomDocumentWriter() headerOnly=%s", _headerOnly?"true":"false");
7749 }
7750 
7751 
7752 
7753 
7754 
7755 
7756 
7757 
FindNextNode(ldomNode * & node,ldomNode * root)7758 bool FindNextNode( ldomNode * & node, ldomNode * root )
7759 {
7760     if ( node->getChildCount()>0 ) {
7761         // first child
7762         node = node->getChildNode(0);
7763         return true;
7764     }
7765     if (node->isRoot() || node == root )
7766         return false; // root node reached
7767     int index = node->getNodeIndex();
7768     ldomNode * parent = node->getParentNode();
7769     while (parent)
7770     {
7771         if ( index < (int)parent->getChildCount()-1 ) {
7772             // next sibling
7773             node = parent->getChildNode( index + 1 );
7774             return true;
7775         }
7776         if (parent->isRoot() || parent == root )
7777             return false; // root node reached
7778         // up one level
7779         index = parent->getNodeIndex();
7780         parent = parent->getParentNode();
7781     }
7782     //if ( node->getNodeType() == LXML_TEXT_NODE )
7783     return false;
7784 }
7785 
// base64 decode table
// Indexed by character code (0..127): gives the 6-bit value (0..63) of a
// base64 digit ('A'-'Z', 'a'-'z', '0'-'9', '+', '/'), or -1 for any other
// character (including the '=' padding character).
// Row comments give the index range covered by the row; the trailing number
// appears to be the row's first index written in hexadecimal.
static const signed char base64_decode_table[] = {
   -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, //0..15
   -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, //16..31   10
   -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, //32..47   20
   52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, //48..63   30
   -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, //64..79   40
   15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, //80..95   50
   -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, //96..111  60
   41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1  //112..127 70
};
7797 
7798 #define BASE64_BUF_SIZE 128
7799 class LVBase64NodeStream : public LVNamedStream
7800 {
7801 private:
7802     ldomNode *  m_elem;
7803     ldomNode *  m_curr_node;
7804     lString32   m_curr_text;
7805     int         m_text_pos;
7806     lvsize_t    m_size;
7807     lvpos_t     m_pos;
7808 
7809     int         m_iteration;
7810     lUInt32     m_value;
7811 
7812     lUInt8      m_bytes[BASE64_BUF_SIZE];
7813     int         m_bytes_count;
7814     int         m_bytes_pos;
7815 
readNextBytes()7816     int readNextBytes()
7817     {
7818         int bytesRead = 0;
7819         bool flgEof = false;
7820         while ( bytesRead == 0 && !flgEof )
7821         {
7822             while ( m_text_pos >= (int)m_curr_text.length() )
7823             {
7824                 if ( !findNextTextNode() )
7825                     return bytesRead;
7826             }
7827             int len = m_curr_text.length();
7828             const lChar32 * txt = m_curr_text.c_str();
7829             for ( ; m_text_pos<len && m_bytes_count < BASE64_BUF_SIZE - 3; m_text_pos++ )
7830             {
7831                 lChar32 ch = txt[ m_text_pos ];
7832                 if ( ch < 128 )
7833                 {
7834                     if ( ch == '=' )
7835                     {
7836                         // end of stream
7837                         if ( m_iteration == 2 )
7838                         {
7839                             m_bytes[m_bytes_count++] = (lUInt8)((m_value>>4) & 0xFF);
7840                             bytesRead++;
7841                         }
7842                         else if ( m_iteration == 3 )
7843                         {
7844                             m_bytes[m_bytes_count++] = (lUInt8)((m_value>>10) & 0xFF);
7845                             m_bytes[m_bytes_count++] = (lUInt8)((m_value>>2) & 0xFF);
7846                             bytesRead += 2;
7847                         }
7848                         // stop!!!
7849                         //m_text_pos--;
7850                         m_iteration = 0;
7851                         flgEof = true;
7852                         break;
7853                     }
7854                     else
7855                     {
7856                         int k = base64_decode_table[ch];
7857                         if ( !(k & 0x80) ) {
7858                             // next base-64 digit
7859                             m_value = (m_value << 6) | (k);
7860                             m_iteration++;
7861                             if (m_iteration==4)
7862                             {
7863                                 //
7864                                 m_bytes[m_bytes_count++] = (lUInt8)((m_value>>16) & 0xFF);
7865                                 m_bytes[m_bytes_count++] = (lUInt8)((m_value>>8) & 0xFF);
7866                                 m_bytes[m_bytes_count++] = (lUInt8)((m_value>>0) & 0xFF);
7867                                 m_iteration = 0;
7868                                 m_value = 0;
7869                                 bytesRead+=3;
7870                             }
7871                         } else {
7872                             //m_text_pos++;
7873                         }
7874                     }
7875                 }
7876             }
7877         }
7878         return bytesRead;
7879     }
7880 
findNextTextNode()7881     bool findNextTextNode()
7882     {
7883         while ( FindNextNode( m_curr_node, m_elem ) ) {
7884             if ( m_curr_node->isText() ) {
7885                 m_curr_text = m_curr_node->getText();
7886                 m_text_pos = 0;
7887                 return true;
7888             }
7889         }
7890         return false;
7891     }
7892 
bytesAvailable()7893     int bytesAvailable() { return m_bytes_count - m_bytes_pos; }
7894 
rewind()7895     bool rewind()
7896     {
7897         m_curr_node = m_elem;
7898         m_pos = 0;
7899         m_bytes_count = 0;
7900         m_bytes_pos = 0;
7901         m_iteration = 0;
7902         m_value = 0;
7903         return findNextTextNode();
7904     }
7905 
skip(lvsize_t count)7906     bool skip( lvsize_t count )
7907     {
7908         while ( count )
7909         {
7910             if ( m_bytes_pos >= m_bytes_count )
7911             {
7912                 m_bytes_pos = 0;
7913                 m_bytes_count = 0;
7914                 int bytesRead = readNextBytes();
7915                 if ( bytesRead == 0 )
7916                     return false;
7917             }
7918             int diff = (int) (m_bytes_count - m_bytes_pos);
7919             if (diff > (int)count)
7920                 diff = (int)count;
7921             m_pos += diff;
7922             count -= diff;
7923         }
7924         return true;
7925     }
7926 
7927 public:
~LVBase64NodeStream()7928     virtual ~LVBase64NodeStream() { }
LVBase64NodeStream(ldomNode * element)7929     LVBase64NodeStream( ldomNode * element )
7930         : m_elem(element), m_curr_node(element), m_text_pos(0), m_size(0), m_pos(0)
7931     {
7932         // calculate size
7933         rewind();
7934         m_size = bytesAvailable();
7935         for (;;) {
7936             int bytesRead = readNextBytes();
7937             if ( !bytesRead )
7938                 break;
7939             m_bytes_count = 0;
7940             m_bytes_pos = 0;
7941             m_size += bytesRead;
7942         }
7943         // rewind
7944         rewind();
7945     }
Eof()7946     virtual bool Eof()
7947     {
7948         return m_pos >= m_size;
7949     }
GetSize()7950     virtual lvsize_t  GetSize()
7951     {
7952         return m_size;
7953     }
7954 
GetPos()7955     virtual lvpos_t GetPos()
7956     {
7957         return m_pos;
7958     }
7959 
GetPos(lvpos_t * pos)7960     virtual lverror_t GetPos( lvpos_t * pos )
7961     {
7962         if (pos)
7963             *pos = m_pos;
7964         return LVERR_OK;
7965     }
7966 
Seek(lvoffset_t offset,lvseek_origin_t origin,lvpos_t * newPos)7967     virtual lverror_t Seek(lvoffset_t offset, lvseek_origin_t origin, lvpos_t* newPos)
7968     {
7969         lvpos_t npos = 0;
7970         lvpos_t currpos = GetPos();
7971         switch (origin) {
7972         case LVSEEK_SET:
7973             npos = offset;
7974             break;
7975         case LVSEEK_CUR:
7976             npos = currpos + offset;
7977             break;
7978         case LVSEEK_END:
7979             npos = m_size + offset;
7980             break;
7981         }
7982         if (npos > m_size)
7983             return LVERR_FAIL;
7984         if ( npos != currpos )
7985         {
7986             if (npos < currpos)
7987             {
7988                 if ( !rewind() || !skip(npos) )
7989                     return LVERR_FAIL;
7990             }
7991             else
7992             {
7993                 skip( npos - currpos );
7994             }
7995         }
7996         if (newPos)
7997             *newPos = npos;
7998         return LVERR_OK;
7999     }
Write(const void *,lvsize_t,lvsize_t *)8000     virtual lverror_t Write(const void*, lvsize_t, lvsize_t*)
8001     {
8002         return LVERR_NOTIMPL;
8003     }
Read(void * buf,lvsize_t size,lvsize_t * pBytesRead)8004     virtual lverror_t Read(void* buf, lvsize_t size, lvsize_t* pBytesRead)
8005     {
8006         lvsize_t bytesRead = 0;
8007         //fprintf( stderr, "Read()\n" );
8008 
8009         lUInt8 * out = (lUInt8 *)buf;
8010 
8011         while (size>0)
8012         {
8013             int sz = bytesAvailable();
8014             if (!sz) {
8015                 m_bytes_pos = m_bytes_count = 0;
8016                 sz = readNextBytes();
8017                 if (!sz) {
8018                     if ( !bytesRead || m_pos!=m_size) //
8019                         return LVERR_FAIL;
8020                     break;
8021                 }
8022             }
8023             if (sz>(int)size)
8024                 sz = (int)size;
8025             for (int i=0; i<sz; i++)
8026                 *out++ = m_bytes[m_bytes_pos++];
8027             size -= sz;
8028             bytesRead += sz;
8029             m_pos += sz;
8030         }
8031 
8032         if (pBytesRead)
8033             *pBytesRead = bytesRead;
8034         //fprintf( stderr, "    %d bytes read...\n", (int)bytesRead );
8035         return LVERR_OK;
8036     }
SetSize(lvsize_t)8037     virtual lverror_t SetSize(lvsize_t)
8038     {
8039         return LVERR_NOTIMPL;
8040     }
8041 };
8042 
img_scaling_option_t()8043 img_scaling_option_t::img_scaling_option_t()
8044 {
8045     mode = (MAX_IMAGE_SCALE_MUL>1) ? (ARBITRARY_IMAGE_SCALE_ENABLED==1 ? IMG_FREE_SCALING : IMG_INTEGER_SCALING) : IMG_NO_SCALE;
8046     max_scale = (MAX_IMAGE_SCALE_MUL>1) ? MAX_IMAGE_SCALE_MUL : 1;
8047 }
8048 
img_scaling_options_t()8049 img_scaling_options_t::img_scaling_options_t()
8050 {
8051     img_scaling_option_t option;
8052     zoom_in_inline = option;
8053     zoom_in_block = option;
8054     zoom_out_inline = option;
8055     zoom_out_block = option;
8056 }
8057 
8058 #define FONT_SIZE_BIG 32
8059 #define FONT_SIZE_VERY_BIG 50
updateScalingOption(img_scaling_option_t & v,CRPropRef props,int fontSize,bool zoomin,bool isInline)8060 static bool updateScalingOption( img_scaling_option_t & v, CRPropRef props, int fontSize, bool zoomin, bool isInline )
8061 {
8062     lString8 propName("crengine.image.scaling.");
8063     propName << (zoomin ? "zoomin." : "zoomout.");
8064     propName << (isInline ? "inline." : "block.");
8065     lString8 propNameMode = propName + "mode";
8066     lString8 propNameScale = propName + "scale";
8067     img_scaling_option_t def;
8068     int currMode = props->getIntDef(propNameMode.c_str(), (int)def.mode);
8069     int currScale = props->getIntDef(propNameScale.c_str(), (int)def.max_scale);
8070     if ( currScale==0 ) {
8071         if ( fontSize>=FONT_SIZE_VERY_BIG )
8072             currScale = 3;
8073         else if ( fontSize>=FONT_SIZE_BIG )
8074             currScale = 2;
8075         else
8076             currScale = 1;
8077     }
8078     if ( currScale==1 )
8079         currMode = 0;
8080     bool updated = false;
8081     if ( v.max_scale!=currScale ) {
8082         updated = true;
8083         v.max_scale = currScale;
8084     }
8085     if ( v.mode!=(img_scaling_mode_t)currMode ) {
8086         updated = true;
8087         v.mode = (img_scaling_mode_t)currMode;
8088     }
8089     props->setIntDef(propNameMode.c_str(), currMode);
8090     props->setIntDef(propNameScale.c_str(), currScale);
8091     return updated;
8092 }
8093 
8094 /// returns true if any changes occured
update(CRPropRef props,int fontSize)8095 bool img_scaling_options_t::update( CRPropRef props, int fontSize )
8096 {
8097     bool updated = false;
8098     updated = updateScalingOption( zoom_in_inline, props, fontSize, true, true ) || updated;
8099     updated = updateScalingOption( zoom_in_block, props, fontSize, true, false ) || updated;
8100     updated = updateScalingOption( zoom_out_inline, props, fontSize, false, true ) || updated;
8101     updated = updateScalingOption( zoom_out_block, props, fontSize, false, false ) || updated;
8102     return updated;
8103 }
8104 
ParseXPathStep(const lChar32 * & path,lString32 & name,int & index)8105 xpath_step_t ParseXPathStep( const lChar32 * &path, lString32 & name, int & index )
8106 {
8107     int pos = 0;
8108     const lChar32 * s = path;
8109     //int len = path.GetLength();
8110     name.clear();
8111     index = -1;
8112     int flgPrefix = 0;
8113     if (s && s[pos]) {
8114         lChar32 ch = s[pos];
8115         // prefix: none, '/' or '.'
8116         if (ch=='/') {
8117             flgPrefix = 1;
8118             ch = s[++pos];
8119         } else if (ch=='.') {
8120             flgPrefix = 2;
8121             ch = s[++pos];
8122         }
8123         int nstart = pos;
8124         if (ch>='0' && ch<='9') {
8125             // node or point index
8126             pos++;
8127             while (s[pos]>='0' && s[pos]<='9')
8128                 pos++;
8129             if (s[pos] && s[pos]!='/' && s[pos]!='.')
8130                 return xpath_step_error;
8131             lString32 sindex( path+nstart, pos-nstart );
8132             index = sindex.atoi();
8133             if (index<((flgPrefix==2)?0:1))
8134                 return xpath_step_error;
8135             path += pos;
8136             return (flgPrefix==2) ? xpath_step_point : xpath_step_nodeindex;
8137         }
8138         while (s[pos] && s[pos]!='[' && s[pos]!='/' && s[pos]!='.')
8139             pos++;
8140         if (pos==nstart)
8141             return xpath_step_error;
8142         name = lString32( path+ nstart, pos-nstart );
8143         if (s[pos]=='[') {
8144             // index
8145             pos++;
8146             int istart = pos;
8147             while (s[pos] && s[pos]!=']' && s[pos]!='/' && s[pos]!='.')
8148                 pos++;
8149             if (!s[pos] || pos==istart)
8150                 return xpath_step_error;
8151 
8152             lString32 sindex( path+istart, pos-istart );
8153             index = sindex.atoi();
8154             pos++;
8155         }
8156         if (!s[pos] || s[pos]=='/' || s[pos]=='.') {
8157             path += pos;
8158             return (name == "text()") ? xpath_step_text : xpath_step_element; // OK!
8159         }
8160         return xpath_step_error; // error
8161     }
8162     return xpath_step_error;
8163 }
8164 
8165 
8166 /// get pointer for relative path
relative(lString32 relativePath)8167 ldomXPointer ldomXPointer::relative( lString32 relativePath )
8168 {
8169     return getDocument()->createXPointer( getNode(), relativePath );
8170 }
8171 /// create xpointer from pointer string
createXPointer(const lString32 & xPointerStr)8172 ldomXPointer ldomDocument::createXPointer( const lString32 & xPointerStr )
8173 {
8174     if ( xPointerStr[0]=='#' ) {
8175         lString32 id = xPointerStr.substr(1);
8176         lUInt32 idid = getAttrValueIndex(id.c_str());
8177         lInt32 nodeIndex;
8178         if ( _idNodeMap.get(idid, nodeIndex) ) {
8179             ldomNode * node = getTinyNode(nodeIndex);
8180             if ( node && node->isElement() ) {
8181                 return ldomXPointer(node, -1);
8182             }
8183         }
8184         return ldomXPointer();
8185     }
8186     return createXPointer( getRootNode(), xPointerStr );
8187 }
8188 
8189 #if BUILD_LITE!=1
8190 
8191 /// return parent final node, if found
getFinalNode() const8192 ldomNode * ldomXPointer::getFinalNode() const
8193 {
8194     ldomNode * node = getNode();
8195     for (;;) {
8196         if ( !node )
8197             return NULL;
8198         if ( node->getRendMethod()==erm_final )
8199             return node;
8200         node = node->getParentNode();
8201     }
8202 }
8203 
8204 /// return true is this node is a final node
isFinalNode() const8205 bool ldomXPointer::isFinalNode() const
8206 {
8207     ldomNode * node = getNode();
8208     if ( !node )
8209         return false;
8210     if ( node->getRendMethod()==erm_final )
8211         return true;
8212     return false;
8213 }
8214 
8215 /// create xpointer from doc point
createXPointer(lvPoint pt,int direction,bool strictBounds,ldomNode * fromNode)8216 ldomXPointer ldomDocument::createXPointer( lvPoint pt, int direction, bool strictBounds, ldomNode * fromNode )
8217 {
8218     //
8219     lvPoint orig_pt = lvPoint(pt);
8220     ldomXPointer ptr;
8221     if ( !getRootNode() )
8222         return ptr;
8223     ldomNode * startNode;
8224     if ( fromNode ) {
8225         // Start looking from the fromNode provided - only used when we are
8226         // looking inside a floatBox or an inlineBox below and we have this
8227         // recursive call to createXPointer().
8228         // Even with a provided fromNode, pt must be provided in full absolute
8229         // coordinates. But we need to give to startNode->elementFromPoint()
8230         // a pt with coordinates relative to fromNode.
8231         // And because elementFromPoint() uses the fmt x/y offsets of the
8232         // start node (relative to the containing final block), we would
8233         // need to have pt relative to that containing final block - and so,
8234         // we'd need to lookup the final node from here (or have it provided
8235         // as an additional parameter if it's known by caller).
8236         // But because we're called only for floatBox and inlineBox, which
8237         // have only a single child, we can use the trick of calling
8238         // ->elementFromPoint() on that first child, while still getting
8239         // pt relative to fromNode itself:
8240         startNode = fromNode->getChildNode(0);
8241         lvRect rc;
8242         fromNode->getAbsRect( rc, true );
8243         pt.x -= rc.left;
8244         pt.y -= rc.top;
8245     }
8246     else {
8247         startNode = getRootNode();
8248     }
8249     ldomNode * finalNode = startNode->elementFromPoint( pt, direction );
8250     if ( fromNode )
8251         pt = orig_pt; // restore orig pt
8252     if ( !finalNode ) {
8253         // printf("no finalNode found from %s\n", UnicodeToLocal(ldomXPointer(fromNode, 0).toString()).c_str());
8254         // No node found, return start or end of document if pt overflows it, otherwise NULL
8255         if ( pt.y >= getFullHeight()) {
8256             ldomNode * node = getRootNode()->getLastTextChild();
8257             return ldomXPointer(node,node ? node->getText().length() : 0);
8258         } else if ( pt.y <= 0 ) {
8259             ldomNode * node = getRootNode()->getFirstTextChild();
8260             return ldomXPointer(node, 0);
8261         }
8262         CRLog::trace("not final node");
8263         return ptr;
8264     }
8265     // printf("finalNode %s\n", UnicodeToLocal(ldomXPointer(finalNode, 0).toString()).c_str());
8266 
8267     lvdom_element_render_method rm = finalNode->getRendMethod();
8268     if ( rm != erm_final ) {
8269         // Not final, return XPointer to first or last child
8270         lvRect rc;
8271         finalNode->getAbsRect( rc );
8272         if ( pt.y < (rc.bottom + rc.top) / 2 )
8273             return ldomXPointer( finalNode, 0 );
8274         else
8275             return ldomXPointer( finalNode, finalNode->getChildCount() );
8276     }
8277 
8278     // Final node found
8279     // Adjust pt in coordinates of the FormattedText
8280     RenderRectAccessor fmt( finalNode );
8281     lvRect rc;
8282     // When in enhanced rendering mode, we can get the FormattedText coordinates
8283     // and its width (inner_width) directly
8284     finalNode->getAbsRect( rc, true ); // inner=true
8285     pt.x -= rc.left;
8286     pt.y -= rc.top;
8287     int inner_width;
8288     if ( RENDER_RECT_HAS_FLAG(fmt, INNER_FIELDS_SET) ) {
8289         inner_width = fmt.getInnerWidth();
8290     }
8291     else {
8292         // In legacy mode, we just got the erm_final coordinates, and we must
8293         // compute and remove left/top border and padding (using rc.width() as
8294         // the base for % is wrong here, and so is rc.height() for padding top)
8295         int em = finalNode->getFont()->getSize();
8296         int padding_left = measureBorder(finalNode,3)+lengthToPx(finalNode->getStyle()->padding[0],rc.width(),em);
8297         int padding_right = measureBorder(finalNode,1)+lengthToPx(finalNode->getStyle()->padding[1],rc.width(),em);
8298         int padding_top = measureBorder(finalNode,0)+lengthToPx(finalNode->getStyle()->padding[2],rc.height(),em);
8299         pt.x -= padding_left;
8300         pt.y -= padding_top;
8301         // As well as the inner width
8302         inner_width  = fmt.getWidth() - padding_left - padding_right;
8303     }
8304 
8305     // Get the formatted text, so we can look for 'pt' line by line, word by word,
8306     // (and embedded float by embedded float if there are some).
8307     LFormattedTextRef txtform;
8308     {
8309         // This will possibly return it from CVRendBlockCache
8310         finalNode->renderFinalBlock( txtform, &fmt, inner_width );
8311     }
8312 
8313     // First, look if pt happens to be in some float
8314     // (this may not work with floats with negative margins)
8315     int fcount = txtform->GetFloatCount();
8316     for (int f=0; f<fcount; f++) {
8317         const embedded_float_t * flt = txtform->GetFloatInfo(f);
8318         // Ignore fake floats (no srctext) made from outer floats footprint
8319         if ( flt->srctext == NULL )
8320             continue;
8321         if (pt.x >= flt->x && pt.x < flt->x + flt->width && pt.y >= flt->y && pt.y < flt->y + flt->height ) {
8322             // pt is inside this float.
8323             ldomNode * node = (ldomNode *) flt->srctext->object; // floatBox node
8324             ldomXPointer inside_ptr = createXPointer( orig_pt, direction, strictBounds, node );
8325             if ( !inside_ptr.isNull() ) {
8326                 return inside_ptr;
8327             }
8328             // Otherwise, return xpointer to the floatNode itself
8329             return ldomXPointer(node, 0);
8330             // (Or should we let just go on looking only at the text in the original final node?)
8331         }
8332         // If no containing float, go on looking at the text of the original final node
8333     }
8334 
8335     // Look at words in the rendered final node (whether it's the original
8336     // main final node, or the one found in a float)
8337     int lcount = txtform->GetLineCount();
8338     for ( int l = 0; l<lcount; l++ ) {
8339         const formatted_line_t * frmline = txtform->GetLineInfo(l);
8340         if ( pt.y >= (int)(frmline->y + frmline->height) && l<lcount-1 )
8341             continue;
8342         // CRLog::debug("  point (%d, %d) line found [%d]: (%d..%d)",
8343         //      pt.x, pt.y, l, frmline->y, frmline->y+frmline->height);
8344         bool line_is_bidi = frmline->flags & LTEXT_LINE_IS_BIDI;
8345         int wc = (int)frmline->word_count;
8346 
8347         if ( direction >= PT_DIR_SCAN_FORWARD_LOGICAL_FIRST || direction <= PT_DIR_SCAN_BACKWARD_LOGICAL_FIRST ) {
8348             // Only used by LVDocView::getBookmark(), LVDocView::getPageDocumentRange()
8349             // and ldomDocument::findText(), to not miss any content or text from
8350             // the page.
8351             // The SCAN_ part has been done done: a line has been found, and we want
8352             // to find node/chars from it in the logical (HTML) order, and not in the
8353             // visual order (that PT_DIR_SCAN_FORWARD/PT_DIR_SCAN_BACKWARD do), which
8354             // might not be the same in bidi lines:
8355             bool find_first = direction == PT_DIR_SCAN_FORWARD_LOGICAL_FIRST ||
8356                               direction == PT_DIR_SCAN_BACKWARD_LOGICAL_FIRST;
8357                          // so, false when PT_DIR_SCAN_FORWARD_LOGICAL_LAST
8358                          //             or PT_DIR_SCAN_BACKWARD_LOGICAL_LAST
8359 
8360             const formatted_word_t * word = NULL;
8361             for ( int w=0; w<wc; w++ ) {
8362                 const formatted_word_t * tmpword = &frmline->words[w];
8363                 const src_text_fragment_t * src = txtform->GetSrcInfo(tmpword->src_text_index);
8364                 ldomNode * node = (ldomNode *)src->object;
8365                 if ( !node ) // ignore crengine added text (spacing, list item bullets...)
8366                     continue;
8367                 if ( !line_is_bidi ) {
8368                     word = tmpword;
8369                     if ( find_first )
8370                         break; // found logical first real word
8371                     // otherwise, go to the end, word will be logical last real word
8372                 }
8373                 else {
8374                     if (!word) { // first word seen: first candidate
8375                         word = tmpword;
8376                     }
8377                     else { // compare current word to the current candidate
8378                         if ( find_first && tmpword->src_text_index < word->src_text_index ) {
8379                             word = tmpword;
8380                         }
8381                         else if ( !find_first && tmpword->src_text_index > word->src_text_index ) {
8382                             word = tmpword;
8383                         }
8384                         else if (tmpword->src_text_index == word->src_text_index ) {
8385                             // (Same src_text_fragment_t, same src->t.offset, skip in when comparing)
8386                             if ( find_first && tmpword->t.start < word->t.start ) {
8387                                 word = tmpword;
8388                             }
8389                             else if ( !find_first && tmpword->t.start > word->t.start ) {
8390                                 word = tmpword;
8391                             }
8392                         }
8393                     }
8394                 }
8395             }
8396             if ( !word ) // no word: no xpointer (should not happen?)
8397                 return ptr;
8398             // Found right word/image
8399             const src_text_fragment_t * src = txtform->GetSrcInfo(word->src_text_index);
8400             ldomNode * node = (ldomNode *)src->object;
8401             if ( word->flags & LTEXT_WORD_IS_INLINE_BOX ) {
8402                 // pt is inside this inline-block inlineBox node
8403                 ldomXPointer inside_ptr = createXPointer( orig_pt, direction, strictBounds, node );
8404                 if ( !inside_ptr.isNull() ) {
8405                     return inside_ptr;
8406                 }
8407                 // Otherwise, return xpointer to the inlineBox itself
8408                 return ldomXPointer(node, 0);
8409             }
8410             if ( word->flags & LTEXT_WORD_IS_OBJECT ) {
8411                 return ldomXPointer(node, 0);
8412             }
8413             // It is a word
8414             if ( find_first ) // return xpointer to logical start of word
8415                 return ldomXPointer( node, src->t.offset + word->t.start );
8416             else // return xpointer to logical end of word
8417                 return ldomXPointer( node, src->t.offset + word->t.start + word->t.len );
8418         }
8419 
8420         // Found line, searching for word (words are in visual order)
8421         int x = pt.x - frmline->x;
8422         // frmline->x is text indentation (+ possibly leading space if text
8423         // centered or right aligned)
8424         if (strictBounds) {
8425             if (x < 0 || x > frmline->width) { // pt is before or after formatted text: nothing there
8426                 return ptr;
8427             }
8428         }
8429 
8430         for ( int w=0; w<wc; w++ ) {
8431             const formatted_word_t * word = &frmline->words[w];
8432             if ( ( !line_is_bidi && x < word->x + word->width ) ||
8433                  ( line_is_bidi && x >= word->x && x < word->x + word->width ) ||
8434                  ( w == wc-1 ) ) {
8435                 const src_text_fragment_t * src = txtform->GetSrcInfo(word->src_text_index);
8436                 // CRLog::debug(" word found [%d]: x=%d..%d, start=%d, len=%d  %08X",
8437                 //      w, word->x, word->x + word->width, word->t.start, word->t.len, src->object);
8438 
8439                 ldomNode * node = (ldomNode *)src->object;
8440                 if ( !node ) // Ignore crengine added text (spacing, list item bullets...)
8441                     continue;
8442 
8443                 if ( word->flags & LTEXT_WORD_IS_INLINE_BOX ) {
8444                     // pt is inside this inline-block inlineBox node
8445                     ldomXPointer inside_ptr = createXPointer( orig_pt, direction, strictBounds, node );
8446                     if ( !inside_ptr.isNull() ) {
8447                         return inside_ptr;
8448                     }
8449                     // Otherwise, return xpointer to the inlineBox itself
8450                     return ldomXPointer(node, 0);
8451                 }
8452                 if ( word->flags & LTEXT_WORD_IS_OBJECT ) {
8453                     // Object (image)
8454                     #if 1
8455                     // return image object itself
8456                     return ldomXPointer(node, 0);
8457                     #else
8458                     return ldomXPointer( node->getParentNode(),
8459                         node->getNodeIndex() + (( x < word->x + word->width/2 ) ? 0 : 1) );
8460                     #endif
8461                 }
8462 
8463                 // Found word, searching for letters
8464                 LVFont * font = (LVFont *) src->t.font;
8465                 lUInt16 width[512];
8466                 lUInt8 flg[512];
8467 
8468                 lString32 str = node->getText();
8469                 // We need to transform the node text as it had been when
8470                 // rendered (the transform may change chars widths) for the
8471                 // XPointer offset to be correct
8472                 switch ( node->getParentNode()->getStyle()->text_transform ) {
8473                     case css_tt_uppercase:
8474                         str.uppercase();
8475                         break;
8476                     case css_tt_lowercase:
8477                         str.lowercase();
8478                         break;
8479                     case css_tt_capitalize:
8480                         str.capitalize();
8481                         break;
8482                     case css_tt_full_width:
8483                         // str.fullWidthChars(); // disabled for now in lvrend.cpp
8484                         break;
8485                     default:
8486                         break;
8487                 }
8488 
8489                 lUInt32 hints = WORD_FLAGS_TO_FNT_FLAGS(word->flags);
8490                 font->measureText( str.c_str()+word->t.start, word->t.len, width, flg, word->width+50, '?',
8491                             src->lang_cfg, src->letter_spacing + word->added_letter_spacing, false, hints);
8492 
8493                 bool word_is_rtl = word->flags & LTEXT_WORD_DIRECTION_IS_RTL;
8494                 if ( word_is_rtl ) {
8495                     for ( int i=word->t.len-1; i>=0; i-- ) {
8496                         int xx = ( i>0 ) ? (width[i-1] + width[i])/2 : width[i]/2;
8497                         xx = word->width - xx;
8498                         if ( x < word->x + xx ) {
8499                             return ldomXPointer( node, src->t.offset + word->t.start + i );
8500                         }
8501                     }
8502                     return ldomXPointer( node, src->t.offset + word->t.start );
8503                 }
8504                 else {
8505                     for ( int i=0; i<word->t.len; i++ ) {
8506                         int xx = ( i>0 ) ? (width[i-1] + width[i])/2 : width[i]/2;
8507                         if ( x < word->x + xx ) {
8508                             return ldomXPointer( node, src->t.offset + word->t.start + i );
8509                         }
8510                     }
8511                     return ldomXPointer( node, src->t.offset + word->t.start + word->t.len );
8512                 }
8513             }
8514         }
8515     }
8516     return ptr;
8517 }
8518 
8519 /// returns coordinates of pointer inside formatted document
toPoint(bool extended) const8520 lvPoint ldomXPointer::toPoint(bool extended) const
8521 {
8522     lvRect rc;
8523     if ( !getRect( rc, extended ) )
8524         return lvPoint(-1, -1);
8525     return rc.topLeft();
8526 }
8527 
8528 /// returns caret rectangle for pointer inside formatted document
8529 // (with extended=true, consider paddings and borders)
8530 // Note that extended / ldomXPointer::getRectEx() is only used (by cre.cpp)
8531 // when dealing with hyphenated words, getting each char width, char by char.
// So we return the char width (and no longer the word width) of the char
8533 // pointed to by this XPointer (unlike ldomXRange::getRectEx() which deals
8534 // with a range between 2 XPointers).
getRect(lvRect & rect,bool extended,bool adjusted) const8535 bool ldomXPointer::getRect(lvRect & rect, bool extended, bool adjusted) const
8536 {
8537     //CRLog::trace("ldomXPointer::getRect()");
8538     if ( isNull() )
8539         return false;
8540     ldomNode * p = isElement() ? getNode() : getNode()->getParentNode();
8541     ldomNode * p0 = p;
8542     ldomNode * finalNode = NULL;
8543     if ( !p ) {
8544         //CRLog::trace("ldomXPointer::getRect() - p==NULL");
8545         return false;
8546     }
8547     ldomDocument* doc = p->getDocument();
8548     //printf("getRect( p=%08X type=%d )\n", (unsigned)p, (int)p->getNodeType() );
8549     if ( !doc ) {
8550         //CRLog::trace("ldomXPointer::getRect() - p->getDocument()==NULL");
8551         return false;
8552     }
8553     ldomNode * mainNode = doc->getRootNode();
8554     for ( ; p; p = p->getParentNode() ) {
8555         int rm = p->getRendMethod();
8556         if ( rm == erm_final ) {
8557             if ( doc->getDOMVersionRequested() < 20180524 && p->getStyle()->display == css_d_list_item_legacy ) {
8558                 // This legacy rendering of list item is now erm_final, but
8559                 // can contain other real erm_final nodes.
8560                 // So, if we found an erm_final, and if we find this erm_final
8561                 // when going up, we should use it (unlike in next case).
8562                 // (This is needed to correctly display highlights on books opened
8563                 // with some older DOM_VERSION.)
8564                 finalNode = p;
8565             }
8566             else {
8567                 // With floats, we may get multiple erm_final when walking up
8568                 // to root node: keep the first one met (but go on up to the
8569                 // root node in case we're in some upper erm_invisible).
8570                 if (!finalNode)
8571                     finalNode = p; // found final block
8572             }
8573         }
8574         else if ( p->getRendMethod() == erm_invisible ) {
8575             return false; // invisible !!!
8576         }
8577         if ( p==mainNode )
8578             break;
8579     }
8580 
8581     if ( finalNode==NULL ) {
8582         lvRect rc;
8583         p0->getAbsRect( rc );
8584         CRLog::debug("node w/o final parent: %d..%d", rc.top, rc.bottom);
8585     }
8586 
8587     if ( finalNode!=NULL ) {
8588         lvRect rc;
8589         finalNode->getAbsRect( rc, extended ); // inner=true if extended=true
8590         if (rc.height() == 0 && rc.width() > 0) {
8591             rect = rc;
8592             rect.bottom++;
8593             return true;
8594         }
8595         RenderRectAccessor fmt( finalNode );
8596         //if ( !fmt )
8597         //    return false;
8598 
8599         // When in enhanced rendering mode, we can get the FormattedText coordinates
8600         // and its width (inner_width) directly
8601         int inner_width;
8602         if ( RENDER_RECT_HAS_FLAG(fmt, INNER_FIELDS_SET) ) {
8603             inner_width = fmt.getInnerWidth();
8604             // if extended=true, we got directly the adjusted rc.top and rc.left
8605         }
8606         else {
8607             // In legacy mode, we just got the erm_final coordinates, and we must
8608             // compute and remove left/top border and padding (using rc.width() as
8609             // the base for % is wrong here)
8610             int em = finalNode->getFont()->getSize();
8611             int padding_left = measureBorder(finalNode,3) + lengthToPx(finalNode->getStyle()->padding[0], rc.width(), em);
8612             int padding_right = measureBorder(finalNode,1) + lengthToPx(finalNode->getStyle()->padding[1], rc.width(), em);
8613             inner_width  = fmt.getWidth() - padding_left - padding_right;
8614             if (extended) {
8615                 int padding_top = measureBorder(finalNode,0) + lengthToPx(finalNode->getStyle()->padding[2], rc.width(), em);
8616                 rc.top += padding_top;
8617                 rc.left += padding_left;
8618                 // rc.right += padding_left; // wrong, but not used
8619                 // rc.bottom += padding_top; // wrong, but not used
8620             }
8621         }
8622 
8623         // Get the formatted text, so we can look where in it is this XPointer
8624         LFormattedTextRef txtform;
8625         finalNode->renderFinalBlock( txtform, &fmt, inner_width );
8626 
8627         ldomNode *node = getNode();
8628         int offset = getOffset();
8629 ////        ldomXPointerEx xp(node, offset);
8630 ////        if ( !node->isText() ) {
8631 ////            //ldomXPointerEx xp(node, offset);
8632 ////            xp.nextVisibleText();
8633 ////            node = xp.getNode();
8634 ////            offset = xp.getOffset();
8635 ////        }
8636 //        if ( node->isElement() ) {
8637 //            if ( offset>=0 ) {
8638 //                //
8639 //                if ( offset>= (int)node->getChildCount() ) {
8640 //                    node = node->getLastTextChild();
8641 //                    if ( node )
8642 //                        offset = node->getText().length();
8643 //                    else
8644 //                        return false;
8645 //                } else {
8646 //                    for ( int ci=offset; ci<(int)node->getChildCount(); ci++ ) {
8647 //                        ldomNode * child = node->getChildNode( offset );
8648 //                        ldomNode * txt = txt = child->getFirstTextChild( true );
8649 //                        if ( txt ) {
8650 //                            node = txt;
8651 ////                            lString32 s = txt->getText();
8652 ////                            CRLog::debug("text: [%d] '%s'", s.length(), LCSTR(s));
8653 //                            break;
8654 //                        }
8655 //                    }
8656 //                    if ( !node->isText() )
8657 //                        return false;
8658 //                    offset = 0;
8659 //                }
8660 //            }
8661 //        }
8662 
8663         // text node
8664         int srcIndex = -1;
8665         int srcLen = -1;
8666         int lastIndex = -1;
8667         int lastLen = -1;
8668         int lastOffset = -1;
8669         ldomXPointerEx xp(node, offset);
8670         for ( int i=0; i<txtform->GetSrcCount(); i++ ) {
8671             const src_text_fragment_t * src = txtform->GetSrcInfo(i);
8672             if ( src->flags & LTEXT_SRC_IS_FLOAT ) // skip floats
8673                 continue;
8674             bool isObject = (src->flags&LTEXT_SRC_IS_OBJECT)!=0;
8675             if ( src->object == node ) {
8676                 srcIndex = i;
8677                 srcLen = isObject ? 0 : src->t.len;
8678                 break;
8679             }
8680             lastIndex = i;
8681             lastLen =  isObject ? 0 : src->t.len;
8682             lastOffset = isObject ? 0 : src->t.offset;
8683             ldomXPointerEx xp2((ldomNode*)src->object, lastOffset);
8684             if ( xp2.compare(xp)>0 ) {
8685                 srcIndex = i;
8686                 srcLen = lastLen;
8687                 offset = lastOffset;
8688                 break;
8689             }
8690         }
8691         if ( srcIndex == -1 ) {
8692             if ( lastIndex<0 )
8693                 return false;
8694             srcIndex = lastIndex;
8695             srcLen = lastLen;
8696             offset = lastOffset;
8697         }
8698 
8699         // Some state for non-linear bidi word search
8700         int nearestForwardSrcIndex = -1;
8701         int nearestForwardSrcOffset = -1;
8702         lvRect bestBidiRect = lvRect();
8703         bool hasBestBidiRect = false;
8704 
8705         for ( int l = 0; l<txtform->GetLineCount(); l++ ) {
8706             const formatted_line_t * frmline = txtform->GetLineInfo(l);
8707             bool line_is_bidi = frmline->flags & LTEXT_LINE_IS_BIDI;
8708             for ( int w=0; w<(int)frmline->word_count; w++ ) {
8709                 const formatted_word_t * word = &frmline->words[w];
8710                 bool word_is_rtl = word->flags & LTEXT_WORD_DIRECTION_IS_RTL;
8711                 bool lastWord = (l == txtform->GetLineCount() - 1
8712                                  && w == frmline->word_count - 1);
8713 
8714                 if ( line_is_bidi ) {
8715                     // When line is bidi, src text nodes may be shuffled, so we can't
8716                     // just be done when meeting a forward src in logical order.
8717                     // We'd better have a dedicated searching code to not mess with
8718                     // the visual=logical order generic code below.
8719                     // todo: see if additional tweaks according to
8720                     // frmline->flags&LTEXT_LINE_PARA_IS_RTL may help adjusting
8721                     // char rects depending on it vs word_is_rtl.
8722                     if ( word->src_text_index>=srcIndex || lastWord ) {
8723                         // Found word from same or forward src line
8724                         if (word->src_text_index > srcIndex &&
8725                                ( nearestForwardSrcIndex == -1 ||
8726                                  word->src_text_index < nearestForwardSrcIndex ||
8727                                  (word->src_text_index == nearestForwardSrcIndex &&
8728                                     word->t.start < nearestForwardSrcOffset ) ) ) {
8729                             // Found some word from a forward src that is nearest than previously found one:
8730                             // get its start as a possible best result.
8731                             bestBidiRect.top = rc.top + frmline->y;
8732                             bestBidiRect.bottom = bestBidiRect.top + frmline->height;
8733                             if ( word_is_rtl ) {
8734                                 bestBidiRect.right = word->x + word->width + rc.left + frmline->x;
8735                                 bestBidiRect.left = bestBidiRect.right - 1;
8736                             }
8737                             else {
8738                                 bestBidiRect.left = word->x + rc.left + frmline->x;
8739                                 if (extended) {
8740                                     if (word->flags & (LTEXT_WORD_IS_OBJECT|LTEXT_WORD_IS_INLINE_BOX) && word->width > 0)
8741                                         bestBidiRect.right = bestBidiRect.left + word->width; // width of image
8742                                     else
8743                                         bestBidiRect.right = bestBidiRect.left + 1;
8744                                 }
8745                             }
8746                             hasBestBidiRect = true;
8747                             nearestForwardSrcIndex = word->src_text_index;
8748                             if (word->flags & (LTEXT_WORD_IS_OBJECT|LTEXT_WORD_IS_INLINE_BOX))
8749                                 nearestForwardSrcOffset = 0;
8750                             else
8751                                 nearestForwardSrcOffset = word->t.start;
8752                         }
8753                         else if (word->src_text_index == srcIndex) {
8754                             // Found word in that exact source text node
8755                             if ( word->flags & (LTEXT_WORD_IS_OBJECT|LTEXT_WORD_IS_INLINE_BOX) ) {
8756                                 // An image is the single thing in its srcIndex
8757                                 rect.top = rc.top + frmline->y;
8758                                 rect.bottom = rect.top + frmline->height;
8759                                 rect.left = word->x + rc.left + frmline->x;
8760                                 if (word->width > 0)
8761                                     rect.right = rect.left + word->width; // width of image
8762                                 else
8763                                     rect.right = rect.left + 1;
8764                                 return true;
8765                             }
8766                             // Target is in this text node. We may not find it part
8767                             // of a word, so look at all words and keep the nearest
8768                             // (forward if possible) in case we don't find an exact one
8769                             if ( word->t.start > offset ) { // later word in logical order
8770                                 if (nearestForwardSrcIndex != word->src_text_index ||
8771                                           word->t.start <= nearestForwardSrcOffset ) {
8772                                     bestBidiRect.top = rc.top + frmline->y;
8773                                     bestBidiRect.bottom = bestBidiRect.top + frmline->height;
8774                                     if ( word_is_rtl ) { // right edge of next logical word, as it is drawn on the left
8775                                         bestBidiRect.right = word->x + word->width + rc.left + frmline->x;
8776                                         bestBidiRect.left = bestBidiRect.right - 1;
8777                                     }
8778                                     else { // left edge of next logical word, as it is drawn on the right
8779                                         bestBidiRect.left = word->x + rc.left + frmline->x;
8780                                         bestBidiRect.right = bestBidiRect.left + 1;
8781                                     }
8782                                     hasBestBidiRect = true;
8783                                     nearestForwardSrcIndex = word->src_text_index;
8784                                     nearestForwardSrcOffset = word->t.start;
8785                                 }
8786                             }
8787                             else if ( word->t.start+word->t.len <= offset ) { // past word in logical order
8788                                 // Only if/while we haven't yet found one with the right src index and
8789                                 // a forward offset
8790                                 if (nearestForwardSrcIndex != word->src_text_index ||
8791                                         ( nearestForwardSrcOffset < word->t.start &&
8792                                           word->t.start+word->t.len > nearestForwardSrcOffset ) ) {
8793                                     bestBidiRect.top = rc.top + frmline->y;
8794                                     bestBidiRect.bottom = bestBidiRect.top + frmline->height;
8795                                     if ( word_is_rtl ) { // left edge of previous logical word, as it is drawn on the right
8796                                         bestBidiRect.left = word->x + rc.left + frmline->x;
8797                                         bestBidiRect.right = bestBidiRect.left + 1;
8798                                     }
8799                                     else { // right edge of previous logical word, as it is drawn on the left
8800                                         bestBidiRect.right = word->x + word->width + rc.left + frmline->x;
8801                                         bestBidiRect.left = bestBidiRect.right - 1;
8802                                     }
8803                                     hasBestBidiRect = true;
8804                                     nearestForwardSrcIndex = word->src_text_index;
8805                                     nearestForwardSrcOffset = word->t.start+word->t.len;
8806                                 }
8807                             }
8808                             else { // exact word found
8809                                 // Measure word
8810                                 LVFont *font = (LVFont *) txtform->GetSrcInfo(srcIndex)->t.font;
8811                                 lUInt16 w[512];
8812                                 lUInt8 flg[512];
8813                                 lString32 str = node->getText();
8814                                 if (offset == word->t.start && str.empty()) {
8815                                     rect.left = word->x + rc.left + frmline->x;
8816                                     rect.top = rc.top + frmline->y;
8817                                     rect.right = rect.left + 1;
8818                                     rect.bottom = rect.top + frmline->height;
8819                                     return true;
8820                                 }
8821                                 // We need to transform the node text as it had been when
8822                                 // rendered (the transform may change chars widths) for the
8823                                 // rect to be correct
8824                                 switch ( node->getParentNode()->getStyle()->text_transform ) {
8825                                     case css_tt_uppercase:
8826                                         str.uppercase();
8827                                         break;
8828                                     case css_tt_lowercase:
8829                                         str.lowercase();
8830                                         break;
8831                                     case css_tt_capitalize:
8832                                         str.capitalize();
8833                                         break;
8834                                     case css_tt_full_width:
8835                                         // str.fullWidthChars(); // disabled for now in lvrend.cpp
8836                                         break;
8837                                     default:
8838                                         break;
8839                                 }
8840                                 lUInt32 hints = WORD_FLAGS_TO_FNT_FLAGS(word->flags);
8841                                 font->measureText(
8842                                     str.c_str()+word->t.start,
8843                                     word->t.len,
8844                                     w,
8845                                     flg,
8846                                     word->width+50,
8847                                     '?',
8848                                     txtform->GetSrcInfo(srcIndex)->lang_cfg,
8849                                     txtform->GetSrcInfo(srcIndex)->letter_spacing + word->added_letter_spacing,
8850                                     false,
8851                                     hints);
8852                                 rect.top = rc.top + frmline->y;
8853                                 rect.bottom = rect.top + frmline->height;
8854                                 // chx is the width of previous chars in the word
8855                                 int chx = (offset > word->t.start) ? w[ offset - word->t.start - 1 ] : 0;
8856                                 if ( word_is_rtl ) {
8857                                     rect.right = word->x + word->width - chx + rc.left + frmline->x;
8858                                     rect.left = rect.right - 1;
8859                                 }
8860                                 else {
8861                                     rect.left = word->x + chx + rc.left + frmline->x;
8862                                     rect.right = rect.left + 1;
8863                                 }
8864                                 if (extended) { // get width of char at offset
8865                                     if (offset == word->t.start && word->t.len == 1) {
8866                                         // With CJK chars, the measured width seems
8867                                         // less correct than the one measured while
8868                                         // making words. So use the calculated word
8869                                         // width for one-char-long words instead
8870                                         if ( word_is_rtl )
8871                                             rect.left = rect.right - word->width;
8872                                         else
8873                                             rect.right = rect.left + word->width;
8874                                     }
8875                                     else {
8876                                         int chw = w[ offset - word->t.start ] - chx;
8877                                         bool hyphen_added = false;
8878                                         if ( offset == word->t.start + word->t.len - 1
8879                                                 && (word->flags & LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER) ) {
8880                                             // if offset is the end of word, and this word has
8881                                             // been hyphenated, includes the hyphen width
8882                                             chw += font->getHyphenWidth();
8883                                             // We then should not account for the right side
8884                                             // bearing below
8885                                             hyphen_added = true;
8886                                         }
8887                                         if ( word_is_rtl ) {
8888                                             rect.left = rect.right - chw;
8889                                             if ( !hyphen_added ) {
8890                                                 // Also remove our added letter spacing for justification
8891                                                 // from the left, to have cleaner highlights.
8892                                                 rect.left += word->added_letter_spacing;
8893                                             }
8894                                         }
8895                                         else {
8896                                             rect.right = rect.left + chw;
8897                                             if ( !hyphen_added ) {
8898                                                 // Also remove our added letter spacing for justification
8899                                                 // from the right, to have cleaner highlights.
8900                                                 rect.right -= word->added_letter_spacing;
8901                                             }
8902                                         }
8903                                         if (adjusted) {
8904                                             // Extend left or right if this glyph overflows its
8905                                             // origin/advance box (can happen with an italic font,
8906                                             // or with a regular font on the right of the letter 'f'
8907                                             // or on the left of the letter 'J').
8908                                             // Only when negative (overflow) and not when positive
8909                                             // (which are more frequent), mostly to keep some good
8910                                             // looking rectangles on the sides when highlighting
8911                                             // multiple lines.
8912                                             rect.left += font->getLeftSideBearing(str[offset], true);
8913                                             if ( !hyphen_added )
8914                                                 rect.right -= font->getRightSideBearing(str[offset], true);
8915                                             // Should work wheter rtl or ltr
8916                                         }
8917                                     }
8918                                     // Ensure we always return a non-zero width, even for zero-width
8919                                     // chars or collapsed spaces (to avoid isEmpty() returning true
8920                                     // which could be considered as a failure)
8921                                     if ( rect.right <= rect.left ) {
8922                                         if ( word_is_rtl )
8923                                             rect.left = rect.right - 1;
8924                                         else
8925                                             rect.right = rect.left + 1;
8926                                     }
8927                                 }
8928                                 return true;
8929                             }
8930                         }
8931                         if ( lastWord ) {
8932                             // If no exact word found, return best candidate
8933                             if (hasBestBidiRect) {
8934                                 rect = bestBidiRect;
8935                                 return true;
8936                             }
8937                             // Otherwise, return end of last word (?)
8938                             rect.top = rc.top + frmline->y;
8939                             rect.bottom = rect.top + frmline->height;
8940                             rect.left = word->x + rc.left + frmline->x + word->width;
8941                             rect.right = rect.left + 1;
8942                             return true;
8943                         }
8944                     }
8945                     continue;
8946                 } // end if line_is_bidi
8947 
8948                 // ================================
8949                 // Generic code when visual order = logical order
8950                 if ( word->src_text_index>=srcIndex || lastWord ) {
8951                     // found word from same src line
8952                     if ( word->flags & (LTEXT_WORD_IS_OBJECT|LTEXT_WORD_IS_INLINE_BOX)
8953                             || word->src_text_index > srcIndex
8954                             || (!extended && offset <= word->t.start)
8955                             || (extended && offset < word->t.start)
8956                             // if extended, and offset = word->t.start, we want to
8957                             // measure the first char, which is done in the next else
8958                             ) {
8959                         // before this word
8960                         rect.left = word->x + rc.left + frmline->x;
8961                         //rect.top = word->y + rc.top + frmline->y + frmline->baseline;
8962                         rect.top = rc.top + frmline->y;
8963                         if (extended) {
8964                             if (word->flags & (LTEXT_WORD_IS_OBJECT|LTEXT_WORD_IS_INLINE_BOX) && word->width > 0)
8965                                 rect.right = rect.left + word->width; // width of image
8966                             else
8967                                 rect.right = rect.left + 1; // not the right word: no char width
8968                         }
8969                         else {
8970                             rect.right = rect.left + 1;
8971                         }
8972                         rect.bottom = rect.top + frmline->height;
8973                         return true;
8974                     } else if ( (word->src_text_index == srcIndex) &&
8975                                 ( (offset < word->t.start+word->t.len) ||
8976                                   (offset==srcLen && offset == word->t.start+word->t.len) ) ) {
8977                         // pointer inside this word
8978                         LVFont *font = (LVFont *) txtform->GetSrcInfo(srcIndex)->t.font;
8979                         lUInt16 w[512];
8980                         lUInt8 flg[512];
8981                         lString32 str = node->getText();
8982                         // With "|| (extended && offset < word->t.start)" added to the first if
8983                         // above, we may now be here with: offset = word->t.start = 0
8984                         // and a node->getText() returning THE lString32::empty_str:
8985                         // font->measureText() would segfault on it because its just a dummy
8986                         // pointer. Not really sure why that happens.
8987                         // It happens when node is the <a> in:
8988                         //     <div><span> <a id="someId"/>Anciens </span> <b>...
8989                         // and offset=0, word->t.start=0, word->t.len=8 .
8990                         // We can just do as in the first 'if'.
8991                         if (offset == word->t.start && str.empty()) {
8992                             rect.left = word->x + rc.left + frmline->x;
8993                             rect.top = rc.top + frmline->y;
8994                             rect.right = rect.left + 1;
8995                             rect.bottom = rect.top + frmline->height;
8996                             return true;
8997                         }
8998                         // We need to transform the node text as it had been when
8999                         // rendered (the transform may change chars widths) for the
9000                         // rect to be correct
9001                         switch ( node->getParentNode()->getStyle()->text_transform ) {
9002                             case css_tt_uppercase:
9003                                 str.uppercase();
9004                                 break;
9005                             case css_tt_lowercase:
9006                                 str.lowercase();
9007                                 break;
9008                             case css_tt_capitalize:
9009                                 str.capitalize();
9010                                 break;
9011                             case css_tt_full_width:
9012                                 // str.fullWidthChars(); // disabled for now in lvrend.cpp
9013                                 break;
9014                             default:
9015                                 break;
9016                         }
9017                         lUInt32 hints = WORD_FLAGS_TO_FNT_FLAGS(word->flags);
9018                         font->measureText(
9019                             str.c_str()+word->t.start,
9020                             word->t.len,
9021                             w,
9022                             flg,
9023                             word->width+50,
9024                             '?',
9025                             txtform->GetSrcInfo(srcIndex)->lang_cfg,
9026                             txtform->GetSrcInfo(srcIndex)->letter_spacing + word->added_letter_spacing,
9027                             false,
9028                             hints );
9029                         // chx is the width of previous chars in the word
9030                         int chx = (offset > word->t.start) ? w[ offset - word->t.start - 1 ] : 0;
9031                         rect.left = word->x + chx + rc.left + frmline->x;
9032                         //rect.top = word->y + rc.top + frmline->y + frmline->baseline;
9033                         rect.top = rc.top + frmline->y;
9034                         if (extended) { // get width of char at offset
9035                             if (offset == word->t.start && word->t.len == 1) {
9036                                 // With CJK chars, the measured width seems
9037                                 // less correct than the one measured while
9038                                 // making words. So use the calculated word
9039                                 // width for one-char-long words instead
9040                                 rect.right = rect.left + word->width;
9041                             }
9042                             else {
9043                                 int chw = w[ offset - word->t.start ] - chx;
9044                                 bool hyphen_added = false;
9045                                 if ( offset == word->t.start + word->t.len - 1
9046                                         && (word->flags & LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER) ) {
9047                                     // if offset is the end of word, and this word has
9048                                     // been hyphenated, includes the hyphen width
9049                                     chw += font->getHyphenWidth();
9050                                     // We then should not account for the right side
9051                                     // bearing below
9052                                     hyphen_added = true;
9053                                 }
9054                                 rect.right = rect.left + chw;
9055                                 if ( !hyphen_added ) {
9056                                     // Also remove our added letter spacing for justification
9057                                     // from the right, to have cleaner highlights.
9058                                     rect.right -= word->added_letter_spacing;
9059                                 }
9060                                 if (adjusted) {
9061                                     // Extend left or right if this glyph overflows its
9062                                     // origin/advance box (can happen with an italic font,
9063                                     // or with a regular font on the right of the letter 'f'
9064                                     // or on the left of the letter 'J').
9065                                     // Only when negative (overflow) and not when positive
9066                                     // (which are more frequent), mostly to keep some good
9067                                     // looking rectangles on the sides when highlighting
9068                                     // multiple lines.
9069                                     rect.left += font->getLeftSideBearing(str[offset], true);
9070                                     if ( !hyphen_added )
9071                                         rect.right -= font->getRightSideBearing(str[offset], true);
9072                                 }
9073                             }
9074                             // Ensure we always return a non-zero width, even for zero-width
9075                             // chars or collapsed spaces (to avoid isEmpty() returning true
9076                             // which could be considered as a failure)
9077                             if ( rect.right <= rect.left )
9078                                 rect.right = rect.left + 1;
9079                         }
9080                         else
9081                             rect.right = rect.left + 1;
9082                         rect.bottom = rect.top + frmline->height;
9083                         return true;
9084                     } else if (lastWord) {
9085                         // after last word
9086                         rect.left = word->x + rc.left + frmline->x + word->width;
9087                         //rect.top = word->y + rc.top + frmline->y + frmline->baseline;
9088                         rect.top = rc.top + frmline->y;
9089                         if (extended)
9090                             rect.right = rect.left + 1; // not the right word: no char width
9091                         else
9092                             rect.right = rect.left + 1;
9093                         rect.bottom = rect.top + frmline->height;
9094                         return true;
9095                     }
9096                 }
9097             }
9098         }
9099         // return false;
9100         // Not found, which is possible with a final node with only empty
9101         // elements. This final node has a rect, so use it.
9102         rect = rc;
9103         return true;
9104     } else {
9105         // no base final node, using blocks
9106         //lvRect rc;
9107         ldomNode * node = getNode();
9108         int offset = getOffset();
9109         if ( offset<0 || node->getChildCount()==0 ) {
9110             node->getAbsRect( rect );
9111             return true;
9112             //return rc.topLeft();
9113         }
9114         if ( offset < (int)node->getChildCount() ) {
9115             node->getChildNode(offset)->getAbsRect( rect );
9116             return true;
9117             //return rc.topLeft();
9118         }
9119         node->getChildNode(node->getChildCount()-1)->getAbsRect( rect );
9120         return true;
9121         //return rc.bottomRight();
9122     }
9123 }
9124 #endif
9125 
isBoxingNode(ldomNode * node)9126 static bool isBoxingNode(ldomNode * node)
9127 {
9128     // In the context this is used (xpointers), handle pseudoElems (that don't
9129     // box anything) just as boxing nodes: ignoring them in XPointers.
9130     return node->isBoxingNode(true);
9131 }
9132 
isTextNode(ldomNode * node)9133 static bool isTextNode(ldomNode * node)
9134 {
9135     return (node && node->isText());
9136 }
9137 
9138 struct ldomNodeIdPredicate
9139 {
9140     lUInt16 m_id;
ldomNodeIdPredicateldomNodeIdPredicate9141     ldomNodeIdPredicate(lUInt16 id) : m_id(id) {}
operator ()ldomNodeIdPredicate9142     bool operator() (ldomNode * node) {
9143         return (node && node->getNodeId() == m_id);
9144     }
9145 };
9146 
notNull(ldomNode * node)9147 static bool notNull(ldomNode * node)
9148 {
9149     return (NULL != node);
9150 }
9151 
9152 template<typename T>
getNodeByIndex(ldomNode * parent,int index,T predicat,int & count)9153 static ldomNode * getNodeByIndex(ldomNode *parent, int index, T predicat, int& count)
9154 {
9155     ldomNode *foundNode = NULL;
9156 
9157     for( int i=0; i < (int)parent->getChildCount(); i++) {
9158         ldomNode * p = parent->getChildNode(i);
9159         if( isBoxingNode(p) ) {
9160             foundNode = getNodeByIndex(p, index, predicat, count);
9161             if( foundNode )
9162                 return foundNode;
9163         } else if(predicat(p)) {
9164             count++;
9165             if(index == -1 || count == index) {
9166                 if( !foundNode )
9167                     foundNode = p;
9168                 return foundNode;
9169             }
9170         }
9171     }
9172     return NULL;
9173 }
9174 
9175 /// create XPointer from relative pointer non-normalized string made by toStringV1()
createXPointerV1(ldomNode * baseNode,const lString32 & xPointerStr)9176 ldomXPointer ldomDocument::createXPointerV1( ldomNode * baseNode, const lString32 & xPointerStr )
9177 {
9178     //CRLog::trace( "ldomDocument::createXPointer(%s)", UnicodeToUtf8(xPointerStr).c_str() );
9179     if ( xPointerStr.empty() || !baseNode )
9180         return ldomXPointer();
9181     const lChar32 * str = xPointerStr.c_str();
9182     int index = -1;
9183     ldomNode * currNode = baseNode;
9184     lString32 name;
9185     lString8 ptr8 = UnicodeToUtf8(xPointerStr);
9186     //const char * ptr = ptr8.c_str();
9187     xpath_step_t step_type;
9188 
9189     while ( *str ) {
9190         //CRLog::trace( "    %s", UnicodeToUtf8(lString32(str)).c_str() );
9191         step_type = ParseXPathStep( str, name, index );
9192         //CRLog::trace( "        name=%s index=%d", UnicodeToUtf8(lString32(name)).c_str(), index );
9193         switch (step_type ) {
9194         case xpath_step_error:
9195             // error
9196             //CRLog::trace("    xpath_step_error");
9197             return ldomXPointer();
9198         case xpath_step_element:
9199             // element of type 'name' with 'index'        /elemname[N]/
9200             {
9201                 lUInt16 id = getElementNameIndex( name.c_str() );
9202                 ldomNode * foundItem = currNode->findChildElement(LXML_NS_ANY, id, index > 0 ? index - 1 : -1);
9203                 if (foundItem == NULL && currNode->getChildCount() == 1) {
9204                     // make saved pointers work properly even after moving of some part of path one element deeper
9205                     foundItem = currNode->getChildNode(0)->findChildElement(LXML_NS_ANY, id, index > 0 ? index - 1 : -1);
9206                 }
9207 //                int foundCount = 0;
9208 //                for (unsigned i=0; i<currNode->getChildCount(); i++) {
9209 //                    ldomNode * p = currNode->getChildNode(i);
9210 //                    //CRLog::trace( "        node[%d] = %d %s", i, p->getNodeId(), LCSTR(p->getNodeName()) );
9211 //                    if ( p && p->isElement() && p->getNodeId()==id ) {
9212 //                        foundCount++;
9213 //                        if ( foundCount==index || index==-1 ) {
9214 //                            foundItem = p;
9215 //                            break; // DON'T CHECK WHETHER OTHER ELEMENTS EXIST
9216 //                        }
9217 //                    }
9218 //                }
9219 //                if ( foundItem==NULL || (index==-1 && foundCount>1) ) {
9220 //                    //CRLog::trace("    Element %d is not found. foundCount=%d", id, foundCount);
9221 //                    return ldomXPointer(); // node not found
9222 //                }
9223                 if (foundItem == NULL) {
9224                     //CRLog::trace("    Element %d is not found. foundCount=%d", id, foundCount);
9225                     return ldomXPointer(); // node not found
9226                 }
9227                 // found element node
9228                 currNode = foundItem;
9229             }
9230             break;
9231         case xpath_step_text:
9232             // text node with 'index'                     /text()[N]/
9233             {
9234                 ldomNode * foundItem = NULL;
9235                 int foundCount = 0;
9236                 for (int i=0; i<currNode->getChildCount(); i++) {
9237                     ldomNode * p = currNode->getChildNode(i);
9238                     if ( p->isText() ) {
9239                         foundCount++;
9240                         if ( foundCount==index || index==-1 ) {
9241                             foundItem = p;
9242                         }
9243                     }
9244                 }
9245                 if ( foundItem==NULL || (index==-1 && foundCount>1) )
9246                     return ldomXPointer(); // node not found
9247                 // found text node
9248                 currNode = foundItem;
9249             }
9250             break;
9251         case xpath_step_nodeindex:
9252             // node index                                 /N/
9253             if ( index<=0 || index>(int)currNode->getChildCount() )
9254                 return ldomXPointer(); // node not found: invalid index
9255             currNode = currNode->getChildNode( index-1 );
9256             break;
9257         case xpath_step_point:
9258             // point index                                .N
9259             if (*str)
9260                 return ldomXPointer(); // not at end of string
9261             if ( currNode->isElement() ) {
9262                 // element point
9263                 if ( index<0 || index>(int)currNode->getChildCount() )
9264                     return ldomXPointer();
9265                 return ldomXPointer(currNode, index);
9266             } else {
9267                 // text point
9268                 if ( index<0 || index>(int)currNode->getText().length() )
9269                     return ldomXPointer();
9270                 return ldomXPointer(currNode, index);
9271             }
9272             break;
9273         }
9274     }
9275     return ldomXPointer( currNode, -1 ); // XPath: index==-1
9276 }
9277 
9278 /// create XPointer from relative pointer normalized string made by toStringV2()
createXPointerV2(ldomNode * baseNode,const lString32 & xPointerStr)9279 ldomXPointer ldomDocument::createXPointerV2( ldomNode * baseNode, const lString32 & xPointerStr )
9280 {
9281     //CRLog::trace( "ldomDocument::createXPointer(%s)", UnicodeToUtf8(xPointerStr).c_str() );
9282     if ( xPointerStr.empty() || !baseNode )
9283         return ldomXPointer();
9284     const lChar32 * str = xPointerStr.c_str();
9285     int index = -1;
9286     int count;
9287     ldomNode * currNode = baseNode;
9288     ldomNode * foundNode;
9289     lString32 name;
9290     xpath_step_t step_type;
9291 
9292     while ( *str ) {
9293         //CRLog::trace( "    %s", UnicodeToUtf8(lString32(str)).c_str() );
9294         step_type = ParseXPathStep( str, name, index );
9295         //CRLog::trace( "        name=%s index=%d", UnicodeToUtf8(lString32(name)).c_str(), index );
9296         switch (step_type ) {
9297         case xpath_step_error:
9298             // error
9299             //CRLog::trace("    xpath_step_error");
9300             return ldomXPointer();
9301         case xpath_step_element:
9302             // element of type 'name' with 'index'        /elemname[N]/
9303             {
9304                 ldomNodeIdPredicate predicat(getElementNameIndex( name.c_str() ));
9305                 count = 0;
9306                 foundNode = getNodeByIndex(currNode, index, predicat, count);
9307                 if (foundNode == NULL) {
9308                     //CRLog::trace("    Element %d is not found. foundCount=%d", id, foundCount);
9309                     return ldomXPointer(); // node not found
9310                 }
9311                 // found element node
9312                 currNode = foundNode;
9313                 lString32 nm = currNode->getNodeName();
9314                 CRLog::trace("%d -> %s", index, LCSTR(nm));
9315             }
9316             break;
9317         case xpath_step_text:
9318             //
9319             count = 0;
9320             foundNode = getNodeByIndex(currNode, index, isTextNode, count);
9321 
9322             if ( foundNode==NULL )
9323                 return ldomXPointer(); // node not found
9324             // found text node
9325             currNode = foundNode;
9326             break;
9327         case xpath_step_nodeindex:
9328             // node index                                 /N/
9329             count = 0;
9330             foundNode = getNodeByIndex(currNode, index, notNull, count);
9331             if ( foundNode == NULL )
9332                 return ldomXPointer(); // node not found: invalid index
9333             currNode = foundNode;
9334             break;
9335         case xpath_step_point:
9336             // point index                                .N
9337             if (*str)
9338                 return ldomXPointer(); // not at end of string
9339             if ( currNode->isElement() ) {
9340                 // element point
9341                 if ( index<0 || index>(int)currNode->getChildCount() )
9342                     return ldomXPointer();
9343                 return ldomXPointer(currNode, index);
9344             } else {
9345                 // text point
9346                 if ( index<0 || index>(int)currNode->getText().length() )
9347                     return ldomXPointer();
9348                 return ldomXPointer(currNode, index);
9349             }
9350             break;
9351         }
9352     }
9353     return ldomXPointer( currNode, -1 ); // XPath: index==-1
9354 }
9355 
9356 /// returns XPath segment for this element relative to parent element (e.g. "p[10]")
getXPathSegment()9357 lString32 ldomNode::getXPathSegment()
9358 {
9359     if ( isNull() || isRoot() )
9360         return lString32::empty_str;
9361     ldomNode * parent = getParentNode();
9362     int cnt = parent->getChildCount();
9363     int index = 0;
9364     if ( isElement() ) {
9365         int id = getNodeId();
9366         for ( int i=0; i<cnt; i++ ) {
9367             ldomNode * node = parent->getChildNode(i);
9368             if ( node == this ) {
9369                 return getNodeName() + "[" + fmt::decimal(index+1) + "]";
9370             }
9371             if ( node->isElement() && node->getNodeId()==id )
9372                 index++;
9373         }
9374     } else {
9375         for ( int i=0; i<cnt; i++ ) {
9376             ldomNode * node = parent->getChildNode(i);
9377             if ( node == this ) {
9378                 return "text()[" + lString32::itoa(index+1) + "]";
9379             }
9380             if ( node->isText() )
9381                 index++;
9382         }
9383     }
9384     return lString32::empty_str;
9385 }
9386 
9387 // Using names, old, with boxing elements (non-normalized)
toStringV1()9388 lString32 ldomXPointer::toStringV1()
9389 {
9390     lString32 path;
9391     if ( isNull() )
9392         return path;
9393     ldomNode * node = getNode();
9394     int offset = getOffset();
9395     if ( offset >= 0 ) {
9396         path << "." << fmt::decimal(offset);
9397     }
9398     ldomNode * p = node;
9399     ldomNode * mainNode = node->getDocument()->getRootNode();
9400     while (p && p!=mainNode) {
9401         ldomNode * parent = p->getParentNode();
9402         if ( p->isElement() ) {
9403             // element
9404             lString32 name = p->getNodeName();
9405             lUInt16 id = p->getNodeId();
9406             if ( !parent )
9407                 return "/" + name + path;
9408             int index = -1;
9409             int count = 0;
9410             for ( int i=0; i<parent->getChildCount(); i++ ) {
9411                 ldomNode * node = parent->getChildElementNode( i, id );
9412                 if ( node ) {
9413                     count++;
9414                     if ( node==p )
9415                         index = count;
9416                 }
9417             }
9418             if ( count>1 )
9419                 path = cs32("/") + name + "[" + fmt::decimal(index) + "]" + path;
9420             else
9421                 path = cs32("/") + name + path;
9422         } else {
9423             // text
9424             if ( !parent )
9425                 return cs32("/text()") + path;
9426             int index = -1;
9427             int count = 0;
9428             for ( int i=0; i<parent->getChildCount(); i++ ) {
9429                 ldomNode * node = parent->getChildNode( i );
9430                 if ( node->isText() ) {
9431                     count++;
9432                     if ( node==p )
9433                         index = count;
9434                 }
9435             }
9436             if ( count>1 )
9437                 path = cs32("/text()") + "[" + fmt::decimal(index) + "]" + path;
9438             else
9439                 path = "/text()" + path;
9440         }
9441         p = parent;
9442     }
9443     return path;
9444 }
9445 
9446 template<typename T>
getElementIndex(ldomNode * parent,ldomNode * targetNode,T predicat,int & count)9447 static int getElementIndex(ldomNode* parent, ldomNode *targetNode, T predicat, int& count)
9448 {
9449     for ( int i=0; i<parent->getChildCount(); i++ ) {
9450         ldomNode * node = parent->getChildNode( i );
9451         if( isBoxingNode(node) && targetNode != node ) {
9452             int index = getElementIndex(node, targetNode, predicat, count);
9453             if(index > 0)
9454                 return index;
9455         } else if (predicat(node))
9456            count++;
9457         if ( node==targetNode )
9458             return count;
9459     }
9460     return -1;
9461 }
9462 
9463 // Using names, new, without boxing elements, so: normalized
toStringV2()9464 lString32 ldomXPointer::toStringV2()
9465 {
9466     lString32 path;
9467     if ( isNull() )
9468         return path;
9469     ldomNode * node = getNode();
9470     int offset = getOffset();
9471     ldomNode * p = node;
9472     if ( !node->isBoxingNode(true) ) { // (nor pseudoElem)
9473         if ( offset >= 0 ) {
9474             path << "." << fmt::decimal(offset);
9475         }
9476     }
9477     else {
9478         if ( offset < p->getChildCount() )
9479             p = p->getChildNode(offset);
9480         else
9481             p = p->getParentNode();
9482     }
9483     ldomNode * mainNode = node->getDocument()->getRootNode();
9484     while (p && p!=mainNode) {
9485         ldomNode * parent = p->getParentNode();
9486         while( isBoxingNode(parent) )
9487             parent = parent->getParentNode();
9488         if ( p->isElement() ) {
9489             // element
9490             lString32 name = p->getNodeName();
9491             if ( !parent )
9492                 return "/" + name + path;
9493             int count = 0;
9494             ldomNodeIdPredicate predicat(p->getNodeId());
9495             int index = getElementIndex(parent, p, predicat, count);
9496             if ( count == 1 ) {
9497                 // We're first, but see if we have following siblings with the
9498                 // same element name, so we can have "div[1]" instead of "div"
9499                 // when parent has more than one of it (as toStringV1 does).
9500                 ldomNode * n = p;
9501                 while ( ( n = n->getUnboxedNextSibling(true) ) ) {
9502                     if ( predicat(n) ) { // We have such a followup sibling
9503                         count = 2; // there's at least 2 of them
9504                         break;
9505                     }
9506                 }
9507             }
9508             if ( count>1 )
9509                 path = cs32("/") + name + "[" + fmt::decimal(index) + "]" + path;
9510             else
9511                 path = cs32("/") + name + path;
9512         } else {
9513             // text
9514             if ( !parent )
9515                 return cs32("/text()") + path;
9516             int count = 0;
9517             int index = getElementIndex(parent, p, isTextNode, count);
9518             if ( count == 1 ) {
9519                 // We're first, but see if we have following text siblings,
9520                 // so we can have "text()[1]" instead of "text()" when
9521                 // parent has more than one text node (as toStringV1 does).
9522                 ldomNode * n = p;
9523                 while ( ( n = n->getUnboxedNextSibling(false) ) ) {
9524                     if ( isTextNode(n) ) { // We have such a followup sibling
9525                         count = 2; // there's at least 2 of them
9526                         break;
9527                     }
9528                 }
9529             }
9530             if ( count>1 )
9531                 path = cs32("/text()") + "[" + fmt::decimal(index) + "]" + path;
9532             else
9533                 path = "/text()" + path;
9534         }
9535         p = parent;
9536     }
9537     return path;
9538 }
9539 
9540 // Without element names, normalized (not used)
toStringV2AsIndexes()9541 lString32 ldomXPointer::toStringV2AsIndexes()
9542 {
9543     lString32 path;
9544     if ( isNull() )
9545         return path;
9546     int offset = getOffset();
9547     if ( offset >= 0 ) {
9548         path << "." << fmt::decimal(offset);
9549     }
9550     ldomNode * p = getNode();
9551     ldomNode * rootNode = p->getDocument()->getRootNode();
9552     while( p && p!=rootNode ) {
9553         ldomNode * parent = p->getParentNode();
9554         if ( !parent )
9555             return "/" + (p->isElement() ? p->getNodeName() : cs32("/text()")) + path;
9556 
9557         while( isBoxingNode(parent) )
9558             parent = parent->getParentNode();
9559 
9560         int count = 0;
9561         int index = getElementIndex(parent, p, notNull, count);
9562 
9563         if( index>0 ) {
9564             path = cs32("/") + fmt::decimal(index) + path;
9565         } else {
9566             CRLog::error("!!! child node not found in a parent");
9567         }
9568         p = parent;
9569     }
9570     return path;
9571 }
9572 
9573 #if BUILD_LITE!=1
getFullHeight()9574 int ldomDocument::getFullHeight()
9575 {
9576     RenderRectAccessor rd( this->getRootNode() );
9577     return rd.getHeight() + rd.getY();
9578 }
9579 #endif
9580 
9581 
9582 
9583 
extractDocAuthors(ldomDocument * doc,lString32 delimiter,bool shortMiddleName)9584 lString32 extractDocAuthors( ldomDocument * doc, lString32 delimiter, bool shortMiddleName )
9585 {
9586     if ( delimiter.empty() )
9587         delimiter = ", ";
9588     lString32 authors;
9589     for ( int i=0; i<16; i++) {
9590         lString32 path = cs32("/FictionBook/description/title-info/author[") + fmt::decimal(i+1) + "]";
9591         ldomXPointer pauthor = doc->createXPointer(path);
9592         if ( !pauthor ) {
9593             //CRLog::trace( "xpath not found: %s", UnicodeToUtf8(path).c_str() );
9594             break;
9595         }
9596         lString32 firstName = pauthor.relative( U"/first-name" ).getText().trim();
9597         lString32 lastName = pauthor.relative( U"/last-name" ).getText().trim();
9598         lString32 middleName = pauthor.relative( U"/middle-name" ).getText().trim();
9599         lString32 author = firstName;
9600         if ( !author.empty() )
9601             author += " ";
9602         if ( !middleName.empty() )
9603             author += shortMiddleName ? lString32(middleName, 0, 1) + "." : middleName;
9604         if ( !lastName.empty() && !author.empty() )
9605             author += " ";
9606         author += lastName;
9607         if ( !authors.empty() )
9608             authors += delimiter;
9609         authors += author;
9610     }
9611     return authors;
9612 }
9613 
extractDocTitle(ldomDocument * doc)9614 lString32 extractDocTitle( ldomDocument * doc )
9615 {
9616     return doc->createXPointer(U"/FictionBook/description/title-info/book-title").getText().trim();
9617 }
9618 
extractDocLanguage(ldomDocument * doc)9619 lString32 extractDocLanguage( ldomDocument * doc )
9620 {
9621     return doc->createXPointer(U"/FictionBook/description/title-info/lang").getText().trim();
9622 }
9623 
extractDocSeries(ldomDocument * doc,int * pSeriesNumber)9624 lString32 extractDocSeries( ldomDocument * doc, int * pSeriesNumber )
9625 {
9626     lString32 res;
9627     ldomNode * series = doc->createXPointer(U"/FictionBook/description/title-info/sequence").getNode();
9628     if ( series ) {
9629         lString32 sname = lString32(series->getAttributeValue(attr_name)).trim();
9630         lString32 snumber = series->getAttributeValue(attr_number);
9631         if ( !sname.empty() ) {
9632             if ( pSeriesNumber ) {
9633                 *pSeriesNumber = snumber.atoi();
9634                 res = sname;
9635             } else {
9636                 res << "(" << sname;
9637                 if ( !snumber.empty() )
9638                     res << " #" << snumber << ")";
9639             }
9640         }
9641     }
9642     return res;
9643 }
9644 
extractDocKeywords(ldomDocument * doc)9645 lString32 extractDocKeywords( ldomDocument * doc )
9646 {
9647     lString32 res;
9648 #if 0
9649     // Year
9650     res << doc->createXPointer(U"/FictionBook/description/title-info/date").getText().trim();
9651 #endif
9652     // Genres
9653     // We use "\n" as a separator here, so if you change it here, you must also change it in
9654     // Engine.scanBookPropertiesInternal(), DocView.updateBookInfoInternal().
9655     for ( int i=0; i<16; i++) {
9656         lString32 path = cs32("/FictionBook/description/title-info/genre[") + fmt::decimal(i+1) + "]";
9657         ldomXPointer genre = doc->createXPointer(path);
9658         if ( !genre ) {
9659             break;
9660         }
9661         lString32 text = genre.getText().trim();
9662         if (!text.empty()) {
9663             if (!res.empty())
9664                 res << "\n";
9665             res << text;
9666         }
9667     }
9668     return res;
9669 }
9670 
extractDocDescription(ldomDocument * doc)9671 lString32 extractDocDescription( ldomDocument * doc )
9672 {
9673     // We put all other FB2 meta info in this description
9674     lString32 res;
9675 
9676     // Annotation (description)
9677     res << doc->createXPointer(U"/FictionBook/description/title-info/annotation").getText().trim();
9678 
9679     // Translators
9680     lString32 translators;
9681     int nbTranslators = 0;
9682     for ( int i=0; i<16; i++) {
9683         lString32 path = cs32("/FictionBook/description/title-info/translator[") + fmt::decimal(i+1) + "]";
9684         ldomXPointer ptranslator = doc->createXPointer(path);
9685         if ( !ptranslator ) {
9686             break;
9687         }
9688         lString32 firstName = ptranslator.relative( U"/first-name" ).getText().trim();
9689         lString32 lastName = ptranslator.relative( U"/last-name" ).getText().trim();
9690         lString32 middleName = ptranslator.relative( U"/middle-name" ).getText().trim();
9691         lString32 translator = firstName;
9692         if ( !translator.empty() )
9693             translator += " ";
9694         if ( !middleName.empty() )
9695             translator += middleName;
9696         if ( !lastName.empty() && !translator.empty() )
9697             translator += " ";
9698         translator += lastName;
9699         if ( !translators.empty() )
9700             translators << "\n";
9701         translators << translator;
9702         nbTranslators++;
9703     }
9704     if ( !translators.empty() ) {
9705         if ( !res.empty() )
9706             res << "\n\n";
9707         if ( nbTranslators > 1 )
9708             res << "Translators:\n" << translators;
9709         else
9710             res << "Translator: " << translators;
9711     }
9712 
9713     // Publication info & publisher
9714     ldomXPointer publishInfo = doc->createXPointer(U"/FictionBook/description/publish-info");
9715     if ( !publishInfo.isNull() ) {
9716         lString32 publisher = publishInfo.relative( U"/publisher" ).getText().trim();
9717         lString32 pubcity = publishInfo.relative( U"/city" ).getText().trim();
9718         lString32 pubyear = publishInfo.relative( U"/year" ).getText().trim();
9719         lString32 isbn = publishInfo.relative( U"/isbn" ).getText().trim();
9720         lString32 bookName = publishInfo.relative( U"/book-name" ).getText().trim();
9721         lString32 publication;
9722         if ( !publisher.empty() || !pubcity.empty() ) {
9723             if ( !publisher.empty() ) {
9724                 publication << publisher;
9725             }
9726             if ( !pubcity.empty() ) {
9727                 if ( !!publisher.empty() ) {
9728                     publication << ", ";
9729                 }
9730                 publication << pubcity;
9731             }
9732         }
9733         if ( !pubyear.empty() || !isbn.empty() ) {
9734             if ( !publication.empty() )
9735                 publication << "\n";
9736             if ( !pubyear.empty() ) {
9737                 publication << pubyear;
9738             }
9739             if ( !isbn.empty() ) {
9740                 if ( !pubyear.empty() ) {
9741                     publication << ", ";
9742                 }
9743                 publication << isbn;
9744             }
9745         }
9746         if ( !bookName.empty() ) {
9747             if ( !publication.empty() )
9748                 publication << "\n";
9749             publication << bookName;
9750         }
9751         if ( !publication.empty() ) {
9752             if ( !res.empty() )
9753                 res << "\n\n";
9754             res << "Publication:\n" << publication;
9755         }
9756     }
9757 
9758     // Document info
9759     ldomXPointer pDocInfo = doc->createXPointer(U"/FictionBook/description/document-info");
9760     if ( !pDocInfo.isNull() ) {
9761         lString32 docInfo;
9762         lString32 docAuthors;
9763         int nbAuthors = 0;
9764         for ( int i=0; i<16; i++) {
9765             lString32 path = cs32("/FictionBook/description/document-info/author[") + fmt::decimal(i+1) + "]";
9766             ldomXPointer pdocAuthor = doc->createXPointer(path);
9767             if ( !pdocAuthor ) {
9768                 break;
9769             }
9770             lString32 firstName = pdocAuthor.relative( U"/first-name" ).getText().trim();
9771             lString32 lastName = pdocAuthor.relative( U"/last-name" ).getText().trim();
9772             lString32 middleName = pdocAuthor.relative( U"/middle-name" ).getText().trim();
9773             lString32 docAuthor = firstName;
9774             if ( !docAuthor.empty() )
9775                 docAuthor += " ";
9776             if ( !middleName.empty() )
9777                 docAuthor += middleName;
9778             if ( !lastName.empty() && !docAuthor.empty() )
9779                 docAuthor += " ";
9780             docAuthor += lastName;
9781             if ( !docAuthors.empty() )
9782                 docAuthors << "\n";
9783             docAuthors << docAuthor;
9784             nbAuthors++;
9785         }
9786         if ( !docAuthors.empty() ) {
9787             if ( nbAuthors > 1 )
9788                 docInfo << "Authors:\n" << docAuthors;
9789             else
9790                 docInfo << "Author: " << docAuthors;
9791         }
9792         lString32 docPublisher = pDocInfo.relative( U"/publisher" ).getText().trim();
9793         lString32 docId = pDocInfo.relative( U"/id" ).getText().trim();
9794         lString32 docVersion = pDocInfo.relative( U"/version" ).getText().trim();
9795         lString32 docDate = pDocInfo.relative( U"/date" ).getText().trim();
9796         lString32 docHistory = pDocInfo.relative( U"/history" ).getText().trim();
9797         lString32 docSrcUrl = pDocInfo.relative( U"/src-url" ).getText().trim();
9798         lString32 docSrcOcr = pDocInfo.relative( U"/src-ocr" ).getText().trim();
9799         lString32 docProgramUsed = pDocInfo.relative( U"/program-used" ).getText().trim();
9800         if ( !docPublisher.empty() ) {
9801             if ( !docInfo.empty() )
9802                 docInfo << "\n";
9803             docInfo << "Publisher: " << docPublisher;
9804         }
9805         if ( !docId.empty() ) {
9806             if ( !docInfo.empty() )
9807                 docInfo << "\n";
9808             docInfo << "Id: " << docId;
9809         }
9810         if ( !docVersion.empty() ) {
9811             if ( !docInfo.empty() )
9812                 docInfo << "\n";
9813             docInfo << "Version: " << docVersion;
9814         }
9815         if ( !docDate.empty() ) {
9816             if ( !docInfo.empty() )
9817                 docInfo << "\n";
9818             docInfo << "Date: " << docDate;
9819         }
9820         if ( !docHistory.empty() ) {
9821             if ( !docInfo.empty() )
9822                 docInfo << "\n";
9823             docInfo << "History: " << docHistory;
9824         }
9825         if ( !docSrcUrl.empty() ) {
9826             if ( !docInfo.empty() )
9827                 docInfo << "\n";
9828             docInfo << "URL: " << docSrcUrl;
9829         }
9830         if ( !docSrcOcr.empty() ) {
9831             if ( !docInfo.empty() )
9832                 docInfo << "\n";
9833             docInfo << "OCR: " << docSrcOcr;
9834         }
9835         if ( !docProgramUsed.empty() ) {
9836             if ( !docInfo.empty() )
9837                 docInfo << "\n";
9838             docInfo << "Application: " << docProgramUsed;
9839         }
9840         if ( !docInfo.empty() ) {
9841             if ( !res.empty() )
9842                 res << "\n\n";
9843             res << "Document:\n" << docInfo;
9844         }
9845     }
9846 
9847     return res;
9848 }
9849 
initIndex()9850 void ldomXPointerEx::initIndex()
9851 {
9852     int m[MAX_DOM_LEVEL];
9853     ldomNode * p = getNode();
9854     _level = 0;
9855     while ( p ) {
9856         m[_level] = p->getNodeIndex();
9857         _level++;
9858         if ( _level == MAX_DOM_LEVEL ) {
9859             getDocument()->printWarning("ldomXPointerEx level overflow (too many nested nodes)", 1);
9860             break;
9861         }
9862         p = p->getParentNode();
9863     }
9864     for ( int i=0; i<_level; i++ ) {
9865         _indexes[ i ] = m[ _level - i - 1 ];
9866     }
9867 }
9868 
9869 /// move to sibling #
sibling(int index)9870 bool ldomXPointerEx::sibling( int index )
9871 {
9872     if ( _level <= 1 )
9873         return false;
9874     ldomNode * p = getNode()->getParentNode();
9875     if ( !p || index < 0 || index >= (int)p->getChildCount() )
9876         return false;
9877     setNode( p->getChildNode( index ) );
9878     setOffset(0);
9879     _indexes[ _level-1 ] = index;
9880     return true;
9881 }
9882 
9883 /// move to next sibling
nextSibling()9884 bool ldomXPointerEx::nextSibling()
9885 {
9886     if ( _level <= 1 )
9887         return false;
9888     return sibling( _indexes[_level-1] + 1 );
9889 }
9890 
9891 /// move to previous sibling
prevSibling()9892 bool ldomXPointerEx::prevSibling()
9893 {
9894     if ( _level <= 1 )
9895         return false;
9896     return sibling( _indexes[_level-1] - 1 );
9897 }
9898 
9899 /// move to next sibling element
nextSiblingElement()9900 bool ldomXPointerEx::nextSiblingElement()
9901 {
9902     if ( _level <= 1 )
9903         return false;
9904     ldomNode * node = getNode();
9905     ldomNode * p = node->getParentNode();
9906     for ( int i=_indexes[_level-1] + 1; i<(int)p->getChildCount(); i++ ) {
9907         if ( p->getChildNode( i )->isElement() )
9908             return sibling( i );
9909     }
9910     return false;
9911 }
9912 
9913 /// move to previous sibling element
prevSiblingElement()9914 bool ldomXPointerEx::prevSiblingElement()
9915 {
9916     if ( _level <= 1 )
9917         return false;
9918     ldomNode * node = getNode();
9919     ldomNode * p = node->getParentNode();
9920     for ( int i=_indexes[_level-1] - 1; i>=0; i-- ) {
9921         if ( p->getChildNode( i )->isElement() )
9922             return sibling( i );
9923     }
9924     return false;
9925 }
9926 
9927 /// move to next sibling or parent's next sibling
nextOuterElement()9928 bool ldomXPointerEx::nextOuterElement()
9929 {
9930     if ( !ensureElement() )
9931         return false;
9932     for (;;) {
9933         if ( nextSiblingElement() )
9934             return true;
9935         if ( !parent() )
9936             return false;
9937     }
9938 }
9939 
9940 /// move to (end of) last and deepest child node descendant of current node
lastInnerNode(bool toTextEnd)9941 bool ldomXPointerEx::lastInnerNode(bool toTextEnd)
9942 {
9943     if ( !getNode() )
9944         return false;
9945     while ( lastChild() ) {}
9946     if ( isText() && toTextEnd ) {
9947         setOffset(getNode()->getText().length());
9948     }
9949     return true;
9950 }
9951 
9952 /// move to (end of) last and deepest child text node descendant of current node
lastInnerTextNode(bool toTextEnd)9953 bool ldomXPointerEx::lastInnerTextNode(bool toTextEnd)
9954 {
9955     if ( !getNode() )
9956         return false;
9957     if ( isText() ) {
9958         if (toTextEnd)
9959             setOffset(getNode()->getText().length());
9960         return true;
9961     }
9962     if ( lastChild() ) {
9963         do {
9964             if (lastInnerTextNode(toTextEnd))
9965                 return true;
9966         } while ( prevSibling() );
9967         parent();
9968     }
9969     return false;
9970 
9971 }
9972 
9973 /// move to parent
parent()9974 bool ldomXPointerEx::parent()
9975 {
9976     if ( _level<=1 )
9977         return false;
9978     setNode( getNode()->getParentNode() );
9979     setOffset(0);
9980     _level--;
9981     return true;
9982 }
9983 
9984 /// move to child #
child(int index)9985 bool ldomXPointerEx::child( int index )
9986 {
9987     if ( _level >= MAX_DOM_LEVEL )
9988         return false;
9989     int count = getNode()->getChildCount();
9990     if ( index<0 || index>=count )
9991         return false;
9992     _indexes[ _level++ ] = index;
9993     setNode( getNode()->getChildNode( index ) );
9994     setOffset(0);
9995     return true;
9996 }
9997 
9998 /// compare two pointers, returns -1, 0, +1
compare(const ldomXPointerEx & v) const9999 int ldomXPointerEx::compare( const ldomXPointerEx& v ) const
10000 {
10001     int i;
10002     for ( i=0; i<_level && i<v._level; i++ ) {
10003         if ( _indexes[i] < v._indexes[i] )
10004             return -1;
10005         if ( _indexes[i] > v._indexes[i] )
10006             return 1;
10007     }
10008     if ( _level < v._level ) {
10009         return -1;
10010 //        if ( getOffset() < v._indexes[i] )
10011 //            return -1;
10012 //        if ( getOffset() > v._indexes[i] )
10013 //            return 1;
10014 //        return -1;
10015     }
10016     if ( _level > v._level ) {
10017         if ( _indexes[i] < v.getOffset() )
10018             return -1;
10019         if ( _indexes[i] > v.getOffset() )
10020             return 1;
10021         return 1;
10022     }
10023     if ( getOffset() < v.getOffset() )
10024         return -1;
10025     if ( getOffset() > v.getOffset() )
10026         return 1;
10027     return 0;
10028 }
10029 
10030 /// calls specified function recursively for all elements of DOM tree
recurseElements(void (* pFun)(ldomXPointerEx & node))10031 void ldomXPointerEx::recurseElements( void (*pFun)( ldomXPointerEx & node ) )
10032 {
10033     if ( !isElement() )
10034         return;
10035     pFun( *this );
10036     if ( child( 0 ) ) {
10037         do {
10038             recurseElements( pFun );
10039         } while ( nextSibling() );
10040         parent();
10041     }
10042 }
10043 
10044 /// calls specified function recursively for all nodes of DOM tree
recurseNodes(void (* pFun)(ldomXPointerEx & node))10045 void ldomXPointerEx::recurseNodes( void (*pFun)( ldomXPointerEx & node ) )
10046 {
10047     if ( !isElement() )
10048         return;
10049     pFun( *this );
10050     if ( child( 0 ) ) {
10051         do {
10052             recurseElements( pFun );
10053         } while ( nextSibling() );
10054         parent();
10055     }
10056 }
10057 
10058 /// returns true if this interval intersects specified interval
checkIntersection(ldomXRange & v)10059 bool ldomXRange::checkIntersection( ldomXRange & v )
10060 {
10061     if ( isNull() || v.isNull() )
10062         return false;
10063     if ( _end.compare( v._start ) < 0 )
10064         return false;
10065     if ( _start.compare( v._end ) > 0 )
10066         return false;
10067     return true;
10068 }
10069 
10070 /// create list by filtering existing list, to get only values which intersect filter range
ldomXRangeList(ldomXRangeList & srcList,ldomXRange & filter)10071 ldomXRangeList::ldomXRangeList( ldomXRangeList & srcList, ldomXRange & filter )
10072 {
10073     for ( int i=0; i<srcList.length(); i++ ) {
10074         if ( srcList[i]->checkIntersection( filter ) )
10075             LVPtrVector<ldomXRange>::add( new ldomXRange( *srcList[i] ) );
10076     }
10077 }
10078 
10079 /// copy constructor of full node range
ldomXRange(ldomNode * p,bool fitEndToLastInnerChild)10080 ldomXRange::ldomXRange( ldomNode * p, bool fitEndToLastInnerChild )
10081 : _start( p, 0 ), _end( p, p->isText() ? p->getText().length() : p->getChildCount() ), _flags(1)
10082 {
10083     // Note: the above initialization seems wrong: for a non-text
10084     // node, offset seems of no-use, and setting it to the number
10085     // of children wouldn't matter (and if the original aim was to
10086     // extend end to include the last child, the range would ignore
10087     // this last child descendants).
10088     // The following change might well be the right behaviour expected
10089     // from ldomXRange(ldomNode) and fixing a bug, but let's keep
10090     // this "fixed" behaviour an option
10091     if (fitEndToLastInnerChild && !p->isText()) {
10092         // Update _end to point to the last deepest inner child node,
10093         // and to the end of its text if it is a text npde.
10094         ldomXPointerEx tmp = _start;
10095         if (tmp.lastInnerNode(true)) {
10096             _end = tmp;
10097         }
10098     }
10099     // Note: code that walks or compare a ldomXRange may include or
10100     // exclude the _end: most often, it's excluded.
10101     // If it is a text node, the end points to text.length(), so after the
10102     // last char, and it then includes the last char.
10103     // If it is a non-text node, we could choose to include or exclude it
10104     // in XPointers comparisons. Including it would have the node included,
10105     // but not its children (because a child is after its parent in
10106     // comparisons), which feels strange.
10107     // So, excluding it looks like the sanest choice.
10108     // But then, with fitEndToLastInnerChild, if that last inner child
10109     // is a <IMG> node, it will be the _end, but won't then be included
10110     // in the range... The proper way to include it then would be to use
10111     // ldomXPointerEx::nextOuterElement(), but this is just a trick (it
10112     // would fail if that node is the last in the document, and
10113     // getNearestParent() would move up unnecessary ancestors...)
10114     // So, better check the functions that we use to see how they would
10115     // cope with that case.
10116 }
10117 
/// Returns a reference to the greater of two pointers, as ordered by
/// ldomXPointerEx::compare(); v1 when they compare equal.
static const ldomXPointerEx & _max( const ldomXPointerEx & v1,  const ldomXPointerEx & v2 )
{
    return v1.compare( v2 ) >= 0 ? v1 : v2;
}
10126 
/// Returns a reference to the lesser of two pointers, as ordered by
/// ldomXPointerEx::compare(); v1 when they compare equal.
static const ldomXPointerEx & _min( const ldomXPointerEx & v1,  const ldomXPointerEx & v2 )
{
    return v1.compare( v2 ) <= 0 ? v1 : v2;
}
10135 
10136 /// create intersection of two ranges
ldomXRange(const ldomXRange & v1,const ldomXRange & v2)10137 ldomXRange::ldomXRange( const ldomXRange & v1,  const ldomXRange & v2 )
10138     : _start( _max( v1._start, v2._start ) ), _end( _min( v1._end, v2._end ) )
10139 {
10140 }
10141 
10142 /// create list splittiny existing list into non-overlapping ranges
ldomXRangeList(ldomXRangeList & srcList,bool splitIntersections)10143 ldomXRangeList::ldomXRangeList( ldomXRangeList & srcList, bool splitIntersections )
10144 {
10145     if ( srcList.empty() )
10146         return;
10147     int i;
10148     if ( splitIntersections ) {
10149         ldomXRange * maxRange = new ldomXRange( *srcList[0] );
10150         for ( i=1; i<srcList.length(); i++ ) {
10151             if ( srcList[i]->getStart().compare( maxRange->getStart() ) < 0 )
10152                 maxRange->setStart( srcList[i]->getStart() );
10153             if ( srcList[i]->getEnd().compare( maxRange->getEnd() ) > 0 )
10154                 maxRange->setEnd( srcList[i]->getEnd() );
10155         }
10156         maxRange->setFlags(0);
10157         add( maxRange );
10158         for ( i=0; i<srcList.length(); i++ )
10159             split( srcList[i] );
10160         for ( int i=length()-1; i>=0; i-- ) {
10161             if ( get(i)->getFlags()==0 )
10162                 erase( i, 1 );
10163         }
10164     } else {
10165         for ( i=0; i<srcList.length(); i++ )
10166             add( new ldomXRange( *srcList[i] ) );
10167     }
10168 }
10169 
10170 /// split into subranges using intersection
split(ldomXRange * r)10171 void ldomXRangeList::split( ldomXRange * r )
10172 {
10173     int i;
10174     for ( i=0; i<length(); i++ ) {
10175         if ( r->checkIntersection( *get(i) ) ) {
10176             ldomXRange * src = remove( i );
10177             int cmp1 = src->getStart().compare( r->getStart() );
10178             int cmp2 = src->getEnd().compare( r->getEnd() );
10179             //TODO: add intersections
10180             if ( cmp1 < 0 && cmp2 < 0 ) {
10181                 //   0====== src ======0
10182                 //        X======= r=========X
10183                 //   1111122222222222222
10184                 ldomXRange * r1 = new ldomXRange( src->getStart(), r->getStart(), src->getFlags() );
10185                 ldomXRange * r2 = new ldomXRange( r->getStart(), src->getEnd(), src->getFlags() | r->getFlags() );
10186                 insert( i++, r1 );
10187                 insert( i, r2 );
10188                 delete src;
10189             } else if ( cmp1 > 0 && cmp2 > 0 ) {
10190                 //           0====== src ======0
10191                 //     X======= r=========X
10192                 //           2222222222222233333
10193                 ldomXRange * r2 = new ldomXRange( src->getStart(), r->getEnd(), src->getFlags() | r->getFlags() );
10194                 ldomXRange * r3 = new ldomXRange( r->getEnd(), src->getEnd(), src->getFlags() );
10195                 insert( i++, r2 );
10196                 insert( i, r3 );
10197                 delete src;
10198             } else if ( cmp1 < 0 && cmp2 > 0 ) {
10199                 // 0====== src ================0
10200                 //     X======= r=========X
10201                 ldomXRange * r1 = new ldomXRange( src->getStart(), r->getStart(), src->getFlags() );
10202                 ldomXRange * r2 = new ldomXRange( r->getStart(), r->getEnd(), src->getFlags() | r->getFlags() );
10203                 ldomXRange * r3 = new ldomXRange( r->getEnd(), src->getEnd(), src->getFlags() );
10204                 insert( i++, r1 );
10205                 insert( i++, r2 );
10206                 insert( i, r3 );
10207                 delete src;
10208             } else if ( cmp1 == 0 && cmp2 > 0 ) {
10209                 //   0====== src ========0
10210                 //   X====== r=====X
10211                 ldomXRange * r1 = new ldomXRange( src->getStart(), r->getEnd(), src->getFlags() | r->getFlags() );
10212                 ldomXRange * r2 = new ldomXRange( r->getEnd(), src->getEnd(), src->getFlags() );
10213                 insert( i++, r1 );
10214                 insert( i, r2 );
10215                 delete src;
10216             } else if ( cmp1 < 0 && cmp2 == 0 ) {
10217                 //   0====== src =====0
10218                 //      X====== r=====X
10219                 ldomXRange * r1 = new ldomXRange( src->getStart(), r->getStart(), src->getFlags() );
10220                 ldomXRange * r2 = new ldomXRange( r->getStart(), r->getEnd(), src->getFlags() | r->getFlags() );
10221                 insert( i++, r1 );
10222                 insert( i, r2 );
10223                 delete src;
10224             } else {
10225                 //        0====== src =====0
10226                 //   X============== r===========X
10227                 //
10228                 //        0====== src =====0
10229                 //   X============== r=====X
10230                 //
10231                 //   0====== src =====0
10232                 //   X============== r=====X
10233                 //
10234                 //   0====== src ========0
10235                 //   X========== r=======X
10236                 src->setFlags( src->getFlags() | r->getFlags() );
10237                 insert( i, src );
10238             }
10239         }
10240     }
10241 }
10242 
10243 #if BUILD_LITE!=1
10244 
findText(lString32 pattern,bool caseInsensitive,bool reverse,int minY,int maxY,LVArray<ldomWord> & words,int maxCount,int maxHeight,int maxHeightCheckStartY)10245 bool ldomDocument::findText( lString32 pattern, bool caseInsensitive, bool reverse, int minY, int maxY, LVArray<ldomWord> & words, int maxCount, int maxHeight, int maxHeightCheckStartY )
10246 {
10247     if ( minY<0 )
10248         minY = 0;
10249     int fh = getFullHeight();
10250     if ( maxY<=0 || maxY>fh )
10251         maxY = fh;
10252     // ldomXPointer start = createXPointer( lvPoint(0, minY), reverse?-1:1 );
10253     // ldomXPointer end = createXPointer( lvPoint(10000, maxY), reverse?-1:1 );
10254     // If we're provided with minY or maxY in some empty space (margins, empty
10255     // elements...), they may not resolve to a XPointer.
10256     // Find a valid y near each of them that does resolve to a XPointer:
10257     // We also want to get start/end point to logical-order HTML nodes,
10258     // which might be different from visual-order in bidi text.
10259     ldomXPointer start;
10260     ldomXPointer end;
10261     for (int y = minY; y >= 0; y--) {
10262         start = createXPointer( lvPoint(0, y), reverse ? PT_DIR_SCAN_BACKWARD_LOGICAL_FIRST
10263                                                        : PT_DIR_SCAN_FORWARD_LOGICAL_FIRST );
10264         if (!start.isNull())
10265             break;
10266     }
10267     if (start.isNull()) {
10268         // If none found (can happen when minY=0 and blank content at start
10269         // of document like a <br/>), scan forward from document start
10270         for (int y = 0; y <= fh; y++) {
10271             start = createXPointer( lvPoint(0, y), reverse ? PT_DIR_SCAN_BACKWARD_LOGICAL_FIRST
10272                                                            : PT_DIR_SCAN_FORWARD_LOGICAL_FIRST );
10273             if (!start.isNull())
10274                 break;
10275         }
10276     }
10277     for (int y = maxY; y <= fh; y++) {
10278         end = createXPointer( lvPoint(10000, y), reverse ? PT_DIR_SCAN_BACKWARD_LOGICAL_LAST
10279                                                          : PT_DIR_SCAN_FORWARD_LOGICAL_LAST );
10280         if (!end.isNull())
10281             break;
10282     }
10283     if (end.isNull()) {
10284         // If none found (can happen when maxY=fh and blank content at end
10285         // of book like a <br/>), scan backward from document end
10286         for (int y = fh; y >= 0; y--) {
10287             end = createXPointer( lvPoint(10000, y), reverse ? PT_DIR_SCAN_BACKWARD_LOGICAL_LAST
10288                                                              : PT_DIR_SCAN_FORWARD_LOGICAL_LAST );
10289             if (!end.isNull())
10290                 break;
10291         }
10292     }
10293 
10294     if ( start.isNull() || end.isNull() )
10295         return false;
10296     ldomXRange range( start, end );
10297     CRLog::debug("ldomDocument::findText() for Y %d..%d, range %d..%d",
10298                  minY, maxY, start.toPoint().y, end.toPoint().y);
10299     if ( range.getStart().toPoint().y==-1 ) {
10300         range.getStart().nextVisibleText();
10301         CRLog::debug("ldomDocument::findText() updated range %d..%d",
10302                      range.getStart().toPoint().y, range.getEnd().toPoint().y);
10303     }
10304     if ( range.getEnd().toPoint().y==-1 ) {
10305         range.getEnd().prevVisibleText();
10306         CRLog::debug("ldomDocument::findText() updated range %d..%d",
10307                      range.getStart().toPoint().y, range.getEnd().toPoint().y);
10308     }
10309     if ( range.isNull() ) {
10310         CRLog::debug("No text found: Range is empty");
10311         return false;
10312     }
10313     return range.findText( pattern, caseInsensitive, reverse, words, maxCount, maxHeight, maxHeightCheckStartY );
10314 }
10315 
findText(const lString32 & str,int & pos,int & endpos,const lString32 & pattern)10316 static bool findText( const lString32 & str, int & pos, int & endpos, const lString32 & pattern )
10317 {
10318     int len = pattern.length();
10319     if ( pos < 0 || pos + len > (int)str.length() )
10320         return false;
10321     const lChar32 * s1 = str.c_str() + pos;
10322     const lChar32 * s2 = pattern.c_str();
10323     int nlen = str.length() - pos - len;
10324     for ( int j=0; j<=nlen; j++ ) {
10325         bool matched = true;
10326         int nsofthyphens = 0; // There can be soft-hyphen in str, but not in pattern
10327         for ( int i=0; i<len; i++ ) {
10328             while ( i+nsofthyphens < nlen && s1[i+nsofthyphens] == UNICODE_SOFT_HYPHEN_CODE ) {
10329                 nsofthyphens += 1;
10330             }
10331             if ( s1[i+nsofthyphens] != s2[i] ) {
10332                 matched = false;
10333                 break;
10334             }
10335         }
10336         if ( matched ) {
10337             endpos = pos + len + nsofthyphens;
10338             return true;
10339         }
10340         s1++;
10341         pos++;
10342     }
10343     return false;
10344 }
10345 
findTextRev(const lString32 & str,int & pos,int & endpos,const lString32 & pattern)10346 static bool findTextRev( const lString32 & str, int & pos, int & endpos, const lString32 & pattern )
10347 {
10348     int len = pattern.length();
10349     if ( pos+len>(int)str.length() )
10350         pos = str.length()-len;
10351     if ( pos < 0 )
10352         return false;
10353     const lChar32 * s1 = str.c_str() + pos;
10354     const lChar32 * s2 = pattern.c_str();
10355     int nlen = pos;
10356     for ( int j=nlen; j>=0; j-- ) {
10357         bool matched = true;
10358         int nsofthyphens = 0; // There can be soft-hyphen in str, but not in pattern
10359         for ( int i=0; i<len; i++ ) {
10360             while ( i+nsofthyphens < nlen && s1[i+nsofthyphens] == UNICODE_SOFT_HYPHEN_CODE ) {
10361                 nsofthyphens += 1;
10362             }
10363             if ( s1[i+nsofthyphens] != s2[i] ) {
10364                 matched = false;
10365                 break;
10366             }
10367         }
10368         if ( matched ) {
10369             endpos = pos + len + nsofthyphens;
10370             return true;
10371         }
10372         s1--;
10373         pos--;
10374     }
10375     return false;
10376 }
10377 
/// searches for specified text inside range
// Found occurrences are returned in words (which is cleared first), each
// as a ldomWord( text node, start offset, end offset ).
// Returns true if at least one occurrence was found.
// - pattern: text to look for (lowercased here when caseInsensitive, as is
//   the text of each visited text node before comparison)
// - reverse: when true, text nodes are visited backward from the range end,
//   otherwise forward from the range start
// - maxCount: stops visiting further text nodes once that many occurrences
//   have been collected. It is only checked after a text node has been
//   fully scanned, so words may end up holding more than maxCount items.
// - maxHeight: when > 0, once a reference y (firstFoundTextY) is known, the
//   search stops when the visited text is further than maxHeight from it
// - maxHeightCheckStartY: when not -1, the y of a match is only taken as the
//   reference y if it is >= (forward) or <= (reverse) this value
// - checkMaxFromStart: (forward search only, not used when reverse) take the
//   y of the range start as the reference y, instead of the y of a match
// Note that this walks the range's own _start (forward) or _end (reverse)
// xpointers: the range is modified by the search.
bool ldomXRange::findText( lString32 pattern, bool caseInsensitive, bool reverse, LVArray<ldomWord> & words, int maxCount, int maxHeight, int maxHeightCheckStartY, bool checkMaxFromStart )
{
    if ( caseInsensitive )
        pattern.lowercase();
    words.clear();
    if ( pattern.empty() )
        return false;
    if ( reverse ) {
        // reverse search
        if ( !_end.isText() ) {
            // Range end is not in a text node: move it to the end of the
            // previous visible text node
            _end.prevVisibleText();
            lString32 txt = _end.getNode()->getText();
            _end.setOffset(txt.length());
        }
        int firstFoundTextY = -1; // reference y for the maxHeight check (-1: not set yet)
        while ( !isNull() ) {

            lString32 txt = _end.getNode()->getText();
            int offs = _end.getOffset();
            int endpos;

            if ( firstFoundTextY!=-1 && maxHeight>0 ) {
                // Stop when we are more than maxHeight above the reference y
                ldomXPointer p( _end.getNode(), offs );
                int currentTextY = p.toPoint().y;
                if ( currentTextY<firstFoundTextY-maxHeight )
                    return words.length()>0;
            }

            if ( caseInsensitive )
                txt.lowercase();

            // Collect all occurrences in this text node, from last to first
            while ( ::findTextRev( txt, offs, endpos, pattern ) ) {
                if ( firstFoundTextY==-1 && maxHeight>0 ) {
                    ldomXPointer p( _end.getNode(), offs );
                    int currentTextY = p.toPoint().y;
                    if (maxHeightCheckStartY == -1 || currentTextY <= maxHeightCheckStartY)
                        firstFoundTextY = currentTextY;
                }
                words.add( ldomWord(_end.getNode(), offs, endpos ) );
                offs--; // next candidate start is just before this match start
            }
            if ( !_end.prevVisibleText() )
                break;
            // Continue from the end of the previous visible text node
            txt = _end.getNode()->getText();
            _end.setOffset(txt.length());
            if ( words.length() >= maxCount )
                break;
        }
    } else {
        // direct search
        if ( !_start.isText() )
            _start.nextVisibleText();
        int firstFoundTextY = -1; // reference y for the maxHeight check (-1: not set yet)
        if (checkMaxFromStart) {
			ldomXPointer p( _start.getNode(), _start.getOffset() );
			firstFoundTextY = p.toPoint().y;
		}
        while ( !isNull() ) {
            int offs = _start.getOffset();
            int endpos;

            if ( firstFoundTextY!=-1 && maxHeight>0 ) {
                // Stop when we are more than maxHeight below the reference y
                // (or at maxHeight exactly when measuring from the range start)
                ldomXPointer p( _start.getNode(), offs );
                int currentTextY = p.toPoint().y;
                if ( (checkMaxFromStart && currentTextY>=firstFoundTextY+maxHeight) ||
					currentTextY>firstFoundTextY+maxHeight )
                    return words.length()>0;
            }

            lString32 txt = _start.getNode()->getText();
            if ( caseInsensitive )
                txt.lowercase();

            // Collect all occurrences in this text node, from first to last
            while ( ::findText( txt, offs, endpos, pattern ) ) {
                // This is only entered while no reference y is set
                if ( firstFoundTextY==-1 && maxHeight>0 ) {
                    ldomXPointer p( _start.getNode(), offs );
                    int currentTextY = p.toPoint().y;
                    if (checkMaxFromStart) {
                        if ( currentTextY>=firstFoundTextY+maxHeight )
                            return words.length()>0;
                    } else {
                        if (maxHeightCheckStartY == -1 || currentTextY >= maxHeightCheckStartY)
                            firstFoundTextY = currentTextY;
                    }
                }
                words.add( ldomWord(_start.getNode(), offs, endpos ) );
                offs++; // next candidate start is just after this match start
            }
            if ( !_start.nextVisibleText() )
                break;
            if ( words.length() >= maxCount )
                break;
        }
    }
    return words.length() > 0;
}
10475 
10476 /// fill marked ranges list
10477 // Transform a list of ldomXRange (start and end xpointers) into a list
10478 // of ldomMarkedRange (start and end point coordinates) for native
10479 // drawing of highlights
getRanges(ldomMarkedRangeList & dst)10480 void ldomXRangeList::getRanges( ldomMarkedRangeList &dst )
10481 {
10482     dst.clear();
10483     if ( empty() )
10484         return;
10485     for ( int i=0; i<length(); i++ ) {
10486         ldomXRange * range = get(i);
10487         if (range->getFlags() < 0x10) {
10488             // Legacy marks drawing: make a single ldomMarkedRange spanning
10489             // multiple lines, assuming full width LTR paragraphs)
10490             // (Updated to use toPoint(extended=true) to have them shifted
10491             // by the margins and paddings of final blocks, to be compatible
10492             // with getSegmentRects() below that does that internally.)
10493             lvPoint ptStart = range->getStart().toPoint(true);
10494             lvPoint ptEnd = range->getEnd().toPoint(true);
10495             // LVE:DEBUG
10496             // CRLog::trace("selectRange( %d,%d : %d,%d : %s, %s )", ptStart.x, ptStart.y, ptEnd.x, ptEnd.y,
10497             //              LCSTR(range->getStart().toString()), LCSTR(range->getEnd().toString()) );
10498             if ( ptStart.y > ptEnd.y || ( ptStart.y == ptEnd.y && ptStart.x >= ptEnd.x ) ) {
10499                 // Swap ptStart and ptEnd if coordinates seems inverted (or we would
10500                 // get item->empty()), which is needed for bidi/rtl.
10501                 // Hoping this has no side effect.
10502                 lvPoint ptTmp = ptStart;
10503                 ptStart = ptEnd;
10504                 ptEnd = ptTmp;
10505             }
10506             ldomMarkedRange * item = new ldomMarkedRange( ptStart, ptEnd, range->getFlags() );
10507             if ( !item->empty() )
10508                 dst.add( item );
10509             else
10510                 delete item;
10511         }
10512         else {
10513             // Enhanced marks drawing: from a single ldomXRange, make multiple segmented
10514             // ldomMarkedRange, each spanning a single line.
10515             LVArray<lvRect> rects;
10516             range->getSegmentRects(rects);
10517             for (int i=0; i<rects.length(); i++) {
10518                 lvRect r = rects[i];
10519                 // printf("r %d %dx%d %dx%d\n", i, r.topLeft().x, r.topLeft().y, r.bottomRight().x, r.bottomRight().y);
10520                 ldomMarkedRange * item = new ldomMarkedRange( r.topLeft(), r.bottomRight(), range->getFlags() );
10521                 if ( !item->empty() )
10522                     dst.add( item );
10523                 else
10524                     delete item;
10525             }
10526         }
10527     }
10528 }
10529 
10530 /// fill text selection list by splitting text into monotonic flags ranges
splitText(ldomMarkedTextList & dst,ldomNode * textNodeToSplit)10531 void ldomXRangeList::splitText( ldomMarkedTextList &dst, ldomNode * textNodeToSplit )
10532 {
10533     lString32 text = textNodeToSplit->getText();
10534     if ( length()==0 ) {
10535         dst.add( new ldomMarkedText( text, 0, 0 ) );
10536         return;
10537     }
10538     ldomXRange textRange( textNodeToSplit );
10539     ldomXRangeList ranges;
10540     ranges.add( new ldomXRange(textRange) );
10541     int i;
10542     for ( i=0; i<length(); i++ ) {
10543         ranges.split( get(i) );
10544     }
10545     for ( i=0; i<ranges.length(); i++ ) {
10546         ldomXRange * r = ranges[i];
10547         int start = r->getStart().getOffset();
10548         int end = r->getEnd().getOffset();
10549         if ( end>start )
10550             dst.add( new ldomMarkedText( text.substr(start, end-start), r->getFlags(), start ) );
10551     }
10552     /*
10553     if ( dst.length() ) {
10554         CRLog::debug(" splitted: ");
10555         for ( int k=0; k<dst.length(); k++ ) {
10556             CRLog::debug("    (%d, %d) %s", dst[k]->offset, dst[k]->flags, UnicodeToUtf8(dst[k]->text).c_str());
10557         }
10558     }
10559     */
10560 }
10561 
10562 /// returns rectangle (in doc coordinates) for range. Returns true if found.
10563 // Note that this works correctly only when start and end are in the
10564 // same text node.
getRectEx(lvRect & rect,bool & isSingleLine)10565 bool ldomXRange::getRectEx( lvRect & rect, bool & isSingleLine )
10566 {
10567     isSingleLine = false;
10568     if ( isNull() )
10569         return false;
10570     // get start and end rects
10571     lvRect rc1;
10572     lvRect rc2;
10573     // inner=true if enhanced rendering, to directly get the inner coordinates,
10574     // so no need to compute paddings (as done below for legacy rendering)
10575     if ( !getStart().getRect(rc1, true) || !getEnd().getRect(rc2, true) )
10576         return false;
10577     ldomNode * finalNode1 = getStart().getFinalNode();
10578     ldomNode * finalNode2 = getEnd().getFinalNode();
10579     if ( !finalNode1 || !finalNode2 ) {
10580         // Shouldn't happen, but prevent a segfault in case some other bug
10581         // in initNodeRendMethod made some text not having an erm_final ancestor.
10582         if ( !finalNode1 )
10583             printf("CRE WARNING: no final parent for range start %s\n", UnicodeToLocal(getStart().toString()).c_str());
10584         if ( !finalNode2 )
10585             printf("CRE WARNING: no final parent for range end %s\n", UnicodeToLocal(getEnd().toString()).c_str());
10586         return false;
10587     }
10588     RenderRectAccessor fmt1(finalNode1);
10589     RenderRectAccessor fmt2(finalNode2);
10590     // In legacy mode, we just got the erm_final coordinates, and we must
10591     // compute and add left/top border and padding (using rc.width() as
10592     // the base for % is wrong here, and so is rc.height() for padding top)
10593     if ( ! RENDER_RECT_HAS_FLAG(fmt1, INNER_FIELDS_SET) ) {
10594         int em = finalNode1->getFont()->getSize();
10595         int padding_left = measureBorder(finalNode1,3) + lengthToPx(finalNode1->getStyle()->padding[0], fmt1.getWidth(), em);
10596         int padding_top = measureBorder(finalNode1,0) + lengthToPx(finalNode1->getStyle()->padding[2], fmt1.getWidth(), em);
10597         rc1.top += padding_top;
10598         rc1.left += padding_left;
10599         rc1.right += padding_left;
10600         rc1.bottom += padding_top;
10601     }
10602     if ( ! RENDER_RECT_HAS_FLAG(fmt2, INNER_FIELDS_SET) ) {
10603         int em = finalNode2->getFont()->getSize();
10604         int padding_left = measureBorder(finalNode2,3) + lengthToPx(finalNode2->getStyle()->padding[0], fmt2.getWidth(), em);
10605         int padding_top = measureBorder(finalNode2,0) + lengthToPx(finalNode2->getStyle()->padding[2], fmt2.getWidth(), em);
10606         rc2.top += padding_top;
10607         rc2.left += padding_left;
10608         rc2.right += padding_left;
10609         rc2.bottom += padding_top;
10610     }
10611     if ( rc1.top == rc2.top && rc1.bottom == rc2.bottom ) {
10612         // on same line
10613         rect.left = rc1.left;
10614         rect.top = rc1.top;
10615         rect.right = rc2.right;
10616         rect.bottom = rc2.bottom;
10617         isSingleLine = true;
10618         return !rect.isEmpty();
10619     }
10620     // on different lines
10621     ldomNode * parent = getNearestCommonParent();
10622     if ( !parent )
10623         return false;
10624     parent->getAbsRect(rect);
10625     rect.top = rc1.top;
10626     rect.bottom = rc2.bottom;
10627     return !rect.isEmpty();
10628 }
10629 
// Returns the multiple segments (rectangle for each text line) that
// this ldomXRange spans on the page.
// The text content from S to E on this page will push 4 segments:
//   ......
//   ...S==
//   ======
//   ======
//   ==E..
//   ......
// The segments are appended to the provided rects (which is not cleared
// here). Only text nodes are considered, and a new segment is also started
// each time the text walked gets into another final node.
void ldomXRange::getSegmentRects( LVArray<lvRect> & rects )
{
    bool go_on = true;
    int lcount = 1; // (not used in this function)
    lvRect lineStartRect = lvRect();  // segment being built for the current line (empty if none)
    lvRect nodeStartRect = lvRect();  // rect of the char at the current start offset in the current text node
    lvRect curCharRect = lvRect();    // rect of the char being checked
    lvRect prevCharRect = lvRect();   // rect of the last char seen still on the current line
    ldomNode *prevFinalNode = NULL; // to add rect when we cross final nodes

    // We process range text node by text node (I thought rects' y-coordinates
    // comparisons were valid only for a same text node, but it seems all
    // text on a line get the same .top and .bottom, even if they have a
    // smaller font size - but using ldomXRange.getRectEx() on multiple
    // text nodes gives wrong rects for the last chars on a line...)

    // Note: someRect.extend(someOtherRect) and !someRect.isEmpty() expect
    // a rect to have both width and height non-zero. So, make sure
    // in getRectEx() that we always get a rect of width at least 1px,
    // otherwise some lines may not be highlighted.

    // Note: the range end offset is NOT part of the range (it points to the
    // char after, or last char + 1 if it includes the whole text node text)
    ldomXPointerEx rangeEnd = getEnd();
    ldomXPointerEx curPos = ldomXPointerEx( getStart() ); // copy, will change
    if (!curPos.isText()) // we only deal with text nodes: get the first
        go_on = curPos.nextText();

    while (go_on) { // new line or new/continued text node
        // We may have (empty or not if not yet pushed) from previous iteration:
        // lineStartRect : char rect for first char of line, even if from another text node
        // nodeStartRect : char rect of current char at curPos (calculated but not included
        //   in previous line), that is now the start of the line
        // The curPos.getRectEx(charRect) we use returns a rect for a single char, with
        // the width of the char. We then "extend" it to the char at end of line (or end
        // of range) to make a segment that we add to the provided &rects.
        // We use getRectEx() with adjusted=true, for fine tuned glyph rectangles
        // that include the excessive left or right side bearing.

        if (!curPos || curPos.isNull() || curPos.compare(rangeEnd) >= 0) {
            // no more text node, or after end of range: we're done
            break;
        }

        ldomNode *curFinalNode = curPos.getFinalNode();
        if (curFinalNode != prevFinalNode) {
            // Force a new segment if we're crossing final nodes, that is, when
            // we're no more in the same inline context (so we get a new segment
            // for each table cells that may happen to be rendered on the same line)
            if (! lineStartRect.isEmpty()) {
                rects.add( lineStartRect );
                lineStartRect = lvRect(); // reset
            }
            prevFinalNode = curFinalNode;
        }

        int startOffset = curPos.getOffset();
        lString32 nodeText = curPos.getText();
        int textLen = nodeText.length();

        if (startOffset == 0) { // new text node
            nodeStartRect = lvRect(); // reset
            if (textLen == 0) { // empty text node (not sure that can happen)
                go_on = curPos.nextText();
                continue;
            }
        }
        // Skip space at start of node or at start of new line
        // (the XML parser made sure we always have a single space
        // at boundaries)
        if (nodeText[startOffset] == ' ') {
            startOffset += 1;
            nodeStartRect = lvRect(); // reset
        }
        if (startOffset >= textLen) { // no more text in this node (or single space node)
            go_on = curPos.nextText();
            nodeStartRect = lvRect(); // reset
            continue;
        }
        curPos.setOffset(startOffset);
        if (nodeStartRect.isEmpty()) { // otherwise, we re-use the one left from previous loop
            // getRectEx() seems to fail on a single no-break-space, but we
            // are not supposed to see a no-br space at start of line.
            // Anyway, try next chars if first one(s) fails
            while (startOffset <= textLen-2 && !curPos.getRectEx(nodeStartRect, true)) {
                // printf("#### curPos.getRectEx(nodeStartRect:%d) failed\n", startOffset);
                startOffset++;
                curPos.setOffset(startOffset);
                nodeStartRect = lvRect(); // reset
            }
            // last try with the last char (startOffset = textLen-1):
            // (when the loop above stopped on a success, this just fetches
            // the rect of that same char again)
            if (!curPos.getRectEx(nodeStartRect, true)) {
                // printf("#### curPos.getRectEx(nodeStartRect) failed\n");
                // getRectEx() returns false when a node is invisible, so we just
                // go processing next text node on failure (it may fail for other
                // reasons that we won't notice, except for may be holes in the
                // highlighting)
                go_on = curPos.nextText(); // skip this text node
                nodeStartRect = lvRect(); // reset
                continue;
            }
        }
        if (lineStartRect.isEmpty()) {
            lineStartRect = nodeStartRect; // re-use the one already computed
        }
        // This would help noticing a line-feed-back-to-start-of-line:
        //   else if (nodeStartRect.left < lineStartRect.right)
        // but it makes a 2-lines-tall single segment if text-indent is larger
        // than previous line end.
        // So, use .top comparison
        else if (nodeStartRect.top > lineStartRect.top) {
            // We ended last node on a line, but a new node starts (or previous
            // one continues) on a different line.
            // And we have a not-yet-added lineStartRect: add it as it is
            rects.add( lineStartRect );
            lineStartRect = nodeStartRect; // start line on current node
        }

        // 1) Look if text node contains end of range (probably the case
        // when only a few words are highlighted)
        if (curPos.getNode() == rangeEnd.getNode() && rangeEnd.getOffset() <= textLen) {
            curCharRect = lvRect();
            curPos.setOffset(rangeEnd.getOffset() - 1); // Range end is not part of the range
            if (!curPos.getRectEx(curCharRect, true)) {
                // printf("#### curPos.getRectEx(textLen=%d) failed\n", textLen);
                go_on = curPos.nextText(); // skip this text node
                nodeStartRect = lvRect(); // reset
                continue;
            }
            if (curCharRect.top == nodeStartRect.top) { // end of range is on current line
                // (Two offsets in a same text node with the same tops are on the same line)
                lineStartRect.extend(curCharRect);
                // lineStartRect will be added after loop exit
                break; // we're done
            }
            // Otherwise, the end of range is on a later line: go on with 2) and 3)
        }

        // 2) Look if the full text node is contained on the line
        // Ignore (possibly collapsed) space at end of text node
        curPos.setOffset(nodeText[textLen-1] == ' ' ? textLen-2 : textLen-1 );
        curCharRect = lvRect();
        if (!curPos.getRectEx(curCharRect, true)) {
            // printf("#### curPos.getRectEx(textLen=%d) failed\n", textLen);
            go_on = curPos.nextText(); // skip this text node
            nodeStartRect = lvRect(); // reset
            continue;
        }
        if (curCharRect.top == nodeStartRect.top) {
            // Extend line up to the end of this node, but don't add it yet,
            // lineStartRect can still be extended with (parts of) next text nodes
            lineStartRect.extend(curCharRect);
            nodeStartRect  = lvRect(); // reset
            go_on = curPos.nextText(); // go processing next text node
            continue;
        }

        // 3) Current text node's end is not on our line:
        // scan it char by char to see where it changes line
        // (we could use binary search to reduce the number of iterations)
        curPos.setOffset(startOffset);
        prevCharRect = nodeStartRect;
        for (int i=startOffset+1; i<=textLen-1; i++) {
            // skip spaces (but let soft-hyphens in, so they are part of the
            // highlight when they are shown at end of line)
            lChar32 c = nodeText[i];
            if (c == ' ') // || c == 0x00AD)
                continue;
            curPos.setOffset(i);
            curCharRect = lvRect(); // reset
            if (!curPos.getRectEx(curCharRect, true)) {
                // printf("#### curPos.getRectEx(char=%d) failed\n", i);
                // Can happen with non-break-space and may be others,
                // just try with next char
                continue;
            }
            if (curPos.compare(rangeEnd) >= 0) {
                // should not happen, we should have dealt with it as 1)
                // printf("??????????? curPos.getRectEx(char=%d) end of range\n", i);
                go_on = false;        // don't break yet, need to add what we met before
                curCharRect.top = -1; // force adding prevCharRect
            }
            if (curCharRect.top != nodeStartRect.top) { // no more on the same line
                if ( ! prevCharRect.isEmpty() ) { // (should never be empty)
                    // We got previously a rect on this line: it's the end of line
                    lineStartRect.extend(prevCharRect);
                    rects.add( lineStartRect );
                }
                // Continue with this text node, but on a new line
                // (curPos is left at offset i, where the outer loop resumes)
                nodeStartRect = curCharRect;
                lineStartRect = lvRect(); // reset
                break; // break for (i<textLen) loop
            }
            prevCharRect = curCharRect; // still on the line: candidate for end of line
            if (! go_on)
                break; // we're done
        }
    }
    // Add any lineStartRect not yet added
    if (! lineStartRect.isEmpty()) {
        rects.add( lineStartRect );
    }
}
10841 
10842 /// sets range to nearest word bounds, returns true if success
getWordRange(ldomXRange & range,ldomXPointer & p)10843 bool ldomXRange::getWordRange( ldomXRange & range, ldomXPointer & p )
10844 {
10845     ldomNode * node = p.getNode();
10846     if ( !node->isText() )
10847         return false;
10848     int pos = p.getOffset();
10849     lString32 txt = node->getText();
10850     if ( pos<0 )
10851         pos = 0;
10852     if ( pos>(int)txt.length() )
10853         pos = txt.length();
10854     int endpos = pos;
10855     for (;;) {
10856         lChar32 ch = txt[endpos];
10857         if ( ch==0 || ch==' ' )
10858             break;
10859         endpos++;
10860     }
10861     /*
10862     // include trailing space
10863     for (;;) {
10864         lChar32 ch = txt[endpos];
10865         if ( ch==0 || ch!=' ' )
10866             break;
10867         endpos++;
10868     }
10869     */
10870     for ( ;; ) {
10871         if ( pos==0 )
10872             break;
10873         if ( txt[pos]!=' ' )
10874             break;
10875         pos--;
10876     }
10877     for ( ;; ) {
10878         if ( pos==0 )
10879             break;
10880         if ( txt[pos-1]==' ' )
10881             break;
10882         pos--;
10883     }
10884     ldomXRange r( ldomXPointer( node, pos ), ldomXPointer( node, endpos ) );
10885     range = r;
10886     return true;
10887 }
10888 #endif
10889 
10890 /// returns true if intersects specified line rectangle
intersects(lvRect & rc,lvRect & intersection)10891 bool ldomMarkedRange::intersects( lvRect & rc, lvRect & intersection )
10892 {
10893     if ( flags < 0x10 ) {
10894         // This assumes lines (rc) are from full-width LTR paragraphs, and
10895         // takes some shortcuts when checking intersection (it can be wrong
10896         // when floats, table cells, or RTL/BiDi text are involved).
10897         if ( start.y>=rc.bottom )
10898             return false;
10899         if ( end.y<rc.top )
10900             return false;
10901         intersection = rc;
10902         if ( start.y>=rc.top && start.y<rc.bottom ) {
10903             if ( start.x > rc.right )
10904                 return false;
10905             intersection.left = rc.left > start.x ? rc.left : start.x;
10906         }
10907         if ( end.y>=rc.top && end.y<rc.bottom ) {
10908             if ( end.x < rc.left )
10909                 return false;
10910             intersection.right = rc.right < end.x ? rc.right : end.x;
10911         }
10912         return true;
10913     }
10914     else {
10915         // Don't take any shortcut and check the full intersection
10916         if ( rc.bottom <= start.y || rc.top >= end.y || rc.right <= start.x || rc.left >= end.x ) {
10917             return false; // no intersection
10918         }
10919         intersection.top = rc.top > start.y ? rc.top : start.y;
10920         intersection.bottom = rc.bottom < end.y ? rc.bottom : end.y;
10921         intersection.left = rc.left > start.x ? rc.left : start.x;
10922         intersection.right = rc.right < end.x ? rc.right : end.x;
10923         return !intersection.isEmpty();
10924     }
10925 }
10926 
10927 /// create bounded by RC list, with (0,0) coordinates at left top corner
10928 // crop/discard elements outside of rc (or outside of crop_rc instead if provided)
ldomMarkedRangeList(const ldomMarkedRangeList * list,lvRect & rc,lvRect * crop_rc)10929 ldomMarkedRangeList::ldomMarkedRangeList( const ldomMarkedRangeList * list, lvRect & rc, lvRect * crop_rc )
10930 {
10931     if ( !list || list->empty() )
10932         return;
10933 //    if ( list->get(0)->start.y>rc.bottom )
10934 //        return;
10935 //    if ( list->get( list->length()-1 )->end.y < rc.top )
10936 //        return;
10937     if ( !crop_rc ) {
10938         // If no alternate crop_rc provided, crop to the rc anchor
10939         crop_rc = &rc;
10940     }
10941     for ( int i=0; i<list->length(); i++ ) {
10942         ldomMarkedRange * src = list->get(i);
10943         if ( src->start.y >= crop_rc->bottom || src->end.y < crop_rc->top )
10944             continue;
10945         add( new ldomMarkedRange(
10946             lvPoint(src->start.x-rc.left, src->start.y-rc.top ),
10947             lvPoint(src->end.x-rc.left, src->end.y-rc.top ),
10948             src->flags ) );
10949     }
10950 }
10951 
10952 /// returns nearest common element for start and end points
getNearestCommonParent()10953 ldomNode * ldomXRange::getNearestCommonParent()
10954 {
10955     ldomXPointerEx start(getStart());
10956     ldomXPointerEx end(getEnd());
10957     while ( start.getLevel() > end.getLevel() && start.parent() )
10958         ;
10959     while ( start.getLevel() < end.getLevel() && end.parent() )
10960         ;
10961     /*
10962     while ( start.getIndex()!=end.getIndex() && start.parent() && end.parent() )
10963         ;
10964     if ( start.getNode()==end.getNode() )
10965         return start.getNode();
10966     return NULL;
10967     */
10968     // This above seems wrong: we could have start and end on the same level,
10969     // but in different parent nodes, with still the same index among these
10970     // different parent nodes' children.
10971     // Best to check for node identity, till we find the same parent,
10972     // or the root node
10973     while ( start.getNode()!=end.getNode() && start.parent() && end.parent() )
10974         ;
10975     return start.getNode();
10976 }
10977 
10978 /// returns HTML (serialized from the DOM, may be different from the source HTML)
10979 /// puts the paths of the linked css files met into the provided lString32Collection cssFiles
getHtml(lString32Collection & cssFiles,int wflags)10980 lString8 ldomXPointer::getHtml(lString32Collection & cssFiles, int wflags)
10981 {
10982     if ( isNull() )
10983         return lString8::empty_str;
10984     ldomNode * startNode = getNode();
10985     LVStreamRef stream = LVCreateMemoryStream(NULL, 0, false, LVOM_WRITE);
10986     writeNodeEx( stream.get(), startNode, cssFiles, wflags );
10987     int size = stream->GetSize();
10988     LVArray<char> buf( size+1, '\0' );
10989     stream->Seek(0, LVSEEK_SET, NULL);
10990     stream->Read( buf.get(), size, NULL );
10991     buf[size] = 0;
10992     lString8 html = lString8( buf.get() );
10993     return html;
10994 }
10995 
10996 /// returns HTML (serialized from the DOM, may be different from the source HTML)
10997 /// puts the paths of the linked css files met into the provided lString32Collection cssFiles
getHtml(lString32Collection & cssFiles,int wflags,bool fromRootNode)10998 lString8 ldomXRange::getHtml(lString32Collection & cssFiles, int wflags, bool fromRootNode)
10999 {
11000     if ( isNull() )
11001         return lString8::empty_str;
11002     sort();
11003     ldomNode * startNode;
11004     if (fromRootNode) {
11005         startNode = getStart().getNode()->getDocument()->getRootNode();
11006         if (startNode->getChildCount() == 1) // start HTML with first child (<body>)
11007             startNode = startNode->getFirstChild();
11008     }
11009     else {
11010         // We need to start from the nearest common parent, to get balanced HTML
11011         startNode = getNearestCommonParent();
11012     }
11013     LVStreamRef stream = LVCreateMemoryStream(NULL, 0, false, LVOM_WRITE);
11014     writeNodeEx( stream.get(), startNode, cssFiles, wflags, getStart(), getEnd() );
11015     int size = stream->GetSize();
11016     LVArray<char> buf( size+1, '\0' );
11017     stream->Seek(0, LVSEEK_SET, NULL);
11018     stream->Read( buf.get(), size, NULL );
11019     buf[size] = 0;
11020     lString8 html = lString8( buf.get() );
11021     return html;
11022 }
11023 
11024 /// searches path for element with specific id, returns level at which element is founs, 0 if not found
findElementInPath(lUInt16 id)11025 int ldomXPointerEx::findElementInPath( lUInt16 id )
11026 {
11027     if ( !ensureElement() )
11028         return 0;
11029     ldomNode * e = getNode();
11030     for ( ; e!=NULL; e = e->getParentNode() ) {
11031         if ( e->getNodeId()==id ) {
11032             return e->getNodeLevel();
11033         }
11034     }
11035     return 0;
11036 }
11037 
ensureFinal()11038 bool ldomXPointerEx::ensureFinal()
11039 {
11040     if ( !ensureElement() )
11041         return false;
11042     int cnt = 0;
11043     int foundCnt = -1;
11044     ldomNode * e = getNode();
11045     for ( ; e!=NULL; e = e->getParentNode() ) {
11046         if ( e->getRendMethod() == erm_final ) {
11047             foundCnt = cnt;
11048         }
11049         cnt++;
11050     }
11051     if ( foundCnt<0 )
11052         return false;
11053     for ( int i=0; i<foundCnt; i++ )
11054         parent();
11055     // curent node is final formatted element (e.g. paragraph)
11056     return true;
11057 }
11058 
11059 /// ensure that current node is element (move to parent, if not - from text node to element)
ensureElement()11060 bool ldomXPointerEx::ensureElement()
11061 {
11062     ldomNode * node = getNode();
11063     if ( !node )
11064         return false;
11065     if ( node->isText()) {
11066         if (!parent())
11067             return false;
11068         node = getNode();
11069     }
11070     if ( !node || !node->isElement() )
11071         return false;
11072     return true;
11073 }
11074 
/// move to first child of current node
/// Delegates to child(0) and returns its result.
bool ldomXPointerEx::firstChild()
{
    return child(0);
}
11080 
11081 /// move to last child of current node
lastChild()11082 bool ldomXPointerEx::lastChild()
11083 {
11084     int count = getNode()->getChildCount();
11085     if ( count <=0 )
11086         return false;
11087     return child( count - 1 );
11088 }
11089 
11090 /// move to first element child of current node
firstElementChild()11091 bool ldomXPointerEx::firstElementChild()
11092 {
11093     ldomNode * node = getNode();
11094     int count = node->getChildCount();
11095     for ( int i=0; i<count; i++ ) {
11096         if ( node->getChildNode( i )->isElement() )
11097             return child( i );
11098     }
11099     return false;
11100 }
11101 
11102 /// move to last element child of current node
lastElementChild()11103 bool ldomXPointerEx::lastElementChild()
11104 {
11105     ldomNode * node = getNode();
11106     int count = node->getChildCount();
11107     for ( int i=count-1; i>=0; i-- ) {
11108         if ( node->getChildNode( i )->isElement() )
11109             return child( i );
11110     }
11111     return false;
11112 }
11113 
11114 /// forward iteration by elements of DOM three
nextElement()11115 bool ldomXPointerEx::nextElement()
11116 {
11117     if ( !ensureElement() )
11118         return false;
11119     if ( firstElementChild() )
11120         return true;
11121     for (;;) {
11122         if ( nextSiblingElement() )
11123             return true;
11124         if ( !parent() )
11125             return false;
11126     }
11127 }
11128 
11129 /// returns true if current node is visible element with render method == erm_final
isVisibleFinal()11130 bool ldomXPointerEx::isVisibleFinal()
11131 {
11132     if ( !isElement() )
11133         return false;
11134     int cnt = 0;
11135     int foundCnt = -1;
11136     ldomNode * e = getNode();
11137     for ( ; e!=NULL; e = e->getParentNode() ) {
11138         switch ( e->getRendMethod() ) {
11139         case erm_final:
11140             foundCnt = cnt;
11141             break;
11142         case erm_invisible:
11143             foundCnt = -1;
11144             break;
11145         default:
11146             break;
11147         }
11148         cnt++;
11149     }
11150     if ( foundCnt != 0 )
11151         return false;
11152     // curent node is visible final formatted element (e.g. paragraph)
11153     return true;
11154 }
11155 
11156 /// move to next visible text node
nextVisibleText(bool thisBlockOnly)11157 bool ldomXPointerEx::nextVisibleText( bool thisBlockOnly )
11158 {
11159     ldomXPointerEx backup;
11160     if ( thisBlockOnly )
11161         backup = *this;
11162     while ( nextText(thisBlockOnly) ) {
11163         if ( isVisible() )
11164             return true;
11165     }
11166     if ( thisBlockOnly )
11167         *this = backup;
11168     return false;
11169 }
11170 
11171 /// returns true if current node is visible element or text
isVisible()11172 bool ldomXPointerEx::isVisible()
11173 {
11174     ldomNode * p;
11175     ldomNode * node = getNode();
11176     if ( node && node->isText() )
11177         p = node->getParentNode();
11178     else
11179         p = node;
11180     while ( p ) {
11181         if ( p->getRendMethod() == erm_invisible )
11182             return false;
11183         p = p->getParentNode();
11184     }
11185     return true;
11186 }
11187 
/// move to next text node
/// Walks the tree forward: descends with firstChild(), then advances with
/// nextSibling() (descending again into each sibling), and climbs with
/// parent() when siblings are exhausted. Stops on the first node for which
/// isText() is true. The offset is reset to 0 before walking.
/// @param thisBlockOnly when true, the result is true only if the text node
///        reached has the same getThisBlockNode() as the starting position
/// @return true when a text node was reached (and, with thisBlockOnly, it
///         belongs to the same block); false otherwise.
/// Note: the position is not restored here when false is returned.
bool ldomXPointerEx::nextText( bool thisBlockOnly )
{
    ldomNode * block = NULL;
    if ( thisBlockOnly )
        block = getThisBlockNode(); // remember the starting block for the final comparison
    setOffset( 0 );
    // first, go as deep as possible below the current node
    while ( firstChild() ) {
        if ( isText() )
            return (!thisBlockOnly || getThisBlockNode()==block);
    }
    for (;;) {
        // then try each following sibling, and what is below it
        while ( nextSibling() ) {
            if ( isText() )
                return (!thisBlockOnly || getThisBlockNode()==block);
            while ( firstChild() ) {
                if ( isText() )
                    return (!thisBlockOnly || getThisBlockNode()==block);
            }
        }
        // no more siblings at this level: continue from the parent
        if ( !parent() )
            return false;
    }
}
11212 
/// move to previous text node
/// Walks the tree backward: goes to prevSibling() (descending with
/// lastChild() into each sibling), and climbs with parent() when siblings
/// are exhausted. Stops on the first node for which isText() is true.
/// The offset is reset to 0 before walking.
/// @param thisBlockOnly when true, the result is true only if the text node
///        reached has the same getThisBlockNode() as the starting position
/// @return true when a text node was reached (and, with thisBlockOnly, it
///         belongs to the same block); false otherwise.
/// Note: the position is not restored here when false is returned.
bool ldomXPointerEx::prevText( bool thisBlockOnly )
{
    ldomNode * block = NULL;
    if ( thisBlockOnly )
        block = getThisBlockNode(); // remember the starting block for the final comparison
    setOffset( 0 );
    for (;;) {
        // try each preceding sibling, and the last nodes below it
        while ( prevSibling() ) {
            if ( isText() )
                return  (!thisBlockOnly || getThisBlockNode()==block);
            while ( lastChild() ) {
                if ( isText() )
                    return (!thisBlockOnly || getThisBlockNode()==block);
            }
        }
        // no more siblings at this level: continue from the parent
        if ( !parent() )
            return false;
    }
}
11233 
11234 /// move to previous visible text node
prevVisibleText(bool thisBlockOnly)11235 bool ldomXPointerEx::prevVisibleText( bool thisBlockOnly )
11236 {
11237     ldomXPointerEx backup;
11238     if ( thisBlockOnly )
11239         backup = *this;
11240     while ( prevText( thisBlockOnly ) )
11241         if ( isVisible() )
11242             return true;
11243     if ( thisBlockOnly )
11244         *this = backup;
11245     return false;
11246 }
11247 
11248 /// move to previous visible char
prevVisibleChar(bool thisBlockOnly)11249 bool ldomXPointerEx::prevVisibleChar( bool thisBlockOnly )
11250 {
11251     if ( isNull() )
11252         return false;
11253     if ( !isText() || !isVisible() || _data->getOffset()==0 ) {
11254         // move to previous text
11255         if ( !prevVisibleText(thisBlockOnly) )
11256             return false;
11257         ldomNode * node = getNode();
11258         lString32 text = node->getText();
11259         int textLen = text.length();
11260         _data->setOffset( textLen );
11261     }
11262     _data->addOffset(-1);
11263     return true;
11264 }
11265 
11266 /// move to next visible char
nextVisibleChar(bool thisBlockOnly)11267 bool ldomXPointerEx::nextVisibleChar( bool thisBlockOnly )
11268 {
11269     if ( isNull() )
11270         return false;
11271     if ( !isText() || !isVisible() ) {
11272         // move to next text
11273         if ( !nextVisibleText(thisBlockOnly) )
11274             return false;
11275         _data->setOffset( 0 );
11276         return true;
11277     }
11278     ldomNode * node = getNode();
11279     lString32 text = node->getText();
11280     int textLen = text.length();
11281     if ( _data->getOffset() == textLen ) {
11282         // move to next text
11283         if ( !nextVisibleText(thisBlockOnly) )
11284             return false;
11285         _data->setOffset( 0 );
11286         return true;
11287     }
11288     _data->addOffset(1);
11289     return true;
11290 }
11291 
// TODO: implement better behavior
/// Returns true if ch is one of the space characters listed below
/// (a fixed list, not the full set of Unicode white space).
inline bool IsUnicodeSpace( lChar32 ch )
{
    //return ch==' ';
    // Compare the whole code point: truncating it to 16 bits would make
    // characters outside the BMP (e.g. U+10020, U+13000) alias the
    // spaces below and be wrongly reported as spaces.
    switch ((unsigned int)ch) {
        case 0x0020:        // SPACE
        case 0x00A0:        // NO-BREAK SPACE
        case 0x2000:        // EN QUAD
        case 0x2001:        // EM QUAD
        case 0x2002:        // EN SPACE
        case 0x2003:        // EM SPACE
        case 0x2004:        // THREE-PER-EM SPACE
        case 0x2005:        // FOUR-PER-EM SPACE
        case 0x202F:        // NARROW NO-BREAK SPACE
        case 0x3000:        // IDEOGRAPHIC SPACE
            return true;
        default:
            break;
    }
    return false;
}
11311 
11312 // TODO: implement better behavior
IsUnicodeSpaceOrNull(lChar32 ch)11313 inline bool IsUnicodeSpaceOrNull( lChar32 ch )
11314 {
11315     return ch==0 || IsUnicodeSpace(ch);
11316 }
11317 
11318 // Note:
11319 //  ALL calls to IsUnicodeSpace and IsUnicodeSpaceOrNull in
11320 //  the *VisibleWord* functions below have been replaced with
11321 //  calls to IsWordSeparator and IsWordSeparatorOrNull.
//  The *Sentence* functions have not been modified, and have not been
11323 //  tested against this change to the *VisibleWord* functions that
11324 //  they use (but KOReader does not use these *Sentence* functions).
11325 
// For better accuracy than IsUnicodeSpace for detecting words
/// Thin wrapper: returns the result of lStr_isWordSeparator(ch).
inline bool IsWordSeparator( lChar32 ch )
{
    return lStr_isWordSeparator(ch);
}
11331 
IsWordSeparatorOrNull(lChar32 ch)11332 inline bool IsWordSeparatorOrNull( lChar32 ch )
11333 {
11334     if (ch==0) return true;
11335     return IsWordSeparator(ch);
11336 }
11337 
/// Returns true if ch is in the range [0x2e80, 0x2CEAF), which callers in
/// this file treat as "CJK char": a word boundary is allowed before it.
inline bool canWrapWordBefore( lChar32 ch ) {
    if ( ch < 0x2e80 )
        return false;
    return ch < 0x2CEAF;
}
11341 
/// Returns true if ch is in the range [0x2e80, 0x2CEAF), which callers in
/// this file treat as "CJK char": a word boundary is allowed after it.
inline bool canWrapWordAfter( lChar32 ch ) {
    const bool belowRange = ch < 0x2e80;
    const bool aboveRange = ch >= 0x2CEAF;
    return !belowRange && !aboveRange;
}
11345 
isVisibleWordChar()11346 bool ldomXPointerEx::isVisibleWordChar() {
11347     if ( isNull() )
11348         return false;
11349     if ( !isText() || !isVisible() )
11350         return false;
11351     ldomNode * node = getNode();
11352     lString32 text = node->getText();
11353     return !IsWordSeparator(text[_data->getOffset()]);
11354 }
11355 
/// move to previous visible word beginning
/// Walks backward (crossing into previous visible text nodes when needed)
/// until at least one non-separator char has been stepped over, and stops
/// at the offset of the first char of that word.
/// Word separators are decided by IsWordSeparator(); a char for which
/// canWrapWordBefore() is true (CJK) is a word start on its own.
/// @param thisBlockOnly forwarded to prevVisibleText()
/// @return true when positioned on a word start; false if the pointer is
///         null or prevVisibleText() finds no more text.
bool ldomXPointerEx::prevVisibleWordStart( bool thisBlockOnly )
{
    if ( isNull() )
        return false;
    ldomNode * node = NULL;
    lString32 text;
    for ( ;; ) {
        if ( !isText() || !isVisible() || _data->getOffset()==0 ) {
            // move to previous text, and start from its end
            if ( !prevVisibleText(thisBlockOnly) )
                return false;
            node = getNode();
            text = node->getText();
            int textLen = text.length();
            _data->setOffset( textLen );
        } else {
            node = getNode();
            text = node->getText();
        }
        bool foundNonSeparator = false;
        while ( _data->getOffset() > 0 && IsWordSeparator(text[_data->getOffset()-1]) )
            _data->addOffset(-1); // skip preceding space if any (we were on a visible word start)
        // step back over the chars of the word
        while ( _data->getOffset()>0 ) {
            if ( IsWordSeparator(text[ _data->getOffset()-1 ]) )
                break;
            foundNonSeparator = true;
            _data->addOffset(-1);
            if ( canWrapWordBefore( text[_data->getOffset()] ) ) // CJK char
                break;
        }
        if ( foundNonSeparator )
            return true;
        // only separators in this text node: loop to try the previous text node
    }
}
11391 
/// move to previous visible word end
/// Walks backward (crossing into previous visible text nodes when needed)
/// and stops at an offset > 0 that directly follows a non-separator char,
/// provided the position has changed since the call ('moved').
/// Word separators are decided by IsWordSeparator(); CJK chars are checked
/// with canWrapWordAfter().
/// @param thisBlockOnly forwarded to prevVisibleText()
/// @return true when positioned on a word end; false if the pointer is
///         null or prevVisibleText() finds no more text.
bool ldomXPointerEx::prevVisibleWordEnd( bool thisBlockOnly )
{
    if ( isNull() )
        return false;
    ldomNode * node = NULL;
    lString32 text;
    bool moved = false; // true once the position differs from the starting one
    for ( ;; ) {
        if ( !isText() || !isVisible() || _data->getOffset()==0 ) {
            // move to previous text, and start from its end
            if ( !prevVisibleText(thisBlockOnly) )
                return false;
            node = getNode();
            text = node->getText();
            int textLen = text.length();
            _data->setOffset( textLen );
            moved = true;
        } else {
            node = getNode();
            text = node->getText();
        }
        // skip separators
        while ( _data->getOffset() > 0 && IsWordSeparator(text[_data->getOffset()-1]) ) {
            _data->addOffset(-1);
            moved = true;
        }
        if ( moved && _data->getOffset()>0 )
            return true; // found!
        // skip non-separators
        while ( _data->getOffset()>0 ) {
            if ( IsWordSeparator(text[ _data->getOffset()-1 ]) )
                break;
            if ( moved && canWrapWordAfter( text[_data->getOffset()] ) ) // We moved to a CJK char
                return true;
            moved = true;
            _data->addOffset(-1);
        }
        // skip separators
        while ( _data->getOffset() > 0 && IsWordSeparator(text[_data->getOffset()-1]) ) {
            _data->addOffset(-1);
            moved = true;
        }
        if ( moved && _data->getOffset()>0 )
            return true; // found!
        // reached offset 0 without finding a word end: loop to try the previous text node
    }
}
11439 
/// move to next visible word beginning
/// Walks forward (crossing into next visible text nodes when needed) and
/// stops at an offset inside the text that holds a non-separator char,
/// provided the position has changed since the call ('moved').
/// Word separators are decided by IsWordSeparator(); a char for which
/// canWrapWordBefore() is true (CJK) is a word start on its own.
/// @param thisBlockOnly forwarded to nextVisibleText()
/// @return true when positioned on a word start; false if the pointer is
///         null or nextVisibleText() finds no more text.
bool ldomXPointerEx::nextVisibleWordStart( bool thisBlockOnly )
{
    if ( isNull() )
        return false;
    ldomNode * node = NULL;
    lString32 text;
    int textLen = 0;
    bool moved = false; // true once the position differs from the starting one
    for ( ;; ) {
        if ( !isText() || !isVisible() ) {
            // move to next text
            if ( !nextVisibleText(thisBlockOnly) )
                return false;
            node = getNode();
            text = node->getText();
            textLen = text.length();
            _data->setOffset( 0 );
            moved = true;
        } else {
            // already on a visible text: if at (or past) its end, advance to
            // following visible text nodes until one has chars at the offset
            for (;;) {
                node = getNode();
                text = node->getText();
                textLen = text.length();
                if ( _data->getOffset() < textLen )
                    break;
                if ( !nextVisibleText(thisBlockOnly) )
                    return false;
                _data->setOffset( 0 );
                moved = true;
            }
        }
        // skip separators
        while ( _data->getOffset()<textLen && IsWordSeparator(text[ _data->getOffset() ]) ) {
            _data->addOffset(1);
            moved = true;
        }
        if ( moved && _data->getOffset()<textLen )
            return true;
        // skip non-separators
        while ( _data->getOffset()<textLen ) {
            if ( IsWordSeparator(text[ _data->getOffset() ]) )
                break;
            if ( moved && canWrapWordBefore( text[_data->getOffset()] ) ) // We moved to a CJK char
                return true;
            moved = true;
            _data->addOffset(1);
        }
        // skip separators
        while ( _data->getOffset()<textLen && IsWordSeparator(text[ _data->getOffset() ]) ) {
            _data->addOffset(1);
            moved = true;
        }
        if ( moved && _data->getOffset()<textLen )
            return true;
        // reached the end of this text without finding a word start: loop
    }
}
11497 
11498 /// move to end of current word
thisVisibleWordEnd(bool thisBlockOnly)11499 bool ldomXPointerEx::thisVisibleWordEnd(bool thisBlockOnly)
11500 {
11501     CR_UNUSED(thisBlockOnly);
11502     if ( isNull() )
11503         return false;
11504     ldomNode * node = NULL;
11505     lString32 text;
11506     int textLen = 0;
11507     bool moved = false;
11508     if ( !isText() || !isVisible() )
11509         return false;
11510     node = getNode();
11511     text = node->getText();
11512     textLen = text.length();
11513     if ( _data->getOffset() >= textLen )
11514         return false;
11515     // skip separators
11516     while ( _data->getOffset()<textLen && IsWordSeparator(text[ _data->getOffset() ]) ) {
11517         _data->addOffset(1);
11518         //moved = true;
11519     }
11520     // skip non-separators
11521     while ( _data->getOffset()<textLen ) {
11522         if ( IsWordSeparator(text[ _data->getOffset() ]) )
11523             break;
11524         moved = true;
11525         _data->addOffset(1);
11526     }
11527     return moved;
11528 }
11529 
/// move to next visible word end
/// Walks forward (crossing into next visible text nodes when needed) until
/// at least one non-separator char has been stepped over, and stops right
/// after the last char of that word (or earlier, when the char now at the
/// offset satisfies canWrapWordAfter(), i.e. is CJK).
/// Word separators are decided by IsWordSeparator().
/// @param thisBlockOnly forwarded to nextVisibleText()
/// @return true when positioned on a word end; false if the pointer is
///         null or nextVisibleText() finds no more text.
bool ldomXPointerEx::nextVisibleWordEnd( bool thisBlockOnly )
{
    if ( isNull() )
        return false;
    ldomNode * node = NULL;
    lString32 text;
    int textLen = 0;
    //bool moved = false;
    for ( ;; ) {
        if ( !isText() || !isVisible() ) {
            // move to next text
            if ( !nextVisibleText(thisBlockOnly) )
                return false;
            node = getNode();
            text = node->getText();
            textLen = text.length();
            _data->setOffset( 0 );
            //moved = true;
        } else {
            // already on a visible text: if at (or past) its end, advance to
            // following visible text nodes until one has chars at the offset
            for (;;) {
                node = getNode();
                text = node->getText();
                textLen = text.length();
                if ( _data->getOffset() < textLen )
                    break;
                if ( !nextVisibleText(thisBlockOnly) )
                    return false;
                _data->setOffset( 0 );
            }
        }
        bool nonSeparatorFound = false;
        // skip non-separators
        while ( _data->getOffset()<textLen ) {
            if ( IsWordSeparator(text[ _data->getOffset() ]) )
                break;
            nonSeparatorFound = true;
            _data->addOffset(1);
            if ( canWrapWordAfter( text[_data->getOffset()] ) ) // We moved to a CJK char
                return true;
        }
        if ( nonSeparatorFound )
            return true;
        // skip separators
        while ( _data->getOffset()<textLen && IsWordSeparator(text[ _data->getOffset() ]) ) {
            _data->addOffset(1);
            //moved = true;
        }
        // skip non-separators
        while ( _data->getOffset()<textLen ) {
            if ( IsWordSeparator(text[ _data->getOffset() ]) )
                break;
            nonSeparatorFound = true;
            _data->addOffset(1);
            if ( canWrapWordAfter( text[_data->getOffset()] ) ) // We moved to a CJK char
                return true;
        }
        if ( nonSeparatorFound )
            return true;
        // only separators up to the end of this text: loop to try the next text node
    }
}
11591 
/// move to previous visible word beginning (in sentence)
/// Same walk as prevVisibleWordStart(), except that words are delimited by
/// IsUnicodeSpace() instead of IsWordSeparator(), and prevVisibleText() is
/// always called with thisBlockOnly=false.
/// @return true when positioned on a word start; false if the pointer is
///         null or prevVisibleText() finds no more text.
bool ldomXPointerEx::prevVisibleWordStartInSentence()
{
    if ( isNull() )
        return false;
    ldomNode * node = NULL;
    lString32 text;
    for ( ;; ) {
        if ( !isText() || !isVisible() || _data->getOffset()==0 ) {
            // move to previous text, and start from its end
            if ( !prevVisibleText(false) )
                return false;
            node = getNode();
            text = node->getText();
            int textLen = text.length();
            _data->setOffset( textLen );
        } else {
            node = getNode();
            text = node->getText();
        }
        bool foundNonSpace = false;
        while ( _data->getOffset() > 0 && IsUnicodeSpace(text[_data->getOffset()-1]) )
            _data->addOffset(-1); // skip preceding space if any (we were on a visible word start)
        // step back over the chars of the word
        while ( _data->getOffset()>0 ) {
            if ( IsUnicodeSpace(text[ _data->getOffset()-1 ]) )
                break;
            foundNonSpace = true;
            _data->addOffset(-1);
            if ( canWrapWordBefore( text[_data->getOffset()] ) ) // CJK char
                break;
        }
        if ( foundNonSpace )
            return true;
        // only spaces in this text node: loop to try the previous text node
    }
}
11627 
/// move to next visible word beginning (in sentence)
/// Same walk as nextVisibleWordStart(), except that words are delimited by
/// IsUnicodeSpace() instead of IsWordSeparator(), and nextVisibleText() is
/// always called with thisBlockOnly=false.
/// @return true when positioned on a word start; false if the pointer is
///         null or nextVisibleText() finds no more text.
bool ldomXPointerEx::nextVisibleWordStartInSentence()
{
    if ( isNull() )
        return false;
    ldomNode * node = NULL;
    lString32 text;
    int textLen = 0;
    bool moved = false; // true once the position differs from the starting one
    for ( ;; ) {
        if ( !isText() || !isVisible() ) {
            // move to next text
            if ( !nextVisibleText(false) )
                return false;
            node = getNode();
            text = node->getText();
            textLen = text.length();
            _data->setOffset( 0 );
            moved = true;
        } else {
            // already on a visible text: if at (or past) its end, advance to
            // following visible text nodes until one has chars at the offset
            for (;;) {
                node = getNode();
                text = node->getText();
                textLen = text.length();
                if ( _data->getOffset() < textLen )
                    break;
                if ( !nextVisibleText(false) )
                    return false;
                _data->setOffset( 0 );
                moved = true;
            }
        }
        // skip spaces
        while ( _data->getOffset()<textLen && IsUnicodeSpace(text[ _data->getOffset() ]) ) {
            _data->addOffset(1);
            moved = true;
        }
        if ( moved && _data->getOffset()<textLen )
            return true;
        // skip non-spaces
        while ( _data->getOffset()<textLen ) {
            if ( IsUnicodeSpace(text[ _data->getOffset() ]) )
                break;
            if ( moved && canWrapWordBefore( text[_data->getOffset()] ) ) // We moved to a CJK char
                return true;
            moved = true;
            _data->addOffset(1);
        }
        // skip spaces
        while ( _data->getOffset()<textLen && IsUnicodeSpace(text[ _data->getOffset() ]) ) {
            _data->addOffset(1);
            moved = true;
        }
        if ( moved && _data->getOffset()<textLen )
            return true;
        // reached the end of this text without finding a word start: loop
    }
}
11685 
11686 /// move to end of current word
thisVisibleWordEndInSentence()11687 bool ldomXPointerEx::thisVisibleWordEndInSentence()
11688 {
11689     if ( isNull() )
11690         return false;
11691     ldomNode * node = NULL;
11692     lString32 text;
11693     int textLen = 0;
11694     bool moved = false;
11695     if ( !isText() || !isVisible() )
11696         return false;
11697     node = getNode();
11698     text = node->getText();
11699     textLen = text.length();
11700     if ( _data->getOffset() >= textLen )
11701         return false;
11702     // skip spaces
11703     while ( _data->getOffset()<textLen && IsUnicodeSpace(text[ _data->getOffset() ]) ) {
11704         _data->addOffset(1);
11705         //moved = true;
11706     }
11707     // skip non-spaces
11708     while ( _data->getOffset()<textLen ) {
11709         if ( IsUnicodeSpace(text[ _data->getOffset() ]) )
11710             break;
11711         moved = true;
11712         _data->addOffset(1);
11713     }
11714     return moved;
11715 }
11716 
/// move to next visible word end (in sentence)
/// Same walk as nextVisibleWordEnd(), except that words are delimited by
/// IsUnicodeSpace() instead of IsWordSeparator().
/// NOTE(review): nextVisibleText() is called here with thisBlockOnly=true,
/// while the other *InSentence functions in this file pass false to
/// prevVisibleText()/nextVisibleText() — confirm this is intended.
/// @return true when positioned on a word end; false if the pointer is
///         null or nextVisibleText() finds no more text.
bool ldomXPointerEx::nextVisibleWordEndInSentence()
{
    if ( isNull() )
        return false;
    ldomNode * node = NULL;
    lString32 text;
    int textLen = 0;
    //bool moved = false;
    for ( ;; ) {
        if ( !isText() || !isVisible() ) {
            // move to next text
            if ( !nextVisibleText(true) )
                return false;
            node = getNode();
            text = node->getText();
            textLen = text.length();
            _data->setOffset( 0 );
            //moved = true;
        } else {
            // already on a visible text: if at (or past) its end, advance to
            // following visible text nodes until one has chars at the offset
            for (;;) {
                node = getNode();
                text = node->getText();
                textLen = text.length();
                if ( _data->getOffset() < textLen )
                    break;
                if ( !nextVisibleText(true) )
                    return false;
                _data->setOffset( 0 );
            }
        }
        bool nonSpaceFound = false;
        // skip non-spaces
        while ( _data->getOffset()<textLen ) {
            if ( IsUnicodeSpace(text[ _data->getOffset() ]) )
                break;
            nonSpaceFound = true;
            _data->addOffset(1);
            if ( canWrapWordAfter( text[_data->getOffset()] ) ) // We moved to a CJK char
                return true;
        }
        if ( nonSpaceFound )
            return true;
        // skip spaces
        while ( _data->getOffset()<textLen && IsUnicodeSpace(text[ _data->getOffset() ]) ) {
            _data->addOffset(1);
            //moved = true;
        }
        // skip non-spaces
        while ( _data->getOffset()<textLen ) {
            if ( IsUnicodeSpace(text[ _data->getOffset() ]) )
                break;
            nonSpaceFound = true;
            _data->addOffset(1);
            if ( canWrapWordAfter( text[_data->getOffset()] ) ) // We moved to a CJK char
                return true;
        }
        if ( nonSpaceFound )
            return true;
        // only spaces up to the end of this text: loop to try the next text node
    }
}
11778 
/// move to previous visible word end (in sentence)
/// Same walk as prevVisibleWordEnd(), except that words are delimited by
/// IsUnicodeSpace() instead of IsWordSeparator(), and prevVisibleText() is
/// always called with thisBlockOnly=false.
/// @return true when positioned on a word end; false if the pointer is
///         null or prevVisibleText() finds no more text.
bool ldomXPointerEx::prevVisibleWordEndInSentence()
{
    if ( isNull() )
        return false;
    ldomNode * node = NULL;
    lString32 text;
    bool moved = false; // true once the position differs from the starting one
    for ( ;; ) {
        if ( !isText() || !isVisible() || _data->getOffset()==0 ) {
            // move to previous text, and start from its end
            if ( !prevVisibleText(false) )
                return false;
            node = getNode();
            text = node->getText();
            int textLen = text.length();
            _data->setOffset( textLen );
            moved = true;
        } else {
            node = getNode();
            text = node->getText();
        }
        // skip spaces
        while ( _data->getOffset() > 0 && IsUnicodeSpace(text[_data->getOffset()-1]) ) {
            _data->addOffset(-1);
            moved = true;
        }
        if ( moved && _data->getOffset()>0 )
            return true; // found!
        // skip non-spaces
        while ( _data->getOffset()>0 ) {
            if ( IsUnicodeSpace(text[ _data->getOffset()-1 ]) )
                break;
            if ( moved && canWrapWordAfter( text[_data->getOffset()] ) ) // We moved to a CJK char
                return true;
            moved = true;
            _data->addOffset(-1);
        }
        // skip spaces
        while ( _data->getOffset() > 0 && IsUnicodeSpace(text[_data->getOffset()-1]) ) {
            _data->addOffset(-1);
            moved = true;
        }
        if ( moved && _data->getOffset()>0 )
            return true; // found!
        // reached offset 0 without finding a word end: loop to try the previous text node
    }
}
11826 
11827 /// returns true if current position is visible word beginning
isVisibleWordStart()11828 bool ldomXPointerEx::isVisibleWordStart()
11829 {
11830    if ( isNull() )
11831         return false;
11832     if ( !isText() || !isVisible() )
11833         return false;
11834     ldomNode * node = getNode();
11835     lString32 text = node->getText();
11836     int textLen = text.length();
11837     int i = _data->getOffset();
11838     // We're actually testing the boundary between the char at i-1 and
11839     // the char at i. So, we return true when [i] is the first letter
11840     // of a word.
11841     lChar32 currCh = i<textLen ? text[i] : 0;
11842     lChar32 prevCh = i<=textLen && i>0 ? text[i-1] : 0;
11843     if (canWrapWordBefore(currCh)) {
11844         // If [i] is a CJK char (that's what canWrapWordBefore()
11845         // checks), this is a visible word start.
11846         return true;
11847     }
11848     if (IsWordSeparatorOrNull(prevCh) && !IsWordSeparator(currCh)) {
11849         // If [i-1] is a space or punctuation (or [i] is the start of the text
11850         // node) and [i] is a letter: this is a visible word start.
11851         return true;
11852     }
11853     return false;
11854  }
11855 
11856 /// returns true if current position is visible word end
isVisibleWordEnd()11857 bool ldomXPointerEx::isVisibleWordEnd()
11858 {
11859     if ( isNull() )
11860         return false;
11861     if ( !isText() || !isVisible() )
11862         return false;
11863     ldomNode * node = getNode();
11864     lString32 text = node->getText();
11865     int textLen = text.length();
11866     int i = _data->getOffset();
11867     // We're actually testing the boundary between the char at i-1 and
11868     // the char at i. So, we return true when [i-1] is the last letter
11869     // of a word.
11870     lChar32 currCh = i>0 ? text[i-1] : 0;
11871     lChar32 nextCh = i<textLen ? text[i] : 0;
11872     if (canWrapWordAfter(currCh)) {
11873         // If [i-1] is a CJK char (that's what canWrapWordAfter()
11874         // checks), this is a visible word end.
11875         return true;
11876     }
11877     if (!IsWordSeparator(currCh) && IsWordSeparatorOrNull(nextCh)) {
11878         // If [i-1] is a letter and [i] is a space or punctuation (or [i-1] is
11879         // the last letter of a text node): this is a visible word end.
11880         return true;
11881     }
11882     return false;
11883 }
11884 
11885 /// returns block owner node of current node (or current node if it's block)
getThisBlockNode()11886 ldomNode * ldomXPointerEx::getThisBlockNode()
11887 {
11888     if ( isNull() )
11889         return NULL;
11890     ldomNode * node = getNode();
11891     if ( node->isText() )
11892         node = node->getParentNode();
11893     for (;;) {
11894         if ( !node )
11895             return NULL;
11896         lvdom_element_render_method rm = node->getRendMethod();
11897         switch ( rm ) {
11898         case erm_block:
11899         case erm_final:
11900         case erm_table:
11901         case erm_table_row_group:
11902         case erm_table_row:
11903             return node;
11904         default:
11905             break; // ignore
11906         }
11907         node = node->getParentNode();
11908     }
11909 }
11910 
11911 /// returns true if points to last visible text inside block element
isLastVisibleTextInBlock()11912 bool ldomXPointerEx::isLastVisibleTextInBlock()
11913 {
11914     if ( !isText() )
11915         return false;
11916     ldomXPointerEx pos(*this);
11917     return !pos.nextVisibleText(true);
11918 }
11919 
11920 /// returns true if points to first visible text inside block element
isFirstVisibleTextInBlock()11921 bool ldomXPointerEx::isFirstVisibleTextInBlock()
11922 {
11923     if ( !isText() )
11924         return false;
11925     ldomXPointerEx pos(*this);
11926     return !pos.prevVisibleText(true);
11927 }
11928 
11929 // sentence navigation
11930 
11931 /// returns true if points to beginning of sentence
isSentenceStart()11932 bool ldomXPointerEx::isSentenceStart()
11933 {
11934     if ( isNull() )
11935         return false;
11936     if ( !isText() || !isVisible() )
11937         return false;
11938     ldomNode * node = getNode();
11939     lString32 text = node->getText();
11940     int textLen = text.length();
11941     int i = _data->getOffset();
11942     lChar32 currCh = i<textLen ? text[i] : 0;
11943     lChar32 prevCh = i>0 ? text[i-1] : 0;
11944     lChar32 prevNonSpace = 0;
11945     for ( ;i>0; i-- ) {
11946         lChar32 ch = text[i-1];
11947         if ( !IsUnicodeSpace(ch) ) {
11948             prevNonSpace = ch;
11949             break;
11950         }
11951     }
11952 #if 0
11953     // At this implementation it's a wrong to check previous node
11954     if ( !prevNonSpace ) {
11955         ldomXPointerEx pos(*this);
11956         while ( !prevNonSpace && pos.prevVisibleText(true) ) {
11957             lString32 prevText = pos.getText();
11958             for ( int j=prevText.length()-1; j>=0; j-- ) {
11959                 lChar32 ch = prevText[j];
11960                 if ( !IsUnicodeSpace(ch) ) {
11961                     prevNonSpace = ch;
11962                     break;
11963                 }
11964             }
11965         }
11966     }
11967 #endif
11968 
11969     // skip separated separator.
11970     if (1 == textLen) {
11971         switch (currCh) {
11972             case '.':
11973             case '?':
11974             case '!':
11975             case U'\x2026': // horizontal ellypsis
11976                 return false;
11977         }
11978     }
11979 
11980     if ( !IsUnicodeSpace(currCh) && IsUnicodeSpaceOrNull(prevCh) ) {
11981         switch (prevNonSpace) {
11982         case 0:
11983         case '.':
11984         case '?':
11985         case '!':
11986         case U'\x2026': // horizontal ellypsis
11987             return true;
11988         default:
11989             return false;
11990         }
11991     }
11992     return false;
11993 }
11994 
11995 /// returns true if points to end of sentence
isSentenceEnd()11996 bool ldomXPointerEx::isSentenceEnd()
11997 {
11998     if ( isNull() )
11999         return false;
12000     if ( !isText() || !isVisible() )
12001         return false;
12002     ldomNode * node = getNode();
12003     lString32 text = node->getText();
12004     int textLen = text.length();
12005     int i = _data->getOffset();
12006     lChar32 currCh = i<textLen ? text[i] : 0;
12007     lChar32 prevCh = i>0 ? text[i-1] : 0;
12008     if ( IsUnicodeSpaceOrNull(currCh) ) {
12009         switch (prevCh) {
12010         case 0:
12011         case '.':
12012         case '?':
12013         case '!':
12014         case U'\x2026': // horizontal ellypsis
12015             return true;
12016         default:
12017             break;
12018         }
12019     }
12020     // word is not ended with . ! ?
12021     // check whether it's last word of block
12022     ldomXPointerEx pos(*this);
12023     //return !pos.nextVisibleWordStartInSentence();
12024     return !pos.thisVisibleWordEndInSentence();
12025 }
12026 
12027 /// move to beginning of current visible text sentence
thisSentenceStart()12028 bool ldomXPointerEx::thisSentenceStart()
12029 {
12030     if ( isNull() )
12031         return false;
12032     if ( !isText() && !nextVisibleText() && !prevVisibleText() )
12033         return false;
12034     for (;;) {
12035         if ( isSentenceStart() )
12036             return true;
12037         if ( !prevVisibleWordStartInSentence() )
12038             return false;
12039     }
12040 }
12041 
12042 /// move to end of current visible text sentence
thisSentenceEnd()12043 bool ldomXPointerEx::thisSentenceEnd()
12044 {
12045     if ( isNull() )
12046         return false;
12047     if ( !isText() && !nextVisibleText() && !prevVisibleText() )
12048         return false;
12049     for (;;) {
12050         if ( isSentenceEnd() )
12051             return true;
12052         if ( !nextVisibleWordEndInSentence() )
12053             return false;
12054     }
12055 }
12056 
12057 /// move to beginning of next visible text sentence
nextSentenceStart()12058 bool ldomXPointerEx::nextSentenceStart()
12059 {
12060     if ( !isSentenceStart() && !thisSentenceEnd() )
12061         return false;
12062     for (;;) {
12063         if ( !nextVisibleWordStartInSentence() )
12064             return false;
12065         if ( isSentenceStart() )
12066             return true;
12067     }
12068 }
12069 
12070 /// move to beginning of prev visible text sentence
prevSentenceStart()12071 bool ldomXPointerEx::prevSentenceStart()
12072 {
12073     if ( !thisSentenceStart() )
12074         return false;
12075     for (;;) {
12076         if ( !prevVisibleWordStartInSentence() )
12077             return false;
12078         if ( isSentenceStart() )
12079             return true;
12080     }
12081 }
12082 
12083 /// move to end of next visible text sentence
nextSentenceEnd()12084 bool ldomXPointerEx::nextSentenceEnd()
12085 {
12086     if ( !nextSentenceStart() )
12087         return false;
12088     return thisSentenceEnd();
12089 }
12090 
12091 /// move to end of next visible text sentence
prevSentenceEnd()12092 bool ldomXPointerEx::prevSentenceEnd()
12093 {
12094     if ( !thisSentenceStart() )
12095         return false;
12096     for (;;) {
12097         if ( !prevVisibleWordEndInSentence() )
12098             return false;
12099         if ( isSentenceEnd() )
12100             return true;
12101     }
12102 }
12103 
12104 /// if start is after end, swap start and end
sort()12105 void ldomXRange::sort()
12106 {
12107     if ( _start.isNull() || _end.isNull() )
12108         return;
12109     if ( _start.compare(_end) > 0 ) {
12110         ldomXPointer p1( _start );
12111         ldomXPointer p2( _end );
12112         _start = p2;
12113         _end = p1;
12114     }
12115 }
12116 
12117 /// backward iteration by elements of DOM three
prevElement()12118 bool ldomXPointerEx::prevElement()
12119 {
12120     if ( !ensureElement() )
12121         return false;
12122     for (;;) {
12123         if ( prevSiblingElement() ) {
12124             while ( lastElementChild() )
12125                 ;
12126             return true;
12127         }
12128         if ( !parent() )
12129             return false;
12130         return true;
12131     }
12132 }
12133 
12134 /// move to next final visible node (~paragraph)
nextVisibleFinal()12135 bool ldomXPointerEx::nextVisibleFinal()
12136 {
12137     for ( ;; ) {
12138         if ( !nextElement() )
12139             return false;
12140         if ( isVisibleFinal() )
12141             return true;
12142     }
12143 }
12144 
12145 /// move to previous final visible node (~paragraph)
prevVisibleFinal()12146 bool ldomXPointerEx::prevVisibleFinal()
12147 {
12148     for ( ;; ) {
12149         if ( !prevElement() )
12150             return false;
12151         if ( isVisibleFinal() )
12152             return true;
12153     }
12154 }
12155 
12156 /// run callback for each node in range
forEach(ldomNodeCallback * callback)12157 void ldomXRange::forEach( ldomNodeCallback * callback )
12158 {
12159     if ( isNull() )
12160         return;
12161     ldomXRange pos( _start, _end, 0 );
12162     bool allowGoRecurse = true;
12163     while ( !pos._start.isNull() && pos._start.compare( _end ) < 0 ) {
12164         // do something
12165         ldomNode * node = pos._start.getNode();
12166         //lString32 path = pos._start.toString();
12167         //CRLog::trace( "%s", UnicodeToUtf8(path).c_str() );
12168         if ( node->isElement() ) {
12169             allowGoRecurse = callback->onElement( &pos.getStart() );
12170         } else if ( node->isText() ) {
12171             lString32 txt = node->getText();
12172             pos._end = pos._start;
12173             pos._start.setOffset( 0 );
12174             pos._end.setOffset( txt.length() );
12175             if ( _start.getNode() == node ) {
12176                 pos._start.setOffset( _start.getOffset() );
12177             }
12178             if ( _end.getNode() == node && pos._end.getOffset() > _end.getOffset()) {
12179                 pos._end.setOffset( _end.getOffset() );
12180             }
12181             callback->onText( &pos );
12182             allowGoRecurse = false;
12183         }
12184         // move to next item
12185         bool stop = false;
12186         if ( !allowGoRecurse || !pos._start.child(0) ) {
12187              while ( !pos._start.nextSibling() ) {
12188                 if ( !pos._start.parent() ) {
12189                     stop = true;
12190                     break;
12191                 }
12192             }
12193         }
12194         if ( stop )
12195             break;
12196     }
12197 }
12198 
12199 class ldomWordsCollector : public ldomNodeCallback {
12200     LVArray<ldomWord> & _list;
operator =(ldomWordsCollector &)12201 	ldomWordsCollector & operator = (ldomWordsCollector&) {
12202 		// no assignment
12203         return *this;
12204     }
12205 public:
ldomWordsCollector(LVArray<ldomWord> & list)12206     ldomWordsCollector( LVArray<ldomWord> & list )
12207         : _list( list )
12208     {
12209     }
12210     /// called for each found text fragment in range
onText(ldomXRange * nodeRange)12211     virtual void onText( ldomXRange * nodeRange )
12212     {
12213         ldomNode * node = nodeRange->getStart().getNode();
12214         lString32 text = node->getText();
12215         int len = text.length();
12216         int end = nodeRange->getEnd().getOffset();
12217         if ( len>end )
12218             len = end;
12219         int beginOfWord = -1;
12220         for ( int i=nodeRange->getStart().getOffset(); i <= len; i++ ) {
12221             // int alpha = lGetCharProps(text[i]) & CH_PROP_ALPHA;
12222             // Also allow digits (years, page numbers) to be considered words
12223             // int alpha = lGetCharProps(text[i]) & (CH_PROP_ALPHA|CH_PROP_DIGIT|CH_PROP_HYPHEN);
12224             // We use lStr_isWordSeparator() as the other word finding/skipping functions do,
12225             // so they all share the same notion of what a word is.
12226             int alpha = !lStr_isWordSeparator(text[i]); // alpha, number, CJK char
12227             if (alpha && beginOfWord<0 ) {
12228                 beginOfWord = i;
12229             }
12230             if ( !alpha && beginOfWord>=0) { // space, punctuation, sign, paren...
12231                 _list.add( ldomWord( node, beginOfWord, i ) );
12232                 beginOfWord = -1;
12233             }
12234             if (lGetCharProps(text[i]) == CH_PROP_CJK && i < len) { // a CJK char makes its own word
12235                 _list.add( ldomWord( node, i, i+1 ) );
12236                 beginOfWord = -1;
12237             }
12238         }
12239     }
12240     /// called for each found node in range
onElement(ldomXPointerEx * ptr)12241     virtual bool onElement( ldomXPointerEx * ptr )
12242     {
12243         ldomNode * elem = ptr->getNode();
12244         if ( elem->getRendMethod()==erm_invisible )
12245             return false;
12246         return true;
12247     }
12248 };
12249 
12250 /// get all words from specified range
getRangeWords(LVArray<ldomWord> & list)12251 void ldomXRange::getRangeWords( LVArray<ldomWord> & list )
12252 {
12253     ldomWordsCollector collector( list );
12254     forEach( &collector );
12255 }
12256 
12257 /// adds all visible words from range, returns number of added words
addRangeWords(ldomXRange & range,bool)12258 int ldomWordExList::addRangeWords( ldomXRange & range, bool /*trimPunctuation*/ ) {
12259     LVArray<ldomWord> list;
12260     range.getRangeWords( list );
12261     for ( int i=0; i<list.length(); i++ )
12262         add( new ldomWordEx(list[i]) );
12263     init();
12264     return list.length();
12265 }
12266 
getMiddlePoint()12267 lvPoint ldomMarkedRange::getMiddlePoint() {
12268     if ( start.y==end.y ) {
12269         return lvPoint( ((start.x + end.x)>>1), start.y );
12270     } else {
12271         return start;
12272     }
12273 }
12274 
12275 /// returns distance (dx+dy) from specified point to middle point
calcDistance(int x,int y,MoveDirection dir)12276 int ldomMarkedRange::calcDistance( int x, int y, MoveDirection dir ) {
12277     lvPoint middle = getMiddlePoint();
12278     int dx = middle.x - x;
12279     int dy = middle.y - y;
12280     if ( dx<0 ) dx = -dx;
12281     if ( dy<0 ) dy = -dy;
12282     switch (dir) {
12283     case DIR_LEFT:
12284     case DIR_RIGHT:
12285         return dx + dy;
12286     case DIR_UP:
12287     case DIR_DOWN:
12288         return dx + dy*100;
12289     case DIR_ANY:
12290         return dx + dy;
12291     }
12292 
12293 
12294     return dx + dy;
12295 }
12296 
12297 /// select word
selectWord(ldomWordEx * word,MoveDirection dir)12298 void ldomWordExList::selectWord( ldomWordEx * word, MoveDirection dir )
12299 {
12300     selWord = word;
12301     if ( selWord ) {
12302         lvPoint middle = word->getMark().getMiddlePoint();
12303         if ( x==-1 || (dir!=DIR_UP && dir!=DIR_DOWN) )
12304             x = middle.x;
12305         y = middle.y;
12306     } else {
12307         x = y = -1;
12308     }
12309 }
12310 
12311 /// select next word in specified direction
selectNextWord(MoveDirection dir,int moveBy)12312 ldomWordEx * ldomWordExList::selectNextWord( MoveDirection dir, int moveBy )
12313 {
12314     if ( !selWord )
12315         return selectMiddleWord();
12316     pattern.clear();
12317     for ( int i=0; i<moveBy; i++ ) {
12318         ldomWordEx * word = findNearestWord( x, y, dir );
12319         if ( word )
12320             selectWord( word, dir );
12321     }
12322     return selWord;
12323 }
12324 
12325 /// select middle word in range
selectMiddleWord()12326 ldomWordEx * ldomWordExList::selectMiddleWord() {
12327     if ( minx==-1 )
12328         init();
12329     ldomWordEx * word = findNearestWord( (maxx+minx)/2, (miny+maxy)/2, DIR_ANY );
12330     selectWord(word, DIR_ANY);
12331     return word;
12332 }
12333 
findWordByPattern()12334 ldomWordEx * ldomWordExList::findWordByPattern()
12335 {
12336     ldomWordEx * lastBefore = NULL;
12337     ldomWordEx * firstAfter = NULL;
12338     bool selReached = false;
12339     for ( int i=0; i<length(); i++ ) {
12340         ldomWordEx * item = get(i);
12341         if ( item==selWord )
12342             selReached = true;
12343         lString32 text = item->getText();
12344         text.lowercase();
12345         bool flg = true;
12346         for ( int j=0; j<pattern.length(); j++ ) {
12347             if ( j>=text.length() ) {
12348                 flg = false;
12349                 break;
12350             }
12351             lString32 chars = pattern[j];
12352             chars.lowercase();
12353             bool charFound = false;
12354             for ( int k=0; k<chars.length(); k++ ) {
12355                 if ( chars[k]==text[j] ) {
12356                     charFound = true;
12357                     break;
12358                 }
12359             }
12360             if ( !charFound ) {
12361                 flg = false;
12362                 break;
12363             }
12364         }
12365         if ( !flg )
12366             continue;
12367         if ( selReached ) {
12368             if ( firstAfter==NULL )
12369                 firstAfter = item;
12370         } else {
12371             lastBefore = item;
12372         }
12373     }
12374 
12375     if ( firstAfter )
12376         return firstAfter;
12377     else
12378         return lastBefore;
12379 }
12380 
12381 /// try append search pattern and find word
appendPattern(lString32 chars)12382 ldomWordEx * ldomWordExList::appendPattern(lString32 chars)
12383 {
12384     pattern.add(chars);
12385     ldomWordEx * foundWord = findWordByPattern();
12386 
12387     if ( foundWord ) {
12388         selectWord(foundWord, DIR_ANY);
12389     } else {
12390         pattern.erase(pattern.length()-1, 1);
12391     }
12392     return foundWord;
12393 }
12394 
12395 /// remove last character from pattern and try to search
reducePattern()12396 ldomWordEx * ldomWordExList::reducePattern()
12397 {
12398     if ( pattern.length()==0 )
12399         return NULL;
12400     pattern.erase(pattern.length()-1, 1);
12401     ldomWordEx * foundWord = findWordByPattern();
12402 
12403     if ( foundWord )
12404         selectWord(foundWord, DIR_ANY);
12405     return foundWord;
12406 }
12407 
12408 /// find word nearest to specified point
findNearestWord(int x,int y,MoveDirection dir)12409 ldomWordEx * ldomWordExList::findNearestWord( int x, int y, MoveDirection dir ) {
12410     if ( !length() )
12411         return NULL;
12412     int bestDistance = -1;
12413     ldomWordEx * bestWord = NULL;
12414     ldomWordEx * defWord = (dir==DIR_LEFT || dir==DIR_UP) ? get(length()-1) : get(0);
12415     int i;
12416     if ( dir==DIR_LEFT || dir==DIR_RIGHT ) {
12417         int thisLineY = -1;
12418         int thisLineDy = -1;
12419         for ( i=0; i<length(); i++ ) {
12420             ldomWordEx * item = get(i);
12421             lvPoint middle = item->getMark().getMiddlePoint();
12422             int dy = middle.y - y;
12423             if ( dy<0 ) dy = -dy;
12424             if ( thisLineY==-1 || thisLineDy>dy ) {
12425                 thisLineY = middle.y;
12426                 thisLineDy = dy;
12427             }
12428         }
12429         ldomWordEx * nextLineWord = NULL;
12430         for ( i=0; i<length(); i++ ) {
12431             ldomWordEx * item = get(i);
12432             if ( dir!=DIR_ANY && item==selWord )
12433                 continue;
12434             ldomMarkedRange * mark = &item->getMark();
12435             lvPoint middle = mark->getMiddlePoint();
12436             switch ( dir ) {
12437             case DIR_LEFT:
12438                 if ( middle.y<thisLineY )
12439                     nextLineWord = item; // last word of prev line
12440                 if ( middle.x>=x )
12441                     continue;
12442                 break;
12443             case DIR_RIGHT:
12444                 if ( nextLineWord==NULL && middle.y>thisLineY )
12445                     nextLineWord = item; // first word of next line
12446                 if ( middle.x<=x )
12447                     continue;
12448                 break;
12449             case DIR_UP:
12450             case DIR_DOWN:
12451             case DIR_ANY:
12452                 // none
12453                 break;
12454             }
12455             if ( middle.y!=thisLineY )
12456                 continue;
12457             int dist = mark->calcDistance(x, y, dir);
12458             if ( bestDistance==-1 || dist<bestDistance ) {
12459                 bestWord = item;
12460                 bestDistance = dist;
12461             }
12462         }
12463         if ( bestWord!=NULL )
12464             return bestWord; // found in the same line
12465         if ( nextLineWord!=NULL  )
12466             return nextLineWord;
12467         return defWord;
12468     }
12469     for ( i=0; i<length(); i++ ) {
12470         ldomWordEx * item = get(i);
12471         if ( dir!=DIR_ANY && item==selWord )
12472             continue;
12473         ldomMarkedRange * mark = &item->getMark();
12474         lvPoint middle = mark->getMiddlePoint();
12475         if ( dir==DIR_UP && middle.y >= y )
12476             continue;
12477         if ( dir==DIR_DOWN && middle.y <= y )
12478             continue;
12479 
12480         int dist = mark->calcDistance(x, y, dir);
12481         if ( bestDistance==-1 || dist<bestDistance ) {
12482             bestWord = item;
12483             bestDistance = dist;
12484         }
12485     }
12486     if ( bestWord!=NULL )
12487         return bestWord;
12488     return defWord;
12489 }
12490 
init()12491 void ldomWordExList::init()
12492 {
12493     if ( !length() )
12494         return;
12495     for ( int i=0; i<length(); i++ ) {
12496         ldomWordEx * item = get(i);
12497         lvPoint middle = item->getMark().getMiddlePoint();
12498         if ( i==0 || minx > middle.x )
12499             minx = middle.x;
12500         if ( i==0 || maxx < middle.x )
12501             maxx = middle.x;
12502         if ( i==0 || miny > middle.y )
12503             miny = middle.y;
12504         if ( i==0 || maxy < middle.y )
12505             maxy = middle.y;
12506     }
12507 }
12508 
12509 
12510 class ldomTextCollector : public ldomNodeCallback
12511 {
12512 private:
12513     bool lastText;
12514     bool newBlock;
12515     lChar32  delimiter;
12516     int  maxLen;
12517     lString32 text;
12518 public:
ldomTextCollector(lChar32 blockDelimiter,int maxTextLen)12519     ldomTextCollector( lChar32 blockDelimiter, int maxTextLen )
12520         : lastText(false), newBlock(true), delimiter( blockDelimiter), maxLen( maxTextLen )
12521     {
12522     }
12523     /// destructor
~ldomTextCollector()12524     virtual ~ldomTextCollector() { }
12525     /// called for each found text fragment in range
onText(ldomXRange * nodeRange)12526     virtual void onText( ldomXRange * nodeRange )
12527     {
12528         if ( newBlock && !text.empty()) {
12529             text << delimiter;
12530         }
12531         lString32 txt = nodeRange->getStart().getNode()->getText();
12532         int start = nodeRange->getStart().getOffset();
12533         int end = nodeRange->getEnd().getOffset();
12534         if ( start < end ) {
12535             text << txt.substr( start, end-start );
12536         }
12537         lastText = true;
12538         newBlock = false;
12539     }
12540     /// called for each found node in range
onElement(ldomXPointerEx * ptr)12541     virtual bool onElement( ldomXPointerEx * ptr )
12542     {
12543 #if BUILD_LITE!=1
12544         ldomNode * elem = (ldomNode *)ptr->getNode();
12545         // Allow tweaking that with hints
12546         css_style_ref_t style = elem->getStyle();
12547         if ( STYLE_HAS_CR_HINT(style, TEXT_SELECTION_SKIP) ) {
12548             return false;
12549         }
12550         else if ( STYLE_HAS_CR_HINT(style, TEXT_SELECTION_INLINE) ) {
12551             newBlock = false;
12552             return true;
12553         }
12554         else if ( STYLE_HAS_CR_HINT(style, TEXT_SELECTION_BLOCK) ) {
12555             newBlock = true;
12556             return true;
12557         }
12558         lvdom_element_render_method rm = elem->getRendMethod();
12559         if ( rm == erm_invisible )
12560             return false;
12561         if ( rm == erm_inline ) {
12562             // Don't set newBlock if rendering method is erm_inline,
12563             // no matter the original CSS display.
12564             // (Don't reset any previously set and not consumed newBlock)
12565             return true;
12566         }
12567         // For other rendering methods (that would bring newBlock=true),
12568         // look at the initial CSS display, as we might have boxed some
12569         // inline-like elements for rendering purpose.
12570         css_display_t d = style->display;
12571         if ( d <= css_d_inline || d == css_d_inline_block || d == css_d_inline_table ) {
12572             // inline, ruby; consider inline-block/-table as inline, in case
12573             // they don't contain much (if they do, some inner block element
12574             // will set newBlock=true).
12575             return true;
12576         }
12577         // Otherwise, it's a block like node, and we want a \n before the next text
12578         newBlock = true;
12579         return true;
12580 #else
12581         newBlock = true;
12582         return true;
12583 #endif
12584     }
12585     /// get collected text
getText()12586     lString32 getText() { return text; }
12587 };
12588 
12589 /// returns text between two XPointer positions
getRangeText(lChar32 blockDelimiter,int maxTextLen)12590 lString32 ldomXRange::getRangeText( lChar32 blockDelimiter, int maxTextLen )
12591 {
12592     ldomTextCollector callback( blockDelimiter, maxTextLen );
12593     forEach( &callback );
12594     return removeSoftHyphens( callback.getText() );
12595 }
12596 
12597 /// returns href attribute of <A> element, plus xpointer of <A> element itself
getHRef(ldomXPointer & a_xpointer)12598 lString32 ldomXPointer::getHRef(ldomXPointer & a_xpointer)
12599 {
12600     if ( isNull() )
12601         return lString32::empty_str;
12602     ldomNode * node = getNode();
12603     while ( node && !node->isElement() )
12604         node = node->getParentNode();
12605     while ( node && node->getNodeId()!=el_a )
12606         node = node->getParentNode();
12607     if ( !node )
12608         return lString32::empty_str;
12609     a_xpointer.setNode(node);
12610     a_xpointer.setOffset(0);
12611     lString32 ref = node->getAttributeValue( LXML_NS_ANY, attr_href );
12612     if (!ref.empty() && ref[0] != '#')
12613         ref = DecodeHTMLUrlString(ref);
12614     return ref;
12615 }
12616 
12617 /// returns href attribute of <A> element, null string if not found
getHRef()12618 lString32 ldomXPointer::getHRef()
12619 {
12620     ldomXPointer unused_a_xpointer;
12621     return getHRef(unused_a_xpointer);
12622 }
12623 
12624 /// returns href attribute of <A> element, plus xpointer of <A> element itself
getHRef(ldomXPointer & a_xpointer)12625 lString32 ldomXRange::getHRef(ldomXPointer & a_xpointer)
12626 {
12627     if ( isNull() )
12628         return lString32::empty_str;
12629     return _start.getHRef(a_xpointer);
12630 }
12631 
12632 /// returns href attribute of <A> element, null string if not found
getHRef()12633 lString32 ldomXRange::getHRef()
12634 {
12635     if ( isNull() )
12636         return lString32::empty_str;
12637     return _start.getHRef();
12638 }
12639 
12640 
LVParseXMLStream(LVStreamRef stream,const elem_def_t * elem_table,const attr_def_t * attr_table,const ns_def_t * ns_table)12641 ldomDocument * LVParseXMLStream( LVStreamRef stream,
12642                               const elem_def_t * elem_table,
12643                               const attr_def_t * attr_table,
12644                               const ns_def_t * ns_table )
12645 {
12646     if ( stream.isNull() )
12647         return NULL;
12648     bool error = true;
12649     ldomDocument * doc;
12650     doc = new ldomDocument();
12651     doc->setDocFlags( 0 );
12652 
12653     ldomDocumentWriter writer(doc);
12654     doc->setNodeTypes( elem_table );
12655     doc->setAttributeTypes( attr_table );
12656     doc->setNameSpaceTypes( ns_table );
12657 
12658     /// FB2 format
12659     LVFileFormatParser * parser = new LVXMLParser(stream, &writer);
12660     if ( parser->CheckFormat() ) {
12661         if ( parser->Parse() ) {
12662             error = false;
12663         }
12664     }
12665     delete parser;
12666     if ( error ) {
12667         delete doc;
12668         doc = NULL;
12669     }
12670     return doc;
12671 }
12672 
LVParseHTMLStream(LVStreamRef stream,const elem_def_t * elem_table,const attr_def_t * attr_table,const ns_def_t * ns_table)12673 ldomDocument * LVParseHTMLStream( LVStreamRef stream,
12674                               const elem_def_t * elem_table,
12675                               const attr_def_t * attr_table,
12676                               const ns_def_t * ns_table )
12677 {
12678     if ( stream.isNull() )
12679         return NULL;
12680     bool error = true;
12681     ldomDocument * doc;
12682     doc = new ldomDocument();
12683     doc->setDocFlags( 0 );
12684 
12685     ldomDocumentWriterFilter writerFilter(doc, false, HTML_AUTOCLOSE_TABLE);
12686     doc->setNodeTypes( elem_table );
12687     doc->setAttributeTypes( attr_table );
12688     doc->setNameSpaceTypes( ns_table );
12689 
12690     /// FB2 format
12691     LVFileFormatParser * parser = new LVHTMLParser(stream, &writerFilter);
12692     if ( parser->CheckFormat() ) {
12693         if ( parser->Parse() ) {
12694             error = false;
12695         }
12696     }
12697     delete parser;
12698     if ( error ) {
12699         delete doc;
12700         doc = NULL;
12701     }
12702     return doc;
12703 }
12704 
#if 0
// Disabled helper: returns a copy of 'path' where each '/' and '\' has been
// replaced with '_'.
static lString32 escapeDocPath( lString32 path )
{
    const int count = path.length();
    for ( int idx = 0; idx < count; idx++ ) {
        const lChar32 c = path[idx];
        const bool isPathSeparator = ( c == '/' || c == '\\' );
        if ( isPathSeparator )
            path[idx] = '_';
    }
    return path;
}
#endif
12716 
12717 /////////////////////////////////////////////////////////////////
12718 /// ldomDocumentFragmentWriter
// Used for EPUB: each individual HTML file of the EPUB goes through it, and
// it drives ldomDocumentWriter to build one single document from all of them.
12721 
convertId(lString32 id)12722 lString32 ldomDocumentFragmentWriter::convertId( lString32 id )
12723 {
12724     if ( !codeBasePrefix.empty() ) {
12725         return codeBasePrefix + "_" + " " + id;//add a space for later
12726     }
12727     return id;
12728 }
12729 
convertHref(lString32 href)12730 lString32 ldomDocumentFragmentWriter::convertHref( lString32 href )
12731 {
12732     if ( href.pos("://")>=0 )
12733         return href; // fully qualified href: no conversion
12734     if ( href.length() > 10 && href[4] == ':' && href.startsWith(lString32("data:image/")) )
12735         return href; // base64 encoded image (<img src="...>): no conversion
12736 
12737     //CRLog::trace("convertHref(%s, codeBase=%s, filePathName=%s)", LCSTR(href), LCSTR(codeBase), LCSTR(filePathName));
12738 
12739     if (href[0] == '#') {
12740         // Link to anchor in the same docFragment
12741         lString32 replacement = pathSubstitutions.get(filePathName);
12742         if (replacement.empty())
12743             return href;
12744         lString32 p = cs32("#") + replacement + "_" + " " + href.substr(1);
12745         //CRLog::trace("href %s -> %s", LCSTR(href), LCSTR(p));
12746         return p;
12747     }
12748 
12749     // href = LVCombinePaths(codeBase, href);
12750 
12751     // Depending on what's calling us, href may or may not have
12752     // gone thru DecodeHTMLUrlString() to decode %-encoded bits.
12753     // We'll need to try again with DecodeHTMLUrlString() if not
12754     // initially found in "pathSubstitutions" (whose filenames went
12755     // thru DecodeHTMLUrlString(), and so did 'codeBase').
12756 
12757     // resolve relative links
12758     lString32 p, id; // path, id
12759     if ( !href.split2(cs32("#"), p, id) )
12760         p = href;
12761     if ( p.empty() ) {
12762         //CRLog::trace("codebase = %s -> href = %s", LCSTR(codeBase), LCSTR(href));
12763         if ( codeBasePrefix.empty() )
12764             return LVCombinePaths(codeBase, href);
12765         p = codeBasePrefix;
12766     }
12767     else {
12768         lString32 replacement = pathSubstitutions.get(LVCombinePaths(codeBase, p));
12769         //CRLog::trace("href %s -> %s", LCSTR(p), LCSTR(replacement));
12770         if ( !replacement.empty() )
12771             p = replacement;
12772         else {
12773             // Try again after DecodeHTMLUrlString()
12774             p = DecodeHTMLUrlString(p);
12775             replacement = pathSubstitutions.get(LVCombinePaths(codeBase, p));
12776             if ( !replacement.empty() )
12777                 p = replacement;
12778             else
12779                 return LVCombinePaths(codeBase, href);
12780         }
12781         //else
12782         //    p = codeBasePrefix;
12783         //p = LVCombinePaths( codeBase, p ); // relative to absolute path
12784     }
12785     if ( !id.empty() )
12786         p = p + "_" + " " + id;
12787 
12788     p = cs32("#") + p;
12789 
12790     //CRLog::debug("converted href=%s to %s", LCSTR(href), LCSTR(p) );
12791 
12792     return p;
12793 }
12794 
setCodeBase(lString32 fileName)12795 void ldomDocumentFragmentWriter::setCodeBase( lString32 fileName )
12796 {
12797     filePathName = fileName;
12798     codeBasePrefix = pathSubstitutions.get(fileName);
12799     codeBase = LVExtractPath(filePathName);
12800     if ( codeBasePrefix.empty() ) {
12801         CRLog::trace("codeBasePrefix is empty for path %s", LCSTR(fileName));
12802         codeBasePrefix = pathSubstitutions.get(fileName);
12803     }
12804     stylesheetFile.clear();
12805 }
12806 
12807 /// called on attribute
OnAttribute(const lChar32 * nsname,const lChar32 * attrname,const lChar32 * attrvalue)12808 void ldomDocumentFragmentWriter::OnAttribute( const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue )
12809 {
12810     if ( insideTag ) {
12811         if ( !lStr_cmp(attrname, "href") || !lStr_cmp(attrname, "src") ) {
12812             parent->OnAttribute(nsname, attrname, convertHref(lString32(attrvalue)).c_str() );
12813         } else if ( !lStr_cmp(attrname, "id") ) {
12814             parent->OnAttribute(nsname, attrname, convertId(lString32(attrvalue)).c_str() );
12815         } else if ( !lStr_cmp(attrname, "name") ) {
12816             //CRLog::trace("name attribute = %s", LCSTR(lString32(attrvalue)));
12817             parent->OnAttribute(nsname, attrname, convertId(lString32(attrvalue)).c_str() );
12818         } else {
12819             parent->OnAttribute(nsname, attrname, attrvalue);
12820         }
12821     } else {
12822         if (insideHtmlTag) {
12823             // Grab attributes from <html dir="rtl" lang="he"> (not included in the DOM)
12824             // to reinject them in <DocFragment>
12825             if ( !lStr_cmp(attrname, "dir") )
12826                 htmlDir = attrvalue;
12827             else if ( !lStr_cmp(attrname, "lang") )
12828                 htmlLang = attrvalue;
12829         }
12830         else if ( styleDetectionState ) {
12831             if ( !lStr_cmp(attrname, "rel") && lString32(attrvalue).lowercase() == U"stylesheet" )
12832                 styleDetectionState |= 2;
12833             else if ( !lStr_cmp(attrname, "type") ) {
12834                 if ( lString32(attrvalue).lowercase() == U"text/css")
12835                     styleDetectionState |= 4;
12836                 else
12837                     styleDetectionState = 0;  // text/css type supported only
12838             } else if ( !lStr_cmp(attrname, "href") ) {
12839                 styleDetectionState |= 8;
12840                 lString32 href = attrvalue;
12841                 if ( stylesheetFile.empty() )
12842                     tmpStylesheetFile = LVCombinePaths( codeBase, href );
12843                 else
12844                     tmpStylesheetFile = href;
12845             }
12846             if (styleDetectionState == 15) {
12847                 if ( !stylesheetFile.empty() )
12848                     stylesheetLinks.add(tmpStylesheetFile);
12849                 else
12850                     stylesheetFile = tmpStylesheetFile;
12851                 styleDetectionState = 0;
12852                 CRLog::trace("CSS file href: %s", LCSTR(stylesheetFile));
12853             }
12854         }
12855     }
12856 }
12857 
12858 /// called on opening tag
OnTagOpen(const lChar32 * nsname,const lChar32 * tagname)12859 ldomNode * ldomDocumentFragmentWriter::OnTagOpen( const lChar32 * nsname, const lChar32 * tagname )
12860 {
12861     if ( insideTag ) {
12862         return parent->OnTagOpen(nsname, tagname);
12863     } else {
12864         if ( !lStr_cmp(tagname, "link") )
12865             styleDetectionState = 1;
12866         else if ( !lStr_cmp(tagname, "style") )
12867             headStyleState = 1;
12868         else if ( !lStr_cmp(tagname, "html") ) {
12869             insideHtmlTag = true;
12870             htmlDir.clear();
12871             htmlLang.clear();
12872         }
12873     }
12874 
12875     // When meeting the <body> of each of an EPUB's embedded HTML files,
12876     // we will insert into parent (the ldomDocumentWriter that makes out a single
12877     // document) a <DocFragment> wrapping that <body>. It may end up as:
12878     //
12879     //   <DocFragment StyleSheet="OEBPS/Styles/main.css" id="_doc_fragment_2">
12880     //     <stylesheet href="OEBPS/Text/">
12881     //       @import url("../Styles/other.css");
12882     //       @import url(path_to_3rd_css_file)
12883     //       here is <HEAD><STYLE> content
12884     //     </stylesheet>
12885     //     <body>
12886     //       here is original <BODY> content
12887     //     </body>
12888     //   </DocFragment>
12889     //
12890     // (Why one css file link in an attribute and others in the tag?
12891     // I suppose it's because attribute values are hashed and stored only
12892     // once, so it saves space in the DOM/cache for documents with many
12893     // fragments and a single CSS link, which is the most usual case.)
12894 
12895     if ( !insideTag && baseTag==tagname ) { // with EPUBs: baseTag="body"
12896         insideTag = true;
12897         if ( !baseTagReplacement.empty() ) { // with EPUBs: baseTagReplacement="DocFragment"
12898             baseElement = parent->OnTagOpen(U"", baseTagReplacement.c_str()); // start <DocFragment
12899             lastBaseElement = baseElement;
12900             if ( !stylesheetFile.empty() ) {
12901                 // add attribute <DocFragment StyleSheet="path_to_css_1st_file"
12902                 parent->OnAttribute(U"", U"StyleSheet", stylesheetFile.c_str() );
12903                 CRLog::debug("Setting StyleSheet attribute to %s for document fragment", LCSTR(stylesheetFile) );
12904             }
12905             if ( !codeBasePrefix.empty() ) // add attribute <DocFragment id="..html_file_name"
12906                 parent->OnAttribute(U"", U"id", codeBasePrefix.c_str() );
12907             if ( !htmlDir.empty() ) // add attribute <DocFragment dir="rtl" from <html dir="rtl"> tag
12908                 parent->OnAttribute(U"", U"dir", htmlDir.c_str() );
12909             if ( !htmlLang.empty() ) // add attribute <DocFragment lang="ar" from <html lang="ar"> tag
12910                 parent->OnAttribute(U"", U"lang", htmlLang.c_str() );
12911             if (this->m_nonlinear)
12912                 parent->OnAttribute(U"", U"NonLinear", U"" );
12913 
12914             parent->OnTagBody(); // inside <DocFragment>
12915             if ( !headStyleText.empty() || stylesheetLinks.length() > 0 ) {
12916                 // add stylesheet element as child of <DocFragment>: <stylesheet href="...">
12917                 parent->OnTagOpen(U"", U"stylesheet");
12918                 parent->OnAttribute(U"", U"href", codeBase.c_str() );
12919                 lString32 imports;
12920                 for (int i = 0; i < stylesheetLinks.length(); i++) {
12921                     lString32 import("@import url(\"");
12922                     import << stylesheetLinks.at(i);
12923                     import << "\");\n";
12924                     imports << import;
12925                 }
12926                 stylesheetLinks.clear();
12927                 lString32 styleText = imports + headStyleText.c_str();
12928                 // Add it to <DocFragment><stylesheet>, so it becomes:
12929                 //   <stylesheet href="...">
12930                 //     @import url(path_to_css_2nd_file)
12931                 //     @import url(path_to_css_3rd_file)
12932                 //     here is <HEAD><STYLE> content
12933                 //   </stylesheet>
12934                 parent->OnTagBody();
12935                 parent->OnText(styleText.c_str(), styleText.length(), 0);
12936                 parent->OnTagClose(U"", U"stylesheet");
12937                 // done with <DocFragment><stylesheet>...</stylesheet>
12938             }
12939             // Finally, create <body> and go on.
12940             // The styles we have just set via <stylesheet> element and
12941             // StyleSheet= attribute will be applied by this OnTagOpen("body")
12942             // (including those that may apply to body itself), push()'ing
12943             // the previous stylesheet state, that will be pop()'ed when the
12944             // ldomElementWriter for DocFragment is left/destroyed (by onBodyExit(),
12945             // because this OnTagOpen has set to it _stylesheetIsSet).
12946             parent->OnTagOpen(U"", baseTag.c_str());
12947             parent->OnTagBody();
12948             return baseElement;
12949         }
12950     }
12951     return NULL;
12952 }
12953 
12954 /// called on closing tag
OnTagClose(const lChar32 * nsname,const lChar32 * tagname,bool self_closing_tag)12955 void ldomDocumentFragmentWriter::OnTagClose( const lChar32 * nsname, const lChar32 * tagname, bool self_closing_tag )
12956 {
12957     styleDetectionState = headStyleState = 0;
12958     if ( insideTag && baseTag==tagname ) {
12959         insideTag = false;
12960         if ( !baseTagReplacement.empty() ) {
12961             parent->OnTagClose(U"", baseTag.c_str());
12962             parent->OnTagClose(U"", baseTagReplacement.c_str());
12963         }
12964         baseElement = NULL;
12965         return;
12966     }
12967     if ( insideTag )
12968         parent->OnTagClose(nsname, tagname, self_closing_tag);
12969 }
12970 
12971 /// called after > of opening tag (when entering tag body) or just before /> closing tag for empty tags
OnTagBody()12972 void ldomDocumentFragmentWriter::OnTagBody()
12973 {
12974     if ( insideTag ) {
12975         parent->OnTagBody();
12976     }
12977     else if ( insideHtmlTag ) {
12978         insideHtmlTag = false;
12979     }
12980     if ( styleDetectionState == 11 ) {
12981         // incomplete <link rel="stylesheet", href="..." />; assuming type="text/css"
12982         if ( !stylesheetFile.empty() )
12983             stylesheetLinks.add(tmpStylesheetFile);
12984         else
12985             stylesheetFile = tmpStylesheetFile;
12986         styleDetectionState = 0;
12987     } else
12988         styleDetectionState = 0;
12989 }
12990 
12991 
12992 
12993 /////////////////////////////////////////////////////////////////
12994 /// ldomDocumentWriterFilter
12995 // Used to parse lousy HTML in formats: HTML, CHM, PDB(html)
12996 // For all these document formats, it is fed by HTMLParser that does
12997 // convert to lowercase the tag names and attributes.
12998 // ldomDocumentWriterFilter does then deal with auto-closing unbalanced
12999 // HTML tags according to the rules set in crengine/src/lvxml.cpp HTML_AUTOCLOSE_TABLE[]
13000 
13001 /** \brief callback object to fill DOM tree
13002 
13003     To be used with XML parser as callback object.
13004 
13005     Creates document according to incoming events.
13006 
13007     Autoclose HTML tags.
13008 */
13009 
setClass(const lChar32 * className,bool overrideExisting)13010 void ldomDocumentWriterFilter::setClass( const lChar32 * className, bool overrideExisting )
13011 {
13012     ldomNode * node = _currNode->_element;
13013     if ( _classAttrId==0 ) {
13014         _classAttrId = _document->getAttrNameIndex(U"class");
13015     }
13016     if ( overrideExisting || !node->hasAttribute(_classAttrId) ) {
13017         node->setAttributeValue(LXML_NS_NONE, _classAttrId, className);
13018     }
13019 }
13020 
appendStyle(const lChar32 * style)13021 void ldomDocumentWriterFilter::appendStyle( const lChar32 * style )
13022 {
13023     ldomNode * node = _currNode->_element;
13024     if ( _styleAttrId==0 ) {
13025         _styleAttrId = _document->getAttrNameIndex(U"style");
13026     }
13027     // Append to the style attribute even if embedded styles are disabled
13028     // at loading time, otherwise it won't be there if we enable them later
13029     // if (!_document->getDocFlag(DOC_FLAG_ENABLE_INTERNAL_STYLES))
13030     //     return; // disabled
13031 
13032     lString32 oldStyle = node->getAttributeValue(_styleAttrId);
13033     if ( !oldStyle.empty() && oldStyle.at(oldStyle.length()-1)!=';' )
13034         oldStyle << "; ";
13035     oldStyle << style;
13036     node->setAttributeValue(LXML_NS_NONE, _styleAttrId, oldStyle.c_str());
13037 }
13038 
13039 // Legacy auto close handler (gDOMVersionRequested < 20200824)
AutoClose(lUInt16 tag_id,bool open)13040 void ldomDocumentWriterFilter::AutoClose( lUInt16 tag_id, bool open )
13041 {
13042     lUInt16 * rule = _rules[tag_id];
13043     if ( !rule )
13044         return;
13045     if ( open ) {
13046         ldomElementWriter * found = NULL;
13047         ldomElementWriter * p = _currNode;
13048         while ( p && !found ) {
13049             lUInt16 id = p->_element->getNodeId();
13050             for ( int i=0; rule[i]; i++ ) {
13051                 if ( rule[i]==id ) {
13052                     found = p;
13053                     break;
13054                 }
13055             }
13056             p = p->_parent;
13057         }
13058         // found auto-close target
13059         if ( found != NULL ) {
13060             bool done = false;
13061             while ( !done && _currNode ) {
13062                 if ( _currNode == found )
13063                     done = true;
13064                 ldomNode * closedElement = _currNode->getElement();
13065                 _currNode = pop( _currNode, closedElement->getNodeId() );
13066                 //ElementCloseHandler( closedElement );
13067             }
13068         }
13069     } else {
13070         if ( !rule[0] )
13071             _currNode = pop( _currNode, _currNode->getElement()->getNodeId() );
13072     }
13073 }
13074 
13075 // With gDOMVersionRequested >= 20200824, we use hardcoded rules
13076 // for opening and closing tags, trying to follow what's relevant
13077 // in the HTML Living Standard (=HTML5):
13078 //   https://html.spec.whatwg.org/multipage/parsing.html
13079 // A less frightening introduction is available at:
13080 //   https://htmlparser.info/parser/
13081 //
13082 // Note that a lot of rules and checks in the algorithm are for
13083 // noticing "parser errors", with usually a fallback of ignoring
13084 // it and going on.
13085 // We ensure one tedious requirement: foster parenting of non-table
13086 // elements met while building a table, mostly to not have mis-nested
13087 // content simply ignored and not shown to the user.
13088 // Other tedious requirements not ensured might just have some impact
13089 // on the styling of the content, which should be a minor issue.
13090 //
13091 // It feels that we can simplify it to the following implementation,
13092 // with possibly some cases not handled related to:
13093 // - FORM and form elements (SELECT, INPUT, OPTION...)
13094 // - TEMPLATE, APPLET, OBJECT, MARQUEE
13095 // - Mis-nested HTML/BODY/HEAD
13096 // - Reconstructing the active formatting elements (B, I...) when
13097 //   mis-nested or "on hold" when entering block or table elements.
13098 // - The "adoption agency algorithm" for mis-nested formatting
13099 //   elements (and nested <A>)
13100 // - We may not ignore some opening tag that we normally should
13101 //   (like HEAD or FRAME when in BODY) (but we ignore a standalone
13102 //   sub-table element when not inside a TABLE) as this would
13103 //   complicate the internal parser state.
13104 //
13105 // Of interest:
13106 // https://html.spec.whatwg.org/multipage/parsing.html#parse-state
13107 //   List of "special" elements
13108 //   List of elements for rules "have a particular element in X scope"
13109 // https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
13110 //   Specific rules when start or end tag of specific elements is met
13111 
// Scopes limit how far up the ancestors we search when looking for a
// previously opened element to close (a closing tag may be ignored if
// no matching opening tag is found within the specified scope).
enum ScopeType {
    // No stop tag
    HTML_SCOPE_NONE = 0,
    // Stops at HTML, TABLE, TD, TH, CAPTION, APPLET, MARQUEE, OBJECT, TEMPLATE
    HTML_SCOPE_MAIN,
    // = SCOPE_MAIN + OL, UL
    HTML_SCOPE_LIST_ITEM,
    // = SCOPE_MAIN + BUTTON (not used, only used with P that we handle specifically)
    HTML_SCOPE_BUTTON,
    // Stops at HTML, TABLE, TEMPLATE
    HTML_SCOPE_TABLE,
    // All elements stop, except OPTGROUP, OPTION
    HTML_SCOPE_SELECT,
    // Stops at any "special" element (inline elements don't close across
    // block/special elements)
    HTML_SCOPE_SPECIALS,

    // The next ones are scopes with specific behaviours that may ignore target_id

    // = SCOPE_SPECIALS, minus ADDRESS, DIV, P: close any LI
    HTML_SCOPE_OPENING_LI,
    // = SCOPE_SPECIALS, minus ADDRESS, DIV, P: close any DT/DD
    HTML_SCOPE_OPENING_DT_DD,
    // Close the current node if it is one of H1, H2, H3, H4, H5, H6
    HTML_SCOPE_OPENING_H1_H6,
    // = SCOPE_MAIN: close any of H1..H6
    HTML_SCOPE_CLOSING_H1_H6,
    // = SCOPE_TABLE: close all table sub-elements to end up being TABLE
    HTML_SCOPE_TABLE_TO_TOP,
    // = SCOPE_TABLE: close any TD/TH
    HTML_SCOPE_TABLE_OPENING_TD_TH,
};
13131 // Note: as many elements close a P, we don't handle checking and closing them
13132 // via popUpTo(NULL, el_p, HTML_SCOPE_BUTTON), but we keep the last P as _lastP
13133 // so we can just popUpTo(_lastP) if set when meeting a "close a P" element.
13134 
// Boxing elements (id < el_DocFragment) (and DocFragment itself,
// not used with the HTMLParser) are normally added by crengine
// after "delete ldomElementWriter" (which calls onBodyExit(),
// which calls initNodeRendMethod()), that is, after we have closed
// and moved past the element.
// So, we shouldn't meet any in popUpTo() and don't have to wonder
// whether we should stop at them, or pass by them.
13142 
// Close (pop) open elements, from the current one up to and including a
// target element.
//
// target:    the element writer to close up to. When NULL, the target is
//            searched among _currNode and its ancestors, as directed by
//            target_id and scope.
// target_id: element id to look for when target is NULL (0 for none: the
//            target is then solely decided by the scope rules).
// scope:     one of the ScopeType values; decides at which elements the
//            search stops, either giving up (nothing gets closed) or
//            selecting the element met as the target.
//
// Returns the element id of the node that is current once done, or el_NULL
// if there is no current node anymore.
//
// While fostering, neither the search nor the closing goes above the
// fostered node.
lUInt16 ldomDocumentWriterFilter::popUpTo( ldomElementWriter * target, lUInt16 target_id, int scope )
{
    if ( !target ) {
        // Check if there's an element with provided target_id in the stack inside this scope
        // In the switch below: "stop = true" alone ends the search with tmp
        // as the target; setting "tmp = NULL" too ends it with no target.
        ldomElementWriter * tmp = _currNode;
        while ( tmp ) {
            lUInt16 tmpId = tmp->getElement()->getNodeId();
            if ( tmpId < el_DocFragment && tmpId > el_NULL) {
                // We shouldn't meet any (see comment above)
                // (but we can meet the root node when popping </html>)
                crFatalError( 127, "Unexpected boxing element met in ldomDocumentWriterFilter::popUpTo()" );
            }
            if ( target_id && tmpId == target_id )
                break;
            if ( _curFosteredNode && tmp == _curFosteredNode ) {
                // If fostering and we're not closing the fostered node itself,
                // don't go at closing stuff above the fostered node
                tmp = NULL;
                break;
            }
            // Check scope stop tags
            bool stop = false;
            switch (scope) {
                case HTML_SCOPE_MAIN: // stop at HTML/TABLE/TD...
                    if ( tmpId == el_html || tmpId == el_table || tmpId == el_td || tmpId == el_th || tmpId == el_caption ||
                         tmpId == el_applet || tmpId == el_marquee || tmpId == el_object || tmpId == el_template ) {
                        tmp = NULL;
                        stop = true;
                    }
                    break;
                case HTML_SCOPE_LIST_ITEM: // stop at SCOPE_MAIN + OL, UL
                    if ( tmpId == el_html || tmpId == el_table || tmpId == el_td || tmpId == el_th || tmpId == el_caption ||
                         tmpId == el_applet || tmpId == el_marquee || tmpId == el_object || tmpId == el_template ||
                         tmpId == el_ol || tmpId == el_ul ) {
                        tmp = NULL;
                        stop = true;
                    }
                    break;
                case HTML_SCOPE_BUTTON: // stop at SCOPE_MAIN + BUTTON
                    if ( tmpId == el_html || tmpId == el_table || tmpId == el_td || tmpId == el_th || tmpId == el_caption ||
                         tmpId == el_applet || tmpId == el_marquee || tmpId == el_object || tmpId == el_template ||
                         tmpId == el_button ) {
                        tmp = NULL;
                        stop = true;
                    }
                    break;
                case HTML_SCOPE_TABLE: // stop at HTML and TABLE
                    if ( tmpId == el_html || tmpId == el_table || tmpId == el_template ) {
                        tmp = NULL;
                        stop = true;
                    }
                    break;
                case HTML_SCOPE_SELECT:
                    // This one is different: all elements stop it, except optgroup and option
                    if ( tmpId != el_optgroup && tmpId != el_option ) {
                        tmp = NULL;
                        stop = true;
                    }
                    break;
                case HTML_SCOPE_SPECIALS: // stop at any "special" element
                    if ( tmpId >= EL_SPECIAL_START && tmpId <= EL_SPECIAL_END ) {
                        tmp = NULL;
                        stop = true;
                    }
                    break;
                case HTML_SCOPE_OPENING_LI:
                    // An open LI is the target; any special element other
                    // than DIV, P, ADDRESS ends the search with no target
                    if ( tmpId == el_li ) {
                        stop = true;
                    }
                    else if ( tmpId >= EL_SPECIAL_START && tmpId <= EL_SPECIAL_END &&
                         tmpId != el_div && tmpId != el_p && tmpId != el_address ) {
                        tmp = NULL;
                        stop = true;
                    }
                    break;
                case HTML_SCOPE_OPENING_DT_DD:
                    // Same as above, with an open DT or DD being the target
                    if ( tmpId == el_dt || tmpId == el_dd ) {
                        stop = true;
                    }
                    else if ( tmpId >= EL_SPECIAL_START && tmpId <= EL_SPECIAL_END &&
                         tmpId != el_div && tmpId != el_p && tmpId != el_address ) {
                        tmp = NULL;
                        stop = true;
                    }
                    break;
                case HTML_SCOPE_OPENING_H1_H6:
                    // Close immediate parent H1...H6, but don't walk up
                    // <H3> ... <H4> : H4 will close H3
                    // <H3> ... <B> ... <H4> : H4 will not close H3
                    if ( tmpId < el_h1 || tmpId > el_h6 ) {
                        tmp = NULL; // Nothing to close
                    }
                    stop = true; // Don't check upper
                    break;
                case HTML_SCOPE_CLOSING_H1_H6:
                    // Any open H1..H6 is the target, searched in SCOPE_MAIN
                    if ( tmpId >= el_h1 && tmpId <= el_h6 ) {
                        stop = true;
                    }
                    else if ( tmpId == el_html || tmpId == el_table || tmpId == el_td || tmpId == el_th || tmpId == el_caption ||
                         tmpId == el_applet || tmpId == el_marquee || tmpId == el_object || tmpId == el_template ) {
                        tmp = NULL;
                        stop = true;
                    }
                    break;
                case HTML_SCOPE_TABLE_TO_TOP:
                    // The target is the element whose parent is the TABLE,
                    // so that TABLE becomes current once it is closed
                    if ( tmp->_parent && tmp->_parent->getElement()->getNodeId() == el_table ) {
                        stop = true;
                    }
                    else if ( tmpId == el_html || tmpId == el_table || tmpId == el_template ) {
                        tmp = NULL;
                        stop = true;
                    }
                    break;
                case HTML_SCOPE_TABLE_OPENING_TD_TH:
                    // Any open TD or TH is the target, searched in SCOPE_TABLE
                    if ( tmpId == el_td || tmpId == el_th ) {
                        stop = true;
                    }
                    else if ( tmpId == el_html || tmpId == el_table || tmpId == el_template ) {
                        tmp = NULL;
                        stop = true;
                    }
                    break;
                case HTML_SCOPE_NONE:
                default:
                    // Never stop, continue up to root node
                    break;
            }
            if ( stop )
                break;
            tmp = tmp->_parent;
        }
        target = tmp; // (NULL if not found, NULL or not if stopped)
    }
    if ( target ) {
        // Assume target is valid and will be found
        // (the loop only ends when target or the fostered node is met,
        // or when there is no more current node)
        while ( _currNode ) {
            // Update state for after this node is closed
            lUInt16 curNodeId = _currNode->getElement()->getNodeId();
            // Reset these flags if we see again these tags (so to
            // at least reconstruct </html><html><body><hr> when
            // meeting </html><hr> and have el_html as the catch all
            // element in SCOPEs working.
            if ( curNodeId == el_body ) {
                _bodyTagSeen = false;
            }
            else if ( curNodeId == el_html ) {
                _headTagSeen = false;
                _htmlTagSeen = false;
            }
            if ( _lastP && _currNode == _lastP )
                _lastP = NULL;
            ldomElementWriter * tmp = _currNode;
            bool done = _currNode == target;
            if ( _curFosteredNode && _currNode == _curFosteredNode ) {
                // If we meet the fostered node, have it closed but don't
                // go at closing above it
                done = true;
                _currNode = _curNodeBeforeFostering;
                _curNodeBeforeFostering = NULL;
                _curFosteredNode = NULL;
            }
            else {
                _currNode = _currNode->_parent;
            }
            // _currNode has already been moved on: notify about the closed
            // element, then destroy its writer
            ElementCloseHandler( tmp->getElement() );
            delete tmp;
            if ( done )
                break;
        }
    }
    return _currNode ? _currNode->getElement()->getNodeId() : el_NULL;
}
13315 
// Kind of parser event being handled, to be given as the first
// parameter to AutoOpenClosePop()
enum ParserStepType {
    PARSER_STEP_TAG_OPENING      = 1, // an opening tag has been met
    PARSER_STEP_TAG_CLOSING      = 2, // a closing tag has been met
    PARSER_STEP_TAG_SELF_CLOSING = 3, // a self-closing tag has been met
    PARSER_STEP_TEXT             = 4  // some text has been met
};
13323 
13324 // More HTML5 conforming auto close handler (gDOMVersionRequested >= 20200824)
AutoOpenClosePop(int step,lUInt16 tag_id)13325 bool ldomDocumentWriterFilter::AutoOpenClosePop( int step, lUInt16 tag_id )
13326 {
13327     lUInt16 curNodeId = _currNode ? _currNode->getElement()->getNodeId() : el_NULL;
13328     if ( !_bodyTagSeen && ( step == PARSER_STEP_TAG_OPENING || step == PARSER_STEP_TEXT) ) {
13329         // Create some expected containing elements if not yet seen
13330         if ( !_headTagSeen ) {
13331             if ( !_htmlTagSeen ) {
13332                 _htmlTagSeen = true;
13333                 if ( tag_id != el_html ) {
13334                     OnTagOpen(U"", U"html");
13335                     OnTagBody();
13336                 }
13337             }
13338             if ( (tag_id >= EL_IN_HEAD_START && tag_id <= EL_IN_HEAD_END) || tag_id == el_noscript ) {
13339                 _headTagSeen = true;
13340                 if ( tag_id != el_head ) {
13341                     OnTagOpen(U"", U"head");
13342                     OnTagBody();
13343                 }
13344             }
13345             curNodeId = _currNode ? _currNode->getElement()->getNodeId() : el_NULL;
13346         }
13347         if ( tag_id >= EL_IN_BODY_START || (step == PARSER_STEP_TEXT && (curNodeId == el_html || curNodeId == el_head)) ) {
13348             // Tag usually found inside <body>, or text while being <HTML> or <HEAD>
13349             // (text while being in <HTML><HEAD><TITLE> should not trigger this):
13350             // end of <head> and start of <body>
13351             if ( _headTagSeen )
13352                 OnTagClose(U"", U"head");
13353             else
13354                 _headTagSeen = true; // We won't open any <head> anymore
13355             _bodyTagSeen = true;
13356             if ( tag_id != el_body ) {
13357                 OnTagOpen(U"", U"body");
13358                 OnTagBody();
13359             }
13360             curNodeId = _currNode ? _currNode->getElement()->getNodeId() : el_NULL;
13361         }
13362     }
13363     if ( step == PARSER_STEP_TEXT ) // new text: nothing more to do
13364         return true;
13365 
13366     bool is_self_closing_tag = false;
13367     switch (tag_id) {
13368         // These are scaterred among different ranges, so we sadly
13369         // can't use any range comparisons
13370         case el_area:
13371         case el_base:
13372         case el_br:
13373         case el_col:
13374         case el_embed:
13375         case el_hr:
13376         case el_img:
13377         case el_input:
13378         case el_link:
13379         case el_meta:
13380         case el_param:
13381         case el_source:
13382         case el_track:
13383         case el_wbr:
13384             is_self_closing_tag = true;
13385             break;
13386         default:
13387             break;
13388     }
13389 
13390     if ( step == PARSER_STEP_TAG_OPENING ) {
13391         // A new element with tag_id will be created after we return
13392         // We should
13393         // - create implicit parent elements for tag_id if not present (partially
13394         //   done for HTML/HEAD/BODY elements above)
13395         // - close elements that should be closed by this tag_id (some with optional
13396         //   end tags and others that the spec says so)
13397         // - keep a note if it's self-closing so we can close it when appropriate
13398         // - ignore this opening tag in some cases
13399 
13400         // Table elements can be ignored, create missing elements
13401         // and/or close some others
13402         if ( tag_id == el_th || tag_id == el_td ) {
13403             // Close any previous TD/TH in table scope if any
13404             curNodeId = popUpTo(NULL, 0, HTML_SCOPE_TABLE_OPENING_TD_TH);
13405             // We should be in a table or sub-table element
13406             // (a standalone TD is ignored)
13407             if ( curNodeId < el_table || curNodeId > el_tr )
13408                 return false; // Not in a table context: ignore this TD/TH
13409             // We must be in a TR. If we're not, have missing elements created
13410             if ( curNodeId != el_tr ) {
13411                 // This will create all the other missing elements if needed
13412                 OnTagOpen(U"", U"tr");
13413                 OnTagBody();
13414             }
13415         }
13416         else if ( tag_id == el_tr ) {
13417             // Close any previous TR in table scope if any
13418             curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_TABLE);
13419             // We should be in a table or sub-table element
13420             // (a standalone TR is ignored)
13421             if ( curNodeId < el_table || curNodeId > el_tfoot )
13422                 return false; // Not in a table context: ignore this TR
13423             // We must be in a THEAD/TBODY/TFOOT. If we're not, have missing elements created
13424             if ( curNodeId < el_thead || curNodeId > el_tfoot ) {
13425                 // This will create all the other missing elements if needed
13426                 OnTagOpen(U"", U"tbody");
13427                 OnTagBody();
13428             }
13429         }
13430         else if ( tag_id == el_col ) {
13431             // Close any previous COL in table scope if any
13432             curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_TABLE);
13433             // We should be in a table or sub-table element
13434             if ( curNodeId < el_table || curNodeId > el_td )
13435                 return false; // Not in a table context: ignore this TR
13436             // We must be in a COLGROUP. If we're not, have missing elements created
13437             if ( curNodeId != el_colgroup ) {
13438                 // This will create all the other missing elements if needed
13439                 OnTagOpen(U"", U"colgroup");
13440                 OnTagBody();
13441             }
13442         }
13443         else if ( (tag_id >= el_thead && tag_id <= el_tfoot) ||
13444                    tag_id == el_caption ||
13445                    tag_id == el_colgroup ) {
13446             // Close any previous THEAD/TBODY/TFOOT/CAPTION/COLGROUP/COL in table scope if any
13447             curNodeId = popUpTo(NULL, 0, HTML_SCOPE_TABLE_TO_TOP);
13448             // We should be in a table element
13449             if ( curNodeId != el_table )
13450                 return false; // Not in a table context
13451         }
13452 
13453         if ( tag_id == el_li ) {
13454             // A LI should close any previous LI, but should stop at specials
13455             // except ADDRESS, DIV and P (they will so stop at UL/OL and won't
13456             // close any upper LI that had another level of list opened).
13457             // Once that LI close, they should also close any P, which will
13458             // be taken care by followup check.
13459             curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_OPENING_LI);
13460         }
13461         else if ( tag_id == el_dt || tag_id == el_dd ) {
13462             curNodeId = popUpTo(NULL, 0, HTML_SCOPE_OPENING_DT_DD);
13463         }
13464         else if ( tag_id == el_select ) {
13465             curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_SELECT);
13466         }
13467         if ( _lastP && tag_id >= EL_SPECIAL_CLOSING_P_START && tag_id <= EL_SPECIAL_CLOSING_P_END ) {
13468             // All these should close a P "in button scope", meaning until a parent
13469             // with these tag names is met:
13470             //   html, table, td, th, caption, applet, marquee, object, template
13471             // These should all have closed any previous P when opened, except
13472             // applet, marquee, object, template - but to simplify things, we
13473             // made them close a P too. So, _lastP is always "in button scope".
13474             curNodeId = popUpTo(_lastP); // will set _lastP = NULL
13475             // Note: in "quirks mode", a TABLE should not close a P (should
13476             // we force this behaviour on old CHM files ? Having the table
13477             // close a P when it shouldn't will make the following text out
13478             // of P and possibly not styled as P).
13479         }
13480         if ( tag_id >= el_h1 && tag_id <= el_h6 ) {
13481             // After possibly closing a P, H1...H6 close any H1...H6 direct ancestor
13482             curNodeId = popUpTo(NULL, 0, HTML_SCOPE_OPENING_H1_H6);
13483         }
13484         else if ( curNodeId == el_option && (tag_id == el_optgroup || tag_id == el_option) ) {
13485             // Close previous option
13486             curNodeId = popUpTo(_currNode);
13487         }
13488         else if ( tag_id >= el_rbc && tag_id <= el_rp ) { // ruby sub-elements
13489             // The HTML5 specs says that:
13490             // - we should do that only if there is a RUBY in scope (but we don't check that)
13491             // - RB and RTC should close implied end tags, meaning: RB RP RT RTC
13492             // - RP and RT should close implied end tags except RTC, meaning: RB RP RT
13493             // But they don't mention the old <RBC> that we want to support
13494             // If we do, we end up with these rules (x for HTML specs, o for our added RBC support)
13495             //               tags to close
13496             //     tag_id   RBC RB RTC RT RP
13497             //     RBC       o  o   o  o  o
13498             //     RB           x   x  x  x
13499             //     RTC       o  x   x  x  x
13500             //     RT        o  x      x  x
13501             //     RP        o  x      x  x
13502             if ( tag_id == el_rbc || tag_id == el_rtc ) {
13503                 while ( curNodeId >= el_rbc && curNodeId <= el_rp ) {
13504                     curNodeId = popUpTo(_currNode);
13505                 }
13506             }
13507             else if ( tag_id == el_rb ) {
13508                 while ( curNodeId >= el_rb && curNodeId <= el_rp ) {
13509                     curNodeId = popUpTo(_currNode);
13510                 }
13511             }
13512             else { // el_rt || el_rp
13513                 while ( curNodeId >= el_rbc && curNodeId <= el_rp && curNodeId != el_rtc) {
13514                     curNodeId = popUpTo(_currNode);
13515                 }
13516             }
13517         }
13518 
13519         // Self closing will be handled in OnTagBody
13520         _curNodeIsSelfClosing = is_self_closing_tag;
13521     }
13522     else if ( step == PARSER_STEP_TAG_CLOSING || step == PARSER_STEP_TAG_SELF_CLOSING ) { // Closing, </tag_id> or <tag_id/>
13523         // We are responsible for poping up to and closing the provided tag_id,
13524         // or ignoring it if stopped or not found.
13525         if ( is_self_closing_tag ) {
13526             // We can ignore this closing tag, except in one case:
13527             // a standalone closing </BR> (so, not self closing)
13528             // Specs say we should insert a new/another one
13529             if ( tag_id == el_br && step == PARSER_STEP_TAG_CLOSING ) {
13530                 OnTagOpen(U"", U"br");
13531                 OnTagBody();
13532                 OnTagClose(U"", U"br", true);
13533                 return true;
13534             }
13535             return false; // ignored
13536         }
13537         if ( tag_id == curNodeId ) {
13538             // If closing current node, no need for more checks
13539             popUpTo(_currNode);
13540             return true;
13541         }
13542         if ( tag_id == el_p && !_lastP ) {
13543             // </P> without any previous <P> should emit <P></P>
13544             // Insert new one and pop it
13545             OnTagOpen(U"", U"p");
13546             OnTagBody();
13547             popUpTo(_currNode);
13548             return true;
13549         }
13550         if ( tag_id > EL_SPECIAL_END ) {
13551             // Inline elements don't close across specials
13552             curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_SPECIALS);
13553         }
13554         else if ( tag_id >= el_h1 && tag_id <= el_h6 ) {
13555             // A closing Hn closes any other Hp
13556             curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_CLOSING_H1_H6);
13557         }
13558         else if ( tag_id == el_li ) {
13559             // </li> shouldn't close across OL/UL
13560             curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_LIST_ITEM);
13561             // Note: dt/dd (which have the same kind of auto-close previous
13562             // as LI for the opening tag) do not have any restriction, and
13563             // will use HTML_SCOPE_MAIN below
13564         }
13565         else if ( tag_id >= el_table && tag_id <= el_td ) {
13566             // Table sub-element: don't cross TABLE
13567             curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_TABLE);
13568         }
13569         else if ( tag_id >= EL_SPECIAL_START ) {
13570             // All other "specials" close across nearly everything
13571             // except TABLE/TH/TD/CAPTION
13572             curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_MAIN);
13573         }
13574         else {
13575             // Boxing elements are normally added by crengine after
13576             // "delete ldomElementWriter" (which calls onBodyExit()
13577             // which calls initNodeRendMethod()), so after we have
13578             // closed and pass by the element.
13579             // So, we shouldn't meet any.
13580             // But logically, they shouldn't have any limitation
13581             curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_NONE);
13582         }
13583         // SELECT should close any previous SELECT in HTML_SCOPE_SELECT,
13584         // which should contain only OPTGROUP and OPTION, but we don't
13585         // ensure that. So, we don't ensure this closing restriction.
13586     }
13587 
13588     // (Silences clang warning about 'curNodeId' is never read, if we
13589     // happen to not had the need to re-check it - but better to keep
13590     // updating it if we later add stuff that does use it)
13591     (void)curNodeId;
13592 
13593     return true;
13594 }
CheckAndEnsureFosterParenting(lUInt16 tag_id)13595 bool ldomDocumentWriterFilter::CheckAndEnsureFosterParenting(lUInt16 tag_id)
13596 {
13597     if ( !_currNode )
13598         return false;
13599     lUInt16 curNodeId = _currNode->getElement()->getNodeId();
13600     if ( curNodeId >= el_table && curNodeId <= el_tr && curNodeId != el_caption ) {
13601         if ( tag_id < el_table || tag_id > el_td ) {
13602             // Non table sub-element met as we expect only a table sub-element.
13603             // Ensure foster parenting: this node (and its content) is to be
13604             // inserted as a previous sibling of the table element we are in
13605             _curNodeBeforeFostering = NULL;
13606             // Look for the containing table element
13607             ldomElementWriter * elem = _currNode;
13608             while ( elem ) {
13609                 if ( elem->getElement()->getNodeId() == el_table ) {
13610                     break;
13611                 }
13612                 elem = elem->_parent;
13613             }
13614             if ( elem ) { // found it
13615                 _curNodeBeforeFostering = _currNode;
13616                 _currNode = elem->_parent; // parent of table
13617                 return true; // Insert the new element in _currNode (the parent of this
13618                              // table), before its last child (which is this table)
13619             }
13620         }
13621         // We're in a table, and we see an expected sub-table element: all is fine
13622         return false;
13623     }
13624     else if ( _curFosteredNode ) {
13625         // We've been foster parenting: if we see a table sub-element,
13626         // stop foster parenting and restore the original noce
13627         if ( tag_id >= el_table && tag_id <= el_td ) {
13628             popUpTo(_curFosteredNode);
13629             // popUpTo() has restored _currNode to _curNodeBeforeFostering and
13630             // reset _curFosteredNode and _curNodeBeforeFostering to NULL
13631         }
13632     }
13633     return false;
13634 }
13635 
OnTagOpen(const lChar32 * nsname,const lChar32 * tagname)13636 ldomNode * ldomDocumentWriterFilter::OnTagOpen( const lChar32 * nsname, const lChar32 * tagname )
13637 {
13638     // We expect from the parser to always have OnTagBody called
13639     // after OnTagOpen before any other OnTagOpen
13640     if ( !_tagBodyCalled ) {
13641         CRLog::error("OnTagOpen w/o parent's OnTagBody : %s", LCSTR(lString32(tagname)));
13642         crFatalError();
13643     }
13644     // _tagBodyCalled = false;
13645     // We delay setting _tagBodyCalled=false to below as we may create
13646     // additional wrappers before inserting this new element
13647 
13648     lUInt16 id = _document->getElementNameIndex(tagname);
13649     lUInt16 nsid = (nsname && nsname[0]) ? _document->getNsNameIndex(nsname) : 0;
13650 
13651     // http://lib.ru/ books detection (a bit ugly to have this hacked
13652     // into ldomDocumentWriterFilter, but well, it's been there for ages
13653     // and it seems quite popular and expected to have crengine handle
13654     // Lib.ru books without any conversion needed).
13655     // Detection has been reworked to be done here (in OnTagOpen). It
13656     // was previously done in ElementCloseHandler/OnTagClose when closing
13657     // the elements, and as it removed the FORM node from the DOM, it
13658     // caused a display hash mismatch which made the cache invalid.
13659     // So, do it here and don't remove any node but make then hidden.
13660     // Lib.ru books (in the 2 formats that are supported, "Lib.ru html"
13661     // and "Fine HTML"), have this early in the document:
13662     //   <div align=right><form action=/INPROZ/ASTURIAS/asturias1_1.txt><select name=format><OPTION...>
13663     // Having a FORM child of a DIV with align=right is assumed to be
13664     // quite rare, so check for that.
13665     bool setDisplayNone = false;
13666     bool setParseAsPre = false;
13667     if ( _libRuDocumentToDetect && id == el_form ) {
13668         // At this point _currNode is still the parent of the FORM that is opening
13669         if ( _currNode && _currNode->_element->getNodeId() == el_div ) {
13670             ldomNode * node = _currNode->_element;
13671             lString32 style = node->getAttributeValue(attr_style);
13672             // align=right would have been translated to style="text-align: right"
13673             if ( !style.empty() && style.pos("text-align: right", 0) >= 0 ) {
13674                 _libRuDocumentDetected = true;
13675                 // We can't set this DIV to be display:none as the element
13676                 // has already had setNodeStyle() called and applied, so
13677                 // it would take effect only on re-renderings (and would
13678                 // cause a display hash mismatch).
13679                 // So, we'll set it on the FORM just after it's created below
13680                 setDisplayNone = true;
13681             }
13682         }
13683         // If the first FORM met doesn't match, no need keep detecting
13684         _libRuDocumentToDetect = false;
13685     }
13686     // Fixed 20180503: this was done previously in any case, but now only
13687     // if _libRuDocumentDetected. We still allow the old behaviour if
13688     // requested to keep previously recorded XPATHs valid.
13689     if ( _libRuDocumentDetected || _document->getDOMVersionRequested() < 20180503) {
13690         // Patch for bad LIB.RU books - BR delimited paragraphs
13691         // in "Fine HTML" format, that appears as:
13692         //   <br>&nbsp; &nbsp; &nbsp; Viento fuerte, 1950
13693         //   <br>&nbsp; &nbsp; &nbsp; Spellcheck [..., with \n every 76 chars]
13694         if ( id == el_br || id == el_dd ) {
13695             // Replace such BR with P
13696             id = el_p;
13697             _libRuParagraphStart = true; // to trim leading &nbsp;
13698         } else {
13699             _libRuParagraphStart = false;
13700         }
13701         if ( _libRuDocumentDetected && id == el_pre ) {
13702             // "Lib.ru html" format is actually minimal HTML with
13703             // the text wrapped in <PRE>. We will parse this text
13704             // to build proper HTML with each paragraph wrapped
13705             // in a <P> (this is done by the XMLParser when we give
13706             // it TXTFLG_PRE_PARA_SPLITTING).
13707             // Once that is detected, we don't want it to be PRE
13708             // anymore (so that on re-renderings, it's not handled
13709             // as white-space: pre), so we're swapping this PRE with
13710             // a DIV element. But we need to still parse the text
13711             // when building the DOM as PRE.
13712             id = el_div;
13713             ldomNode * n = _currNode ? _currNode->getElement() : NULL;
13714             if ( n && n->getNodeId() == el_pre ) {
13715                 // Also close any previous PRE that would have been
13716                 // auto-closed if we kept PRE as PRE (from now on,
13717                 // we'll convert PRE to DIV), as this unclosed PRE
13718                 // would apply to all the text.
13719                 _currNode = pop( _currNode, el_pre);
13720             }
13721             else if ( n && n->getNodeId() == el_div && n->hasAttribute( attr_ParserHint ) &&
13722                         n->getAttributeValue( attr_ParserHint ) == U"ParseAsPre" ) {
13723                 // Also close any previous PRE we already masqueraded
13724                 // as <DIV ParserHint="ParseAsPre">
13725                 _currNode = pop( _currNode, el_div);
13726             }
13727             // Below, we'll then be inserting a DIV, which won't be TXTFLG_PRE.
13728             // We'll need to re-set _flags to be TXTFLG_PRE in our OnTagBody(),
13729             // after it has called the superclass's OnTagBody(),
13730             // as ldomDocumentWriter::OnTagBody() will call onBodyEnter() which
13731             // will have set default styles (so, not TXTFLG_PRE for DIV as its
13732             // normal style is "white-space: normal").
13733             // We'll add the attribute ParserHint="ParseAsPre" below so
13734             // we know it was a PRE and do various tweaks.
13735             setParseAsPre = true;
13736         }
13737     }
13738 
13739     bool tag_accepted = true;
13740     bool insert_before_last_child = false;
13741     if (_document->getDOMVersionRequested() >= 20200824) { // A little bit more HTML5 conformance
13742         if ( id == el_image )
13743             id = el_img;
13744         if ( tagname && tagname[0] == '?' ) {
13745             // The XML parser feeds us XML processing instructions like '<?xml ... ?>'
13746             // Firefox wraps them in a comment <!--?xml ... ?-->.
13747             // As we ignore comments, ignore them too.
13748             tag_accepted = false;
13749         }
13750         else if ( CheckAndEnsureFosterParenting(id) ) {
13751             // https://html.spec.whatwg.org/multipage/parsing.html#foster-parent
13752             // If non-sub-table element opening while we're still
13753             // inside sub-table non-TD/TH elements, we should
13754             // do foster parenting: insert the node as the previous
13755             // sibling of the TABLE element we're dealing with
13756             insert_before_last_child = true;
13757             // As we'll be inserting a node before the TABLE, which
13758             // already had its style applied, some CSS selectors matches
13759             // might no more be valid (i.e. :first-child, DIV + TABLE),
13760             // so styles could change on the next re-rendering.
13761             // We don't check if we actually had such selectors as that
13762             // is complicated from here: we just set styles to be invalid
13763             // so they are re-computed once the DOM is fully built.
13764             _document->setNodeStylesInvalidIfLoading();
13765         }
13766         else {
13767             tag_accepted = AutoOpenClosePop( PARSER_STEP_TAG_OPENING, id );
13768         }
13769     }
13770     else {
13771         AutoClose( id, true );
13772     }
13773 
13774     // Set a flag for OnText to accumulate the content of any <HEAD><STYLE>
13775     // (We do that after the autoclose above, so that with <HEAD><META><STYLE>,
13776     // the META is properly closed and we find HEAD as the current node.)
13777     if ( id == el_style && _currNode && _currNode->getElement()->getNodeId() == el_head ) {
13778         _inHeadStyle = true;
13779     }
13780 
13781     // From now on, we don't create/close any elements, so expect
13782     // the next event to be OnTagBody (except OnTagAttribute)
13783     _tagBodyCalled = false;
13784 
13785     if ( !tag_accepted ) {
13786         // Don't create the element
13787         // If not accepted, the HTML parser will still call OnTagBody, and might
13788         // call OnTagAttribute before that. We should ignore them until OnTagBody.
13789         // No issue with OnTagClose, that can usually ignore stuff.
13790         _curTagIsIgnored = true;
13791         return _currNode ? _currNode->getElement() : NULL;
13792     }
13793 
13794     _currNode = new ldomElementWriter( _document, nsid, id, _currNode, insert_before_last_child );
13795     _flags = _currNode->getFlags();
13796 
13797     if ( insert_before_last_child ) {
13798         _curFosteredNode = _currNode;
13799     }
13800 
13801     if (_document->getDOMVersionRequested() >= 20200824 && id == el_p) {
13802         // To avoid checking DOM ancestors with the numerous tags that close a P
13803         _lastP = _currNode;
13804     }
13805 
13806     // Some libRu tweaks:
13807     if ( setParseAsPre ) {
13808         // Set an attribute on the DIV we just added
13809         _currNode->getElement()->setAttributeValue(LXML_NS_NONE, attr_ParserHint, U"ParseAsPre");
13810         // And set this global flag as we'll need to re-enable PRE (as it
13811         // will be reset by ldomDocumentWriter::OnTagBody() as we won't have
13812         // proper CSS white-space:pre inheritance) and XMLParser flags.
13813         _libRuParseAsPre = true;
13814     }
13815     if ( setDisplayNone ) {
13816         // Hide the FORM that was used to detect libRu,
13817         // now that currNode is the FORM element
13818         appendStyle( U"display: none" );
13819     }
13820 
13821     //logfile << " !o!\n";
13822     return _currNode->getElement();
13823 }
13824 
13825 /// called after > of opening tag (when entering tag body)
13826 // Note to avoid confusion: all tags HAVE a body (their content), so this
13827 // is called on all tags.
OnTagBody()13828 void ldomDocumentWriterFilter::OnTagBody()
13829 {
13830     _tagBodyCalled = true;
13831     if ( _curTagIsIgnored ) {
13832         _curTagIsIgnored = false; // Done with this ignored tag
13833         // We don't want ldomDocumentWriter::OnTagBody() to re-init
13834         // the current node styles (as we ignored this element,
13835         // _currNode is the previous node, already initNodeStyle()'d)
13836         return;
13837     }
13838 
13839     // This superclass OnTagBody() will initNodeStyle() on this node.
13840     // Some specific handling for the <BODY> tag to deal with HEAD STYLE
13841     // and LINK is also done there.
13842     ldomDocumentWriter::OnTagBody();
13843 
13844     if ( _curNodeIsSelfClosing ) {
13845         // Now that styles are set, we can close the element
13846         // Let's have it closed properly with flags correctly re set, and so
13847         // that specific handling in OnTagClose() is done (ex. for <LINK>)
13848         OnTagClose(NULL, NULL, true);
13849         return;
13850     }
13851 
13852     if ( _libRuDocumentDetected ) {
13853         if ( _libRuParseAsPre ) {
13854             // The OnTagBody() above might have cancelled TXTFLG_PRE
13855             // (that the ldomElementWriter inherited from its parent)
13856             // when ensuring proper CSS white-space inheritance.
13857             // Re-enable it
13858             _currNode->_flags |= TXTFLG_PRE;
13859             // Also set specific XMLParser flags so it spits out
13860             // <P>... for each paragraph of plain text, so that
13861             // we get some nice HTML instead
13862             _flags = TXTFLG_PRE | TXTFLG_PRE_PARA_SPLITTING | TXTFLG_TRIM;
13863         }
13864     }
13865 }
13866 
OnAttribute(const lChar32 * nsname,const lChar32 * attrname,const lChar32 * attrvalue)13867 void ldomDocumentWriterFilter::OnAttribute( const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue )
13868 {
13869     //logfile << "ldomDocumentWriter::OnAttribute() [" << nsname << ":" << attrname << "]";
13870     //if ( nsname && nsname[0] )
13871     //    lStr_lowercase( const_cast<lChar32 *>(nsname), lStr_len(nsname) );
13872     //lStr_lowercase( const_cast<lChar32 *>(attrname), lStr_len(attrname) );
13873 
13874     //CRLog::trace("OnAttribute(%s, %s)", LCSTR(lString32(attrname)), LCSTR(lString32(attrvalue)));
13875 
13876     if ( _curTagIsIgnored ) { // Ignore attributes if tag was ignored
13877         return;
13878     }
13879 
13880     // ldomDocumentWriterFilter is used for HTML/CHM/PDB (not with EPUBs).
13881     // We translate some attributes (now possibly deprecated) to their
13882     // CSS style equivalent, globally or for some elements only.
13883     // https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes
13884     lUInt16 id = _currNode->_element->getNodeId();
13885 
13886     // Not sure this is to be done here: we get attributes as they are read,
13887     // so possibly before or after a style=, that the attribute may override.
13888     // Hopefully, a document use either one or the other.
13889     // (Alternative: in lvrend.cpp when used, as fallback when there is
13890     // none specified in node->getStyle().)
13891 
13892     // HTML align= => CSS text-align:
13893     // Done for all elements, except IMG and TABLE (for those, it should
13894     // translate to float:left/right, which is ensured by epub.css)
13895     // Should this be restricted to some specific elements?
13896     if ( !lStr_cmp(attrname, "align") && (id != el_img) && (id != el_table) ) {
13897         lString32 align = lString32(attrvalue).lowercase();
13898         if ( align == U"justify")
13899             appendStyle( U"text-align: justify" );
13900         else if ( align == U"left")
13901             appendStyle( U"text-align: left" );
13902         else if ( align == U"right")
13903             appendStyle( U"text-align: right" );
13904         else if ( align == U"center")
13905             appendStyle( U"text-align: center" );
13906        return;
13907     }
13908 
13909     // For the table & friends elements where we do support the following styles,
13910     // we translate these deprecated attributes to their style equivalents:
13911     //
13912     // HTML valign= => CSS vertical-align: only for TH & TD (as lvrend.cpp
13913     // only uses it with table cells (erm_final or erm_block))
13914     if (id == el_th || id == el_td) {
13915         // Default rendering for cells is valign=baseline
13916         if ( !lStr_cmp(attrname, "valign") ) {
13917             lString32 valign = lString32(attrvalue).lowercase();
13918             if ( valign == U"top" )
13919                 appendStyle( U"vertical-align: top" );
13920             else if ( valign == U"middle" )
13921                 appendStyle( U"vertical-align: middle" );
13922             else if ( valign == U"bottom")
13923                 appendStyle( U"vertical-align: bottom" );
13924            return;
13925         }
13926     }
13927     // HTML width= => CSS width: only for TH, TD and COL (as lvrend.cpp
13928     // only uses it with erm_table_column and table cells)
13929     // Note: with IMG, lvtextfm LFormattedText::AddSourceObject() only uses
13930     // style, and not attributes: <img width=100 height=50> would not be used.
13931     if (id == el_th || id == el_td || id == el_col) {
13932         if ( !lStr_cmp(attrname, "width") ) {
13933             lString32 val = lString32(attrvalue);
13934             const lChar32 * s = val.c_str();
13935             bool is_pct = false;
13936             int n=0;
13937             if (s && s[0]) {
13938                 for (int i=0; s[i]; i++) {
13939                     if (s[i]>='0' && s[i]<='9') {
13940                         n = n*10 + (s[i]-'0');
13941                     } else if (s[i] == '%') {
13942                         is_pct = true;
13943                         break;
13944                     }
13945                 }
13946                 if (n > 0) {
13947                     val = lString32("width: ");
13948                     val.appendDecimal(n);
13949                     val += is_pct ? "%" : "px"; // CSS pixels
13950                     appendStyle(val.c_str());
13951                 }
13952             }
13953             return;
13954         }
13955     }
13956 
13957     // Othewise, add the attribute
13958     lUInt16 attr_ns = (nsname && nsname[0]) ? _document->getNsNameIndex( nsname ) : 0;
13959     lUInt16 attr_id = (attrname && attrname[0]) ? _document->getAttrNameIndex( attrname ) : 0;
13960 
13961     _currNode->addAttribute( attr_ns, attr_id, attrvalue );
13962 
13963     //logfile << " !a!\n";
13964 }
13965 
13966 /// called on closing tag
OnTagClose(const lChar32 *,const lChar32 * tagname,bool self_closing_tag)13967 void ldomDocumentWriterFilter::OnTagClose( const lChar32 * /*nsname*/, const lChar32 * tagname, bool self_closing_tag )
13968 {
13969     if ( !_tagBodyCalled ) {
13970         CRLog::error("OnTagClose w/o parent's OnTagBody : %s", LCSTR(lString32(tagname)));
13971         crFatalError();
13972     }
13973     if ( !_currNode || !_currNode->getElement() ) {
13974         _errFlag = true;
13975         return;
13976     }
13977 
13978     //lUInt16 nsid = (nsname && nsname[0]) ? _document->getNsNameIndex(nsname) : 0;
13979     lUInt16 curNodeId = _currNode->getElement()->getNodeId();
13980     lUInt16 id = tagname ? _document->getElementNameIndex(tagname) : curNodeId;
13981     _errFlag |= (id != curNodeId); // (we seem to not do anything with _errFlag)
13982     // We should expect the tagname we got to be the same as curNode's element name,
13983     // but it looks like we may get an upper closing tag, that pop() or AutoClose()
13984     // below might handle. So, here below, we check that both id and curNodeId match
13985     // the element id we check for.
13986 
13987     if ( _libRuDocumentToDetect && id == el_div ) {
13988         // No need to try detecting after we see a closing </DIV>,
13989         // as the FORM we look for is in the first DIV
13990         _libRuDocumentToDetect = false;
13991     }
13992     if ( _libRuDocumentDetected && id == el_pre ) {
13993         // Also, if we're about to close the original PRE that we masqueraded
13994         // as DIV and that has enabled _libRuParseAsPre, reset it.
13995         // (In Lib.ru books, it seems a PRE is never closed, or only at
13996         // the end by another PRE where it doesn't matter if we keep that flag.)
13997         ldomNode * n = _currNode->getElement();
13998         if ( n->getNodeId() == el_div && n->hasAttribute( attr_ParserHint ) &&
13999                     n->getAttributeValue( attr_ParserHint ) == U"ParseAsPre" ) {
14000             _libRuParseAsPre = false;
14001         }
14002     }
14003 
14004     // Parse <link rel="stylesheet">, put the css file link in _stylesheetLinks,
14005     // they will be added to <body><stylesheet> when we meet <BODY>
14006     // (duplicated in ldomDocumentWriter::OnTagClose)
14007     if ( id == el_link && curNodeId == el_link ) { // link node
14008         ldomNode * n = _currNode->getElement();
14009         if ( n->getParentNode() && n->getParentNode()->getNodeId() == el_head &&
14010                  lString32(n->getAttributeValue("rel")).lowercase() == U"stylesheet" &&
14011                  lString32(n->getAttributeValue("type")).lowercase() == U"text/css" ) {
14012             lString32 href = n->getAttributeValue("href");
14013             lString32 stylesheetFile = LVCombinePaths( _document->getCodeBase(), href );
14014             CRLog::debug("Internal stylesheet file: %s", LCSTR(stylesheetFile));
14015             // We no more apply it immediately: it will be when <BODY> is met
14016             // _document->setDocStylesheetFileName(stylesheetFile);
14017             // _document->applyDocumentStyleSheet();
14018             _stylesheetLinks.add(stylesheetFile);
14019         }
14020     }
14021 
14022     // HTML title detection
14023     if ( id == el_title && curNodeId == el_title && _currNode->_element->getParentNode() &&
14024                            _currNode->_element->getParentNode()->getNodeId() == el_head ) {
14025         lString32 s = _currNode->_element->getText();
14026         s.trim();
14027         if ( !s.empty() ) {
14028             // TODO: split authors, title & series
14029             _document->getProps()->setString( DOC_PROP_TITLE, s );
14030         }
14031     }
14032 
14033     if (_document->getDOMVersionRequested() >= 20200824) { // A little bit more HTML5 conformance
14034         if ( _curNodeIsSelfClosing ) { // Internal call (not from XMLParser)
14035             _currNode = pop( _currNode, id );
14036             _curNodeIsSelfClosing = false;
14037         }
14038         else {
14039             if ( id == el_image )
14040                 id = el_img;
14041             AutoOpenClosePop( self_closing_tag ? PARSER_STEP_TAG_SELF_CLOSING : PARSER_STEP_TAG_CLOSING, id );
14042         }
14043     }
14044     else {
14045         //======== START FILTER CODE ============
14046         AutoClose( curNodeId, false );
14047         //======== END FILTER CODE ==============
14048         // save closed element
14049         // ldomNode * closedElement = _currNode->getElement();
14050         _currNode = pop( _currNode, id );
14051             // _currNode is now the parent
14052     }
14053 
14054     if ( _currNode ) {
14055         _flags = _currNode->getFlags();
14056         if ( _libRuParseAsPre ) {
14057             // Re-set specific parser flags
14058             _flags |= TXTFLG_PRE | TXTFLG_PRE_PARA_SPLITTING | TXTFLG_TRIM;
14059         }
14060     }
14061 
14062     if ( id==_stopTagId ) {
14063         //CRLog::trace("stop tag found, stopping...");
14064         _parser->Stop();
14065     }
14066     //logfile << " !c!\n";
14067 }
14068 
14069 /// called on text
OnText(const lChar32 * text,int len,lUInt32 flags)14070 void ldomDocumentWriterFilter::OnText( const lChar32 * text, int len, lUInt32 flags )
14071 {
14072     // Accumulate <HEAD><STYLE> content
14073     if (_inHeadStyle) {
14074         _headStyleText << lString32(text, len);
14075         _inHeadStyle = false;
14076         return;
14077     }
14078 
14079     if (_document->getDOMVersionRequested() >= 20200824) { // A little bit more HTML5 conformance
14080         // We can get text before any node (it should then have <html><body> emited before it),
14081         // but we might get spaces between " <html> <head> <title>The title <br>The content".
14082         // Try to handle that correctly.
14083         if ( !_bodyTagSeen ) {
14084             // While not yet in BODY, when in HTML or HEAD, ignore empty
14085             // text (as non empty text will create BODY)
14086             if ( !_currNode || _currNode->getElement()->isRoot() ||
14087                                _currNode->getElement()->getNodeId() == el_html ||
14088                                _currNode->getElement()->getNodeId() == el_head ) {
14089                 if ( !IsEmptySpace(text, len) ) {
14090                     // Non-empty text: have implicit HTML or BODY tags created and HEAD closed
14091                     AutoOpenClosePop( PARSER_STEP_TEXT, 0 );
14092                 }
14093             }
14094         }
14095     }
14096     //logfile << "lxmlDocumentWriter::OnText() fpos=" << fpos;
14097     if (_currNode)
14098     {
14099         lUInt16 curNodeId = _currNode->getElement()->getNodeId();
14100         if (_document->getDOMVersionRequested() < 20200824) {
14101             AutoClose( curNodeId, false );
14102         }
14103         if ( (_flags & XML_FLAG_NO_SPACE_TEXT)
14104              && IsEmptySpace(text, len) && !(flags & TXTFLG_PRE))
14105              return;
14106         bool insert_before_last_child = false;
14107         if (_document->getDOMVersionRequested() >= 20200824) {
14108             // If we're inserting text while in table sub-elements that
14109             // don't accept text, have it foster parented
14110             if ( curNodeId >= el_table && curNodeId <= el_tr && curNodeId != el_caption ) {
14111                 if ( !IsEmptySpace(text, len) ) {
14112                     if ( CheckAndEnsureFosterParenting(el_NULL) ) {
14113                         insert_before_last_child = true;
14114                     }
14115                 }
14116             }
14117         }
14118         else {
14119             // Previously, text in table sub-elements (only table elements and
14120             // self-closing elements have _allowText=false) had any text in between
14121             // table elements dropped (but not elements! with "<table>abc<div>def",
14122             // "abc" was dropped, but not "def")
14123             if ( !_currNode->_allowText )
14124                 return;
14125         }
14126         if ( !_libRuDocumentDetected ) {
14127             _currNode->onText( text, len, flags, insert_before_last_child );
14128         }
14129         else { // Lib.ru text cleanup
14130             if ( _libRuParagraphStart ) {
14131                 // Cleanup "Fine HTML": "<br>&nbsp; &nbsp; &nbsp; Viento fuerte, 1950"
14132                 while ( *text==160 && len > 0 ) {
14133                     text++;
14134                     len--;
14135                     while ( *text==' ' && len > 0 ) {
14136                         text++;
14137                         len--;
14138                     }
14139                 }
14140                 _libRuParagraphStart = false;
14141             }
14142             // Handle "Lib.ru html" paragraph, parsed from the nearly plaintext
14143             // by XMLParser with TXTFLG_PRE | TXTFLG_PRE_PARA_SPLITTING | TXTFLG_TRIM
14144             bool autoPara = flags & TXTFLG_PRE;
14145             int leftSpace = 0;
14146             const lChar32 * paraTag = NULL;
14147             bool isHr = false;
14148             if ( autoPara ) {
14149                 while ( (*text==' ' || *text=='\t' || *text==160) && len > 0 ) {
14150                     text++;
14151                     len--;
14152                     leftSpace += (*text == '\t') ? 8 : 1;
14153                 }
14154                 paraTag = leftSpace > 8 ? U"h2" : U"p";
14155                 lChar32 ch = 0;
14156                 bool sameCh = true;
14157                 for ( int i=0; i<len; i++ ) {
14158                     if ( !ch )
14159                         ch = text[i];
14160                     // We would need this to have HR work:
14161                     //   else if ( i == len-1 && text[i] == ' ' ) {
14162                     //      // Ignore a trailing space we may get
14163                     //      // Note that some HR might be missed when the
14164                     //      // "----" directly follows some indented text.
14165                     //   }
14166                     // but by fixing it, we'd remove a P and have XPointers
14167                     // like /html/body/div/p[14]/text().113 reference the wrong P,
14168                     // so keep doing bad to not mess past highlights...
14169                     else if ( ch != text[i] ) {
14170                         sameCh = false;
14171                         break;
14172                     }
14173                 }
14174                 if ( !ch )
14175                     sameCh = false;
14176                 if ( (ch=='-' || ch=='=' || ch=='_' || ch=='*' || ch=='#') && sameCh )
14177                     isHr = true;
14178             }
14179             if ( isHr ) {
14180                 OnTagOpen( NULL, U"hr" );
14181                 OnTagBody();
14182                 OnTagClose( NULL, U"hr" );
14183             } else if ( len > 0 ) {
14184                 if ( autoPara ) {
14185                     OnTagOpen( NULL, paraTag );
14186                     OnTagBody();
14187                 }
14188                 _currNode->onText( text, len, flags, insert_before_last_child );
14189                 if ( autoPara )
14190                     OnTagClose( NULL, paraTag );
14191             }
14192         }
14193         if ( insert_before_last_child ) {
14194             // We have no _curFosteredNode to pop, so just restore
14195             // the previous table node
14196             _currNode = _curNodeBeforeFostering;
14197             _curNodeBeforeFostering = NULL;
14198             _curFosteredNode = NULL;
14199         }
14200     }
14201     //logfile << " !t!\n";
14202 }
14203 
ldomDocumentWriterFilter(ldomDocument * document,bool headerOnly,const char *** rules)14204 ldomDocumentWriterFilter::ldomDocumentWriterFilter(ldomDocument * document, bool headerOnly, const char *** rules )
14205 : ldomDocumentWriter( document, headerOnly )
14206 , _libRuDocumentToDetect(true)
14207 , _libRuDocumentDetected(false)
14208 , _libRuParagraphStart(false)
14209 , _libRuParseAsPre(false)
14210 , _styleAttrId(0)
14211 , _classAttrId(0)
14212 , _tagBodyCalled(true)
14213 , _htmlTagSeen(false)
14214 , _headTagSeen(false)
14215 , _bodyTagSeen(false)
14216 , _curNodeIsSelfClosing(false)
14217 , _curTagIsIgnored(false)
14218 , _curNodeBeforeFostering(NULL)
14219 , _curFosteredNode(NULL)
14220 , _lastP(NULL)
14221 {
14222     if (_document->getDOMVersionRequested() >= 20200824) {
14223         // We're not using the provided rules, but hardcoded ones in AutoOpenClosePop()
14224         return;
14225     }
14226     lUInt16 i;
14227     for ( i=0; i<MAX_ELEMENT_TYPE_ID; i++ )
14228         _rules[i] = NULL;
14229     lUInt16 items[MAX_ELEMENT_TYPE_ID];
14230     for ( i=0; rules[i]; i++ ) {
14231         const char ** rule = rules[i];
14232         lUInt16 j;
14233         for ( j=0; rule[j] && j<MAX_ELEMENT_TYPE_ID; j++ ) {
14234             const char * s = rule[j];
14235             items[j] = _document->getElementNameIndex( lString32(s).c_str() );
14236         }
14237         if ( j>=1 ) {
14238             lUInt16 id = items[0];
14239             _rules[ id ] = new lUInt16[j];
14240             for ( int k=0; k<j; k++ ) {
14241                 _rules[id][k] = k==j-1 ? 0 : items[k+1];
14242             }
14243         }
14244     }
14245 }
14246 
~ldomDocumentWriterFilter()14247 ldomDocumentWriterFilter::~ldomDocumentWriterFilter()
14248 {
14249     if (_document->getDOMVersionRequested() >= 20200824) {
14250         return;
14251     }
14252     for ( int i=0; i<MAX_ELEMENT_TYPE_ID; i++ ) {
14253         if ( _rules[i] )
14254             delete[] _rules[i];
14255     }
14256 }
14257 
14258 #if BUILD_LITE!=1
/// Magic signature put at the start of a serialized DocFileHeader by
/// DocFileHeader::serialize() and checked by DocFileHeader::deserialize()
static const char * doc_file_magic = "CR3\n";
14260 
14261 
serialize(SerialBuf & hdrbuf)14262 bool lxmlDocBase::DocFileHeader::serialize( SerialBuf & hdrbuf )
14263 {
14264     int start = hdrbuf.pos();
14265     hdrbuf.putMagic( doc_file_magic );
14266     //CRLog::trace("Serializing render data: %d %d %d %d", render_dx, render_dy, render_docflags, render_style_hash);
14267     hdrbuf << render_dx << render_dy << render_docflags << render_style_hash << stylesheet_hash << node_displaystyle_hash;
14268 
14269     hdrbuf.putCRC( hdrbuf.pos() - start );
14270 
14271 #if 0
14272     {
14273         lString8 s;
14274         s<<"SERIALIZED HDR BUF: ";
14275         for ( int i=0; i<hdrbuf.pos(); i++ ) {
14276             char tmp[20];
14277             sprintf(tmp, "%02x ", hdrbuf.buf()[i]);
14278             s<<tmp;
14279         }
14280         CRLog::trace(s.c_str());
14281     }
14282 #endif
14283     return !hdrbuf.error();
14284 }
14285 
deserialize(SerialBuf & hdrbuf)14286 bool lxmlDocBase::DocFileHeader::deserialize( SerialBuf & hdrbuf )
14287 {
14288     int start = hdrbuf.pos();
14289     hdrbuf.checkMagic( doc_file_magic );
14290     if ( hdrbuf.error() ) {
14291         CRLog::error("Swap file Magic signature doesn't match");
14292         return false;
14293     }
14294     hdrbuf >> render_dx >> render_dy >> render_docflags >> render_style_hash >> stylesheet_hash >> node_displaystyle_hash;
14295     //CRLog::trace("Deserialized render data: %d %d %d %d", render_dx, render_dy, render_docflags, render_style_hash);
14296     hdrbuf.checkCRC( hdrbuf.pos() - start );
14297     if ( hdrbuf.error() ) {
14298         CRLog::error("Swap file - header unpack error");
14299         return false;
14300     }
14301     return true;
14302 }
14303 #endif
14304 
setDocFlag(lUInt32 mask,bool value)14305 void tinyNodeCollection::setDocFlag( lUInt32 mask, bool value )
14306 {
14307     CRLog::debug("setDocFlag(%04x, %s)", mask, value?"true":"false");
14308     if ( value )
14309         _docFlags |= mask;
14310     else
14311         _docFlags &= ~mask;
14312 }
14313 
setDocFlags(lUInt32 value)14314 void tinyNodeCollection::setDocFlags( lUInt32 value )
14315 {
14316     CRLog::debug("setDocFlags(%04x)", value);
14317     _docFlags = value;
14318 }
14319 
getPersistenceFlags()14320 int tinyNodeCollection::getPersistenceFlags()
14321 {
14322     int format = 2; //getProps()->getIntDef(DOC_PROP_FILE_FORMAT, 0);
14323     int flag = ( format==2 && getDocFlag(DOC_FLAG_PREFORMATTED_TEXT) ) ? 1 : 0;
14324     CRLog::trace("getPersistenceFlags() returned %d", flag);
14325     return flag;
14326 }
14327 
/// Resets the rendering related state of the document.
/// In non-LITE builds: clears the rendered blocks cache, marks the document
/// as not rendered, empties the url/image map and the font list, and
/// unregisters the fonts registered for this document index.
/// Node storage is not cleared (see TODO below).
void ldomDocument::clear()
{
#if BUILD_LITE!=1
    clearRendBlockCache();
    _rendered = false;
    _urlImageMap.clear();
    _fontList.clear();
    // Have the font manager forget the fonts registered for this document
    fontMan->UnregisterDocumentFonts(_docIndex);
#endif
    //TODO: implement clear
    //_elemStorage.
}
14340 
14341 #if BUILD_LITE!=1
openFromCache(CacheLoadingCallback * formatCallback,LVDocViewCallback * progressCallback)14342 bool ldomDocument::openFromCache( CacheLoadingCallback * formatCallback, LVDocViewCallback * progressCallback )
14343 {
14344     setCacheFileStale(true);
14345     if ( !openCacheFile() ) {
14346         CRLog::info("Cannot open document from cache. Need to read fully");
14347         clear();
14348         return false;
14349     }
14350     if ( !loadCacheFileContent(formatCallback, progressCallback) ) {
14351         CRLog::info("Error while loading document content from cache file.");
14352         clear();
14353         return false;
14354     }
14355 #if 0
14356     LVStreamRef s = LVOpenFileStream("/tmp/test.xml", LVOM_WRITE);
14357     if ( !s.isNull() )
14358         saveToStream(s, "UTF8");
14359 #endif
14360     _mapped = true;
14361     _rendered = true;
14362     _just_rendered_from_cache = true;
14363     _toc_from_cache_valid = true;
14364     // Use cached node_displaystyle_hash as _nodeDisplayStyleHashInitial, as it
14365     // should be in sync with the DOM stored in the cache
14366     _nodeDisplayStyleHashInitial = _hdr.node_displaystyle_hash;
14367     CRLog::info("Initializing _nodeDisplayStyleHashInitial from cache file: %x", _nodeDisplayStyleHashInitial);
14368 
14369     setCacheFileStale(false);
14370     return true;
14371 }
14372 
14373 /// load document cache file content, @see saveChanges()
loadCacheFileContent(CacheLoadingCallback * formatCallback,LVDocViewCallback * progressCallback)14374 bool ldomDocument::loadCacheFileContent(CacheLoadingCallback * formatCallback, LVDocViewCallback * progressCallback)
14375 {
14376 
14377     CRLog::trace("ldomDocument::loadCacheFileContent()");
14378     {
14379         if (progressCallback) progressCallback->OnLoadFileProgress(5);
14380         SerialBuf propsbuf(0, true);
14381         if ( !_cacheFile->read( CBT_PROP_DATA, propsbuf ) ) {
14382             CRLog::error("Error while reading props data");
14383             return false;
14384         }
14385         getProps()->deserialize( propsbuf );
14386         if ( propsbuf.error() ) {
14387             CRLog::error("Cannot decode property table for document");
14388             return false;
14389         }
14390 
14391         if ( formatCallback ) {
14392             int fmt = getProps()->getIntDef(DOC_PROP_FILE_FORMAT_ID,
14393                     doc_format_fb2);
14394             if (fmt < doc_format_fb2 || fmt > doc_format_max)
14395                 fmt = doc_format_fb2;
14396             // notify about format detection, to allow setting format-specific CSS
14397             formatCallback->OnCacheFileFormatDetected((doc_format_t)fmt);
14398         }
14399 
14400         if (progressCallback) progressCallback->OnLoadFileProgress(10);
14401         CRLog::trace("ldomDocument::loadCacheFileContent() - ID data");
14402         SerialBuf idbuf(0, true);
14403         if ( !_cacheFile->read( CBT_MAPS_DATA, idbuf ) ) {
14404             CRLog::error("Error while reading Id data");
14405             return false;
14406         }
14407         deserializeMaps( idbuf );
14408         if ( idbuf.error() ) {
14409             CRLog::error("Cannot decode ID table for document");
14410             return false;
14411         }
14412 
14413         if (progressCallback) progressCallback->OnLoadFileProgress(15);
14414         CRLog::trace("ldomDocument::loadCacheFileContent() - page data");
14415         SerialBuf pagebuf(0, true);
14416         if ( !_cacheFile->read( CBT_PAGE_DATA, pagebuf ) ) {
14417             CRLog::error("Error while reading pages data");
14418             return false;
14419         }
14420         pagebuf.swap( _pagesData );
14421         _pagesData.setPos( 0 );
14422         LVRendPageList pages;
14423         pages.deserialize(_pagesData);
14424         if ( _pagesData.error() ) {
14425             CRLog::error("Page data deserialization is failed");
14426             return false;
14427         }
14428         CRLog::info("%d pages read from cache file", pages.length());
14429         //_pagesData.setPos( 0 );
14430 
14431         if (progressCallback) progressCallback->OnLoadFileProgress(20);
14432         CRLog::trace("ldomDocument::loadCacheFileContent() - embedded font data");
14433         {
14434             SerialBuf buf(0, true);
14435             if ( !_cacheFile->read(CBT_FONT_DATA, buf)) {
14436                 CRLog::error("Error while reading font data");
14437                 return false;
14438             }
14439             if (!_fontList.deserialize(buf)) {
14440                 CRLog::error("Error while parsing font data");
14441                 return false;
14442             }
14443             registerEmbeddedFonts();
14444         }
14445 
14446         if (progressCallback) progressCallback->OnLoadFileProgress(25);
14447         DocFileHeader h = {};
14448         SerialBuf hdrbuf(0,true);
14449         if ( !_cacheFile->read( CBT_REND_PARAMS, hdrbuf ) ) {
14450             CRLog::error("Error while reading header data");
14451             return false;
14452         } else if ( !h.deserialize(hdrbuf) ) {
14453             CRLog::error("Header data deserialization is failed");
14454             return false;
14455         }
14456         _hdr = h;
14457         CRLog::info("Loaded render properties: styleHash=%x, stylesheetHash=%x, docflags=%x, width=%x, height=%x, nodeDisplayStyleHash=%x",
14458                 _hdr.render_style_hash, _hdr.stylesheet_hash, _hdr.render_docflags, _hdr.render_dx, _hdr.render_dy, _hdr.node_displaystyle_hash);
14459     }
14460 
14461     if (progressCallback) progressCallback->OnLoadFileProgress(30);
14462     CRLog::trace("ldomDocument::loadCacheFileContent() - node data");
14463     if ( !loadNodeData() ) {
14464         CRLog::error("Error while reading node instance data");
14465         return false;
14466     }
14467 
14468     if (progressCallback) progressCallback->OnLoadFileProgress(40);
14469     CRLog::trace("ldomDocument::loadCacheFileContent() - element storage");
14470     if ( !_elemStorage.load() ) {
14471         CRLog::error("Error while loading element data");
14472         return false;
14473     }
14474     if (progressCallback) progressCallback->OnLoadFileProgress(50);
14475     CRLog::trace("ldomDocument::loadCacheFileContent() - text storage");
14476     if ( !_textStorage.load() ) {
14477         CRLog::error("Error while loading text data");
14478         return false;
14479     }
14480     if (progressCallback) progressCallback->OnLoadFileProgress(60);
14481     CRLog::trace("ldomDocument::loadCacheFileContent() - rect storage");
14482     if ( !_rectStorage.load() ) {
14483         CRLog::error("Error while loading rect data");
14484         return false;
14485     }
14486     if (progressCallback) progressCallback->OnLoadFileProgress(70);
14487     CRLog::trace("ldomDocument::loadCacheFileContent() - node style storage");
14488     if ( !_styleStorage.load() ) {
14489         CRLog::error("Error while loading node style data");
14490         return false;
14491     }
14492 
14493     if (progressCallback) progressCallback->OnLoadFileProgress(80);
14494     CRLog::trace("ldomDocument::loadCacheFileContent() - TOC");
14495     {
14496         SerialBuf tocbuf(0,true);
14497         if ( !_cacheFile->read( CBT_TOC_DATA, tocbuf ) ) {
14498             CRLog::error("Error while reading TOC data");
14499             return false;
14500         } else if ( !m_toc.deserialize(this, tocbuf) ) {
14501             CRLog::error("TOC data deserialization is failed");
14502             return false;
14503         }
14504     }
14505     if (progressCallback) progressCallback->OnLoadFileProgress(85);
14506     CRLog::trace("ldomDocument::loadCacheFileContent() - PageMap");
14507     {
14508         SerialBuf pagemapbuf(0,true);
14509         if ( !_cacheFile->read( CBT_PAGEMAP_DATA, pagemapbuf ) ) {
14510             CRLog::error("Error while reading PageMap data");
14511             return false;
14512         } else if ( !m_pagemap.deserialize(this, pagemapbuf) ) {
14513             CRLog::error("PageMap data deserialization is failed");
14514             return false;
14515         }
14516     }
14517 
14518 
14519     if (progressCallback) progressCallback->OnLoadFileProgress(90);
14520     if ( loadStylesData() ) {
14521         CRLog::trace("ldomDocument::loadCacheFileContent() - using loaded styles");
14522         updateLoadedStyles( true );
14523 //        lUInt32 styleHash = calcStyleHash();
14524 //        styleHash = styleHash * 31 + calcGlobalSettingsHash();
14525 //        CRLog::debug("Loaded style hash: %x", styleHash);
14526 //        lUInt32 styleHash = calcStyleHash();
14527 //        CRLog::info("Loaded style hash = %08x", styleHash);
14528     } else {
14529         CRLog::trace("ldomDocument::loadCacheFileContent() - style loading failed: will reinit ");
14530         updateLoadedStyles( false );
14531     }
14532 
14533     CRLog::trace("ldomDocument::loadCacheFileContent() - completed successfully");
14534     if (progressCallback) progressCallback->OnLoadFileProgress(95);
14535 
14536     return true;
14537 }
14538 
/// Magic signature string for styles data
/// (NOTE(review): presumably used by the styles save/load code, which is
/// not defined next to this declaration - to be confirmed)
static const char * styles_magic = "CRSTYLES";

/// Expands to an 'if' statement: when the timer named maxTime (which must be
/// in scope where the macro is used) has expired, logs
/// "timer expired while <s>" and returns CR_TIMEOUT from the enclosing function.
/// s must be a string literal, as it is concatenated to the log message.
/// As this is a bare 'if' (no trailing ';' needed), avoid using it as the
/// unbraced body of another if/else.
#define CHECK_EXPIRATION(s) \
    if ( maxTime.expired() ) { CRLog::info("timer expired while " s); return CR_TIMEOUT; }
14543 
14544 /// saves changes to cache file, limited by time interval (can be called again to continue after TIMEOUT)
saveChanges(CRTimerUtil & maxTime,LVDocViewCallback * progressCallback)14545 ContinuousOperationResult ldomDocument::saveChanges( CRTimerUtil & maxTime, LVDocViewCallback * progressCallback )
14546 {
14547     if ( !_cacheFile )
14548         return CR_DONE;
14549 
14550     if (progressCallback) progressCallback->OnSaveCacheFileStart();
14551 
14552     if (maxTime.infinite()) {
14553         _mapSavingStage = 0; // all stages from the beginning
14554         _cacheFile->setAutoSyncSize(0);
14555     } else {
14556         //CRLog::trace("setting autosync");
14557         _cacheFile->setAutoSyncSize(STREAM_AUTO_SYNC_SIZE);
14558         //CRLog::trace("setting autosync - done");
14559     }
14560 
14561     CRLog::trace("ldomDocument::saveChanges(timeout=%d stage=%d)", maxTime.interval(), _mapSavingStage);
14562     setCacheFileStale(true);
14563 
14564     switch (_mapSavingStage) {
14565     default:
14566     case 0:
14567 
14568         if (!maxTime.infinite())
14569             _cacheFile->flush(false, maxTime);
14570         CHECK_EXPIRATION("flushing of stream")
14571 
14572         persist( maxTime );
14573         CHECK_EXPIRATION("persisting of node data")
14574         if (progressCallback) progressCallback->OnSaveCacheFileProgress(0);
14575 
14576         // fall through
14577     case 1:
14578         _mapSavingStage = 1;
14579         CRLog::trace("ldomDocument::saveChanges() - element storage");
14580 
14581         if ( !_elemStorage.save(maxTime) ) {
14582             CRLog::error("Error while saving element data");
14583             return CR_ERROR;
14584         }
14585         CHECK_EXPIRATION("saving element storate")
14586         if (progressCallback) progressCallback->OnSaveCacheFileProgress(10);
14587         // fall through
14588     case 2:
14589         _mapSavingStage = 2;
14590         CRLog::trace("ldomDocument::saveChanges() - text storage");
14591         if ( !_textStorage.save(maxTime) ) {
14592             CRLog::error("Error while saving text data");
14593             return CR_ERROR;
14594         }
14595         CHECK_EXPIRATION("saving text storate")
14596         if (progressCallback) progressCallback->OnSaveCacheFileProgress(20);
14597         // fall through
14598     case 3:
14599         _mapSavingStage = 3;
14600         CRLog::trace("ldomDocument::saveChanges() - rect storage");
14601 
14602         if ( !_rectStorage.save(maxTime) ) {
14603             CRLog::error("Error while saving rect data");
14604             return CR_ERROR;
14605         }
14606         CHECK_EXPIRATION("saving rect storate")
14607         if (progressCallback) progressCallback->OnSaveCacheFileProgress(30);
14608         // fall through
14609     case 41:
14610         _mapSavingStage = 41;
14611         CRLog::trace("ldomDocument::saveChanges() - blob storage data");
14612 
14613         if ( _blobCache.saveToCache(maxTime) == CR_ERROR ) {
14614             CRLog::error("Error while saving blob storage data");
14615             return CR_ERROR;
14616         }
14617         if (!maxTime.infinite())
14618             _cacheFile->flush(false, maxTime); // intermediate flush
14619         CHECK_EXPIRATION("saving blob storage data")
14620         if (progressCallback) progressCallback->OnSaveCacheFileProgress(35);
14621         // fall through
14622     case 4:
14623         _mapSavingStage = 4;
14624         CRLog::trace("ldomDocument::saveChanges() - node style storage");
14625 
14626         if ( !_styleStorage.save(maxTime) ) {
14627             CRLog::error("Error while saving node style data");
14628             return CR_ERROR;
14629         }
14630         if (!maxTime.infinite())
14631             _cacheFile->flush(false, maxTime); // intermediate flush
14632         CHECK_EXPIRATION("saving node style storage")
14633         if (progressCallback) progressCallback->OnSaveCacheFileProgress(40);
14634         // fall through
14635     case 5:
14636         _mapSavingStage = 5;
14637         CRLog::trace("ldomDocument::saveChanges() - misc data");
14638         {
14639             SerialBuf propsbuf(4096);
14640             getProps()->serialize( propsbuf );
14641             if ( !_cacheFile->write( CBT_PROP_DATA, propsbuf, COMPRESS_MISC_DATA ) ) {
14642                 CRLog::error("Error while saving props data");
14643                 return CR_ERROR;
14644             }
14645         }
14646         if (!maxTime.infinite())
14647             _cacheFile->flush(false, maxTime); // intermediate flush
14648         CHECK_EXPIRATION("saving props data")
14649         if (progressCallback) progressCallback->OnSaveCacheFileProgress(45);
14650         // fall through
14651     case 6:
14652         _mapSavingStage = 6;
14653         CRLog::trace("ldomDocument::saveChanges() - ID data");
14654         {
14655             SerialBuf idbuf(4096);
14656             serializeMaps( idbuf );
14657             if ( !_cacheFile->write( CBT_MAPS_DATA, idbuf, COMPRESS_MISC_DATA ) ) {
14658                 CRLog::error("Error while saving Id data");
14659                 return CR_ERROR;
14660             }
14661         }
14662         if (!maxTime.infinite())
14663             _cacheFile->flush(false, maxTime); // intermediate flush
14664         CHECK_EXPIRATION("saving ID data")
14665         if (progressCallback) progressCallback->OnSaveCacheFileProgress(50);
14666         // fall through
14667     case 7:
14668         _mapSavingStage = 7;
14669         if ( _pagesData.pos() ) {
14670             CRLog::trace("ldomDocument::saveChanges() - page data (%d bytes)", _pagesData.pos());
14671             if ( !_cacheFile->write( CBT_PAGE_DATA, _pagesData, COMPRESS_PAGES_DATA  ) ) {
14672                 CRLog::error("Error while saving pages data");
14673                 return CR_ERROR;
14674             }
14675         } else {
14676             CRLog::trace("ldomDocument::saveChanges() - no page data");
14677         }
14678         if (!maxTime.infinite())
14679             _cacheFile->flush(false, maxTime); // intermediate flush
14680         CHECK_EXPIRATION("saving page data")
14681         if (progressCallback) progressCallback->OnSaveCacheFileProgress(60);
14682         // fall through
14683     case 8:
14684         _mapSavingStage = 8;
14685 
14686         CRLog::trace("ldomDocument::saveChanges() - node data");
14687         if ( !saveNodeData() ) {
14688             CRLog::error("Error while node instance data");
14689             return CR_ERROR;
14690         }
14691         if (!maxTime.infinite())
14692             _cacheFile->flush(false, maxTime); // intermediate flush
14693         CHECK_EXPIRATION("saving node data")
14694         if (progressCallback) progressCallback->OnSaveCacheFileProgress(70);
14695         // fall through
14696     case 9:
14697         _mapSavingStage = 9;
14698         CRLog::trace("ldomDocument::saveChanges() - render info");
14699         {
14700             SerialBuf hdrbuf(0,true);
14701             if ( !_hdr.serialize(hdrbuf) ) {
14702                 CRLog::error("Header data serialization is failed");
14703                 return CR_ERROR;
14704             } else if ( !_cacheFile->write( CBT_REND_PARAMS, hdrbuf, false ) ) {
14705                 CRLog::error("Error while writing header data");
14706                 return CR_ERROR;
14707             }
14708         }
14709         CRLog::info("Saving render properties: styleHash=%x, stylesheetHash=%x, docflags=%x, width=%x, height=%x, nodeDisplayStyleHash=%x",
14710                     _hdr.render_style_hash, _hdr.stylesheet_hash, _hdr.render_docflags, _hdr.render_dx, _hdr.render_dy, _hdr.node_displaystyle_hash);
14711         if (progressCallback) progressCallback->OnSaveCacheFileProgress(73);
14712 
14713         CRLog::trace("ldomDocument::saveChanges() - TOC");
14714         {
14715             SerialBuf tocbuf(0,true);
14716             if ( !m_toc.serialize(tocbuf) ) {
14717                 CRLog::error("TOC data serialization is failed");
14718                 return CR_ERROR;
14719             } else if ( !_cacheFile->write( CBT_TOC_DATA, tocbuf, COMPRESS_TOC_DATA ) ) {
14720                 CRLog::error("Error while writing TOC data");
14721                 return CR_ERROR;
14722             }
14723         }
14724         if (progressCallback) progressCallback->OnSaveCacheFileProgress(76);
14725 
14726         CRLog::trace("ldomDocument::saveChanges() - PageMap");
14727         {
14728             SerialBuf pagemapbuf(0,true);
14729             if ( !m_pagemap.serialize(pagemapbuf) ) {
14730                 CRLog::error("PageMap data serialization is failed");
14731                 return CR_ERROR;
14732             } else if ( !_cacheFile->write( CBT_PAGEMAP_DATA, pagemapbuf, COMPRESS_PAGEMAP_DATA ) ) {
14733                 CRLog::error("Error while writing PageMap data");
14734                 return CR_ERROR;
14735             }
14736         }
14737         if (!maxTime.infinite())
14738             _cacheFile->flush(false, maxTime); // intermediate flush
14739         CHECK_EXPIRATION("saving TOC data")
14740         if (progressCallback) progressCallback->OnSaveCacheFileProgress(80);
14741         // fall through
14742     case 10:
14743         _mapSavingStage = 10;
14744 
14745         if ( !saveStylesData() ) {
14746             CRLog::error("Error while writing style data");
14747             return CR_ERROR;
14748         }
14749         if (progressCallback) progressCallback->OnSaveCacheFileProgress(90);
14750         // fall through
14751     case 11:
14752         _mapSavingStage = 11;
14753         CRLog::trace("ldomDocument::saveChanges() - embedded fonts");
14754         {
14755             SerialBuf buf(4096);
14756             _fontList.serialize(buf);
14757             if (!_cacheFile->write(CBT_FONT_DATA, buf, COMPRESS_MISC_DATA) ) {
14758                 CRLog::error("Error while saving embedded font data");
14759                 return CR_ERROR;
14760             }
14761             CHECK_EXPIRATION("saving embedded fonts")
14762         }
14763         if (progressCallback) progressCallback->OnSaveCacheFileProgress(95);
14764         // fall through
14765     case 12:
14766         _mapSavingStage = 12;
14767         CRLog::trace("ldomDocument::saveChanges() - flush");
14768         {
14769             CRTimerUtil infinite;
14770             if ( !_cacheFile->flush(true, infinite) ) {
14771                 CRLog::error("Error while updating index of cache file");
14772                 return CR_ERROR;
14773             }
14774             CHECK_EXPIRATION("flushing")
14775         }
14776         if (progressCallback) progressCallback->OnSaveCacheFileProgress(100);
14777         // fall through
14778     case 13:
14779         _mapSavingStage = 13;
14780         setCacheFileStale(false);
14781     }
14782     CRLog::trace("ldomDocument::saveChanges() - done");
14783     if (progressCallback) progressCallback->OnSaveCacheFileEnd();
14784     return CR_DONE;
14785 }
14786 
14787 /// save changes to cache file, @see loadCacheFileContent()
saveChanges()14788 bool ldomDocument::saveChanges()
14789 {
14790     if ( !_cacheFile )
14791         return true;
14792     CRLog::debug("ldomDocument::saveChanges() - infinite");
14793     CRTimerUtil timerNoLimit;
14794     ContinuousOperationResult res = saveChanges(timerNoLimit);
14795     return res!=CR_ERROR;
14796 }
14797 
saveStylesData()14798 bool tinyNodeCollection::saveStylesData()
14799 {
14800     SerialBuf stylebuf(0, true);
14801     lUInt32 stHash = _stylesheet.getHash();
14802     LVArray<css_style_ref_t> * list = _styles.getIndex();
14803     stylebuf.putMagic(styles_magic);
14804     stylebuf << stHash;
14805     stylebuf << (lUInt32)list->length(); // index
14806     for ( int i=0; i<list->length(); i++ ) {
14807         css_style_ref_t rec = list->get(i);
14808         if ( !rec.isNull() ) {
14809             stylebuf << (lUInt32)i; // index
14810             rec->serialize( stylebuf ); // style
14811         }
14812     }
14813     stylebuf << (lUInt32)0; // index=0 is end list mark
14814     stylebuf.putMagic(styles_magic);
14815     delete list;
14816     if ( stylebuf.error() )
14817         return false;
14818     CRLog::trace("Writing style data: %d bytes", stylebuf.pos());
14819     if ( !_cacheFile->write( CBT_STYLE_DATA, stylebuf, COMPRESS_STYLE_DATA) ) {
14820         return false;
14821     }
14822     return !stylebuf.error();
14823 }
14824 
loadStylesData()14825 bool tinyNodeCollection::loadStylesData()
14826 {
14827     SerialBuf stylebuf(0, true);
14828     if ( !_cacheFile->read( CBT_STYLE_DATA, stylebuf ) ) {
14829         CRLog::error("Error while reading style data");
14830         return false;
14831     }
14832     lUInt32 stHash = 0;
14833     lInt32 len = 0;
14834 
14835     // lUInt32 myHash = _stylesheet.getHash();
14836     // When loading from cache, this stylesheet was built with the
14837     // initial element name ids, which may have been replaced by
14838     // the one restored from the cache. So, its hash may be different
14839     // from the one we're going to load from cache.
14840     // This is not a failure, but a sign the stylesheet will have
14841     // to be regenerated (later, no need for it currently as we're
14842     // loading previously applied style data): this will be checked
14843     // in checkRenderContext() when comparing a combo hash
14844     // against _hdr.stylesheet_hash fetched from the cache.
14845 
14846     //LVArray<css_style_ref_t> * list = _styles.getIndex();
14847     stylebuf.checkMagic(styles_magic);
14848     stylebuf >> stHash;
14849     // Don't check for this:
14850     // if ( stHash != myHash ) {
14851     //     CRLog::info("tinyNodeCollection::loadStylesData() - stylesheet hash is changed: skip loading styles");
14852     //     return false;
14853     // }
14854     stylebuf >> len; // index
14855     if ( stylebuf.error() )
14856         return false;
14857     LVArray<css_style_ref_t> list(len, css_style_ref_t());
14858     for ( int i=0; i<list.length(); i++ ) {
14859         lUInt32 index = 0;
14860         stylebuf >> index; // index
14861         if ( index<=0 || (int)index>=len || stylebuf.error() )
14862             break;
14863         css_style_ref_t rec( new css_style_rec_t() );
14864         if ( !rec->deserialize(stylebuf) )
14865             break;
14866         list.set( index, rec );
14867     }
14868     stylebuf.checkMagic(styles_magic);
14869     if ( stylebuf.error() )
14870         return false;
14871 
14872     CRLog::trace("Setting style data: %d bytes", stylebuf.size());
14873     _styles.setIndex( list );
14874 
14875     return !stylebuf.error();
14876 }
14877 
calcStyleHash(bool already_rendered)14878 lUInt32 tinyNodeCollection::calcStyleHash(bool already_rendered)
14879 {
14880     CRLog::debug("calcStyleHash start");
14881 //    int maxlog = 20;
14882     lUInt32 res = 0; //_elemCount;
14883     lUInt32 globalHash = calcGlobalSettingsHash(getFontContextDocIndex(), already_rendered);
14884     lUInt32 docFlags = getDocFlags();
14885     //CRLog::info("Calculating style hash...  elemCount=%d, globalHash=%08x, docFlags=%08x", _elemCount, globalHash, docFlags);
14886     if (_nodeStyleHash) {
14887         // Re-use saved _nodeStyleHash if it has not been invalidated,
14888         // as the following loop can be expensive
14889         res = _nodeStyleHash;
14890         CRLog::debug("  using saved _nodeStyleHash %x", res);
14891     }
14892     else {
14893         // We also compute _nodeDisplayStyleHash from each node style->display. It
14894         // may not change as often as _nodeStyleHash, but if it does, it means
14895         // some nodes switched between 'block' and 'inline', and that some autoBoxing
14896         // that may have been added should no more be in the DOM for a correct
14897         // rendering: in that case, the user will have to reload the document, and
14898         // we should invalidate the cache so a new correct DOM is build on load.
14899         _nodeDisplayStyleHash = 0;
14900 
14901         int count = ((_elemCount+TNC_PART_LEN-1) >> TNC_PART_SHIFT);
14902         for ( int i=0; i<count; i++ ) {
14903             int offs = i*TNC_PART_LEN;
14904             int sz = TNC_PART_LEN;
14905             if ( offs + sz > _elemCount+1 ) {
14906                 sz = _elemCount+1 - offs;
14907             }
14908             ldomNode * buf = _elemList[i];
14909             if ( !buf ) continue; // avoid clang-tidy warning
14910             for ( int j=0; j<sz; j++ ) {
14911                 if ( buf[j].isElement() ) {
14912                     css_style_ref_t style = buf[j].getStyle();
14913                     lUInt32 sh = calcHash( style );
14914                     res = res * 31 + sh;
14915                     if (!style.isNull()) {
14916                         _nodeDisplayStyleHash = _nodeDisplayStyleHash * 31 + style.get()->display;
14917                         // Also account in this hash if this node is "white_space: pre" or alike.
14918                         // If white_space changes from/to "pre"-like to/from "normal"-like,
14919                         // the document will need to be reloaded so that the HTML text parts
14920                         // are parsed according the the PRE/not-PRE rules
14921                         if (style.get()->white_space >= css_ws_pre_line)
14922                             _nodeDisplayStyleHash += 29;
14923                         // Also account for style->float_, as it should create/remove new floatBox
14924                         // elements wrapping floats when toggling BLOCK_RENDERING_ENHANCED
14925                         if (style.get()->float_ > css_f_none)
14926                             _nodeDisplayStyleHash += 123;
14927                     }
14928                     //printf("element %d %d style hash: %x\n", i, j, sh);
14929                     LVFontRef font = buf[j].getFont();
14930                     lUInt32 fh = calcHash( font );
14931                     res = res * 31 + fh;
14932                     //printf("element %d %d font hash: %x\n", i, j, fh);
14933 //                    if ( maxlog>0 && sh==0 ) {
14934 //                        style = buf[j].getStyle();
14935 //                        CRLog::trace("[%06d] : s=%08x f=%08x  res=%08x", offs+j, sh, fh, res);
14936 //                        maxlog--;
14937 //                    }
14938                 }
14939             }
14940         }
14941 
14942         CRLog::debug("  COMPUTED _nodeStyleHash %x", res);
14943         _nodeStyleHash = res;
14944         CRLog::debug("  COMPUTED _nodeDisplayStyleHash %x (initial: %x)", _nodeDisplayStyleHash, _nodeDisplayStyleHashInitial);
14945     }
14946     CRLog::info("Calculating style hash...  elemCount=%d, globalHash=%08x, docFlags=%08x, nodeStyleHash=%08x", _elemCount, globalHash, docFlags, res);
14947     res = res * 31 + _imgScalingOptions.getHash();
14948     res = res * 31 + _spaceWidthScalePercent;
14949     res = res * 31 + _minSpaceCondensingPercent;
14950     res = res * 31 + _unusedSpaceThresholdPercent;
14951 
14952     // _maxAddedLetterSpacingPercent does not need to be accounted, as, working
14953     // only on a laid out line, it does not need a re-rendering, but just
14954     // a _renderedBlockCache.clear() to reformat paragraphs and have the
14955     // word re-positioned (the paragraphs width & height do not change)
14956 
14957     // Hanging punctuation does not need to trigger a re-render, as
14958     // it's now ensured by alignLine() and won't change paragraphs height.
14959     // We just need to _renderedBlockCache.clear() when it changes.
14960     // if ( _hangingPunctuationEnabled )
14961     //     res = res * 75 + 1761;
14962 
14963     res = res * 31 + _renderBlockRenderingFlags;
14964     res = res * 31 + _interlineScaleFactor;
14965 
14966     res = (res * 31 + globalHash) * 31 + docFlags;
14967 //    CRLog::info("Calculated style hash = %08x", res);
14968     CRLog::debug("calcStyleHash done");
14969     return res;
14970 }
14971 
validateChild(ldomNode * node)14972 static void validateChild( ldomNode * node )
14973 {
14974     // DEBUG TEST
14975     if ( !node->isRoot() && node->getParentNode()->getChildIndex( node->getDataIndex() )<0 ) {
14976         CRLog::error("Invalid parent->child relation for nodes %d->%d", node->getParentNode()->getDataIndex(), node->getParentNode()->getDataIndex() );
14977     }
14978 }
14979 
14980 /// called on document loading end
validateDocument()14981 bool tinyNodeCollection::validateDocument()
14982 {
14983     ((ldomDocument*)this)->getRootNode()->recurseElements(validateChild);
14984     int count = ((_elemCount+TNC_PART_LEN-1) >> TNC_PART_SHIFT);
14985     bool res = true;
14986     for ( int i=0; i<count; i++ ) {
14987         int offs = i*TNC_PART_LEN;
14988         int sz = TNC_PART_LEN;
14989         if ( offs + sz > _elemCount+1 ) {
14990             sz = _elemCount+1 - offs;
14991         }
14992         ldomNode * buf = _elemList[i];
14993         for ( int j=0; j<sz; j++ ) {
14994             buf[j].setDocumentIndex( _docIndex );
14995             if ( buf[j].isElement() ) {
14996                 lUInt16 style = getNodeStyleIndex( buf[j]._handle._dataIndex );
14997                 lUInt16 font = getNodeFontIndex( buf[j]._handle._dataIndex );;
14998                 if ( !style ) {
14999                     if ( !buf[j].isRoot() ) {
15000                         CRLog::error("styleId=0 for node <%s> %d", LCSTR(buf[j].getNodeName()), buf[j].getDataIndex());
15001                         res = false;
15002                     }
15003                 } else if ( _styles.get(style).isNull() ) {
15004                     CRLog::error("styleId!=0, but absent in cache for node <%s> %d", LCSTR(buf[j].getNodeName()), buf[j].getDataIndex());
15005                     res = false;
15006                 }
15007                 if ( !font ) {
15008                     if ( !buf[j].isRoot() ) {
15009                         CRLog::error("fontId=0 for node <%s>", LCSTR(buf[j].getNodeName()));
15010                         res = false;
15011                     }
15012                 } else if ( _fonts.get(font).isNull() ) {
15013                     CRLog::error("fontId!=0, but absent in cache for node <%s>", LCSTR(buf[j].getNodeName()));
15014                     res = false;
15015                 }
15016             }
15017         }
15018     }
15019     return res;
15020 }
15021 
updateLoadedStyles(bool enabled)15022 bool tinyNodeCollection::updateLoadedStyles( bool enabled )
15023 {
15024     int count = ((_elemCount+TNC_PART_LEN-1) >> TNC_PART_SHIFT);
15025     bool res = true;
15026     LVArray<css_style_ref_t> * list = _styles.getIndex();
15027 
15028     _fontMap.clear(); // style index to font index
15029 
15030     for ( int i=0; i<count; i++ ) {
15031         int offs = i*TNC_PART_LEN;
15032         int sz = TNC_PART_LEN;
15033         if ( offs + sz > _elemCount+1 ) {
15034             sz = _elemCount+1 - offs;
15035         }
15036         ldomNode * buf = _elemList[i];
15037         for ( int j=0; j<sz; j++ ) {
15038             buf[j].setDocumentIndex( _docIndex );
15039             if ( buf[j].isElement() ) {
15040                 lUInt16 style = getNodeStyleIndex( buf[j]._handle._dataIndex );
15041                 if ( enabled && style!=0 ) {
15042                     css_style_ref_t s = list->get( style );
15043                     if ( !s.isNull() ) {
15044                         lUInt16 fntIndex = _fontMap.get( style );
15045                         if ( fntIndex==0 ) {
15046                             LVFontRef fnt = getFont(s.get(), getFontContextDocIndex());
15047                             fntIndex = (lUInt16)_fonts.cache( fnt );
15048                             if ( fnt.isNull() ) {
15049                                 CRLog::error("font not found for style!");
15050                             } else {
15051                                 _fontMap.set(style, fntIndex);
15052                             }
15053                         } else {
15054                             _fonts.addIndexRef( fntIndex );
15055                         }
15056                         if ( fntIndex<=0 ) {
15057                             CRLog::error("font caching failed for style!");
15058                             res = false;
15059                         } else {
15060                             setNodeFontIndex( buf[j]._handle._dataIndex, fntIndex );
15061                             //buf[j]._data._pelem._fontIndex = fntIndex;
15062                         }
15063                     } else {
15064                         CRLog::error("Loaded style index %d not found in style collection", (int)style);
15065                         setNodeFontIndex( buf[j]._handle._dataIndex, 0 );
15066                         setNodeStyleIndex( buf[j]._handle._dataIndex, 0 );
15067 //                        buf[j]._data._pelem._styleIndex = 0;
15068 //                        buf[j]._data._pelem._fontIndex = 0;
15069                         res = false;
15070                     }
15071                 } else {
15072                     setNodeFontIndex( buf[j]._handle._dataIndex, 0 );
15073                     setNodeStyleIndex( buf[j]._handle._dataIndex, 0 );
15074 //                    buf[j]._data._pelem._styleIndex = 0;
15075 //                    buf[j]._data._pelem._fontIndex = 0;
15076                 }
15077             }
15078         }
15079     }
15080 #ifdef TODO_INVESTIGATE
15081     if ( enabled && res) {
15082         //_styles.setIndex( *list );
15083         // correct list reference counters
15084 
15085         for ( int i=0; i<list->length(); i++ ) {
15086             if ( !list->get(i).isNull() ) {
15087                 // decrease reference counter
15088                 // TODO:
15089                 //_styles.release( list->get(i) );
15090             }
15091         }
15092     }
15093 #endif
15094     delete list;
15095 //    getRootNode()->setFont( _def_font );
15096 //    getRootNode()->setStyle( _def_style );
15097     _nodeStyleHash = 0;
15098     return res;
15099 }
15100 
15101 /// swaps to cache file or saves changes, limited by time interval
swapToCache(CRTimerUtil & maxTime)15102 ContinuousOperationResult ldomDocument::swapToCache( CRTimerUtil & maxTime )
15103 {
15104     CRLog::trace("ldomDocument::swapToCache entered");
15105     if ( _maperror )
15106         return CR_ERROR;
15107     if ( !_mapped ) {
15108         CRLog::trace("ldomDocument::swapToCache creating cache file");
15109         if ( !createCacheFile() ) {
15110             CRLog::error("ldomDocument::swapToCache: failed: cannot create cache file");
15111             _maperror = true;
15112             return CR_ERROR;
15113         }
15114     }
15115     _mapped = true;
15116     if (!maxTime.infinite()) {
15117         CRLog::info("Cache file is created, but document saving is postponed");
15118         return CR_TIMEOUT;
15119     }
15120     ContinuousOperationResult res = saveChanges(maxTime);
15121     if ( res==CR_ERROR )
15122     {
15123         CRLog::error("Error while saving changes to cache file");
15124         _maperror = true;
15125         return CR_ERROR;
15126     }
15127     CRLog::info("Successfully saved document to cache file: %dK", _cacheFile->getSize()/1024 );
15128     return res;
15129 }
15130 
15131 /// saves recent changes to mapped file
updateMap(CRTimerUtil & maxTime,LVDocViewCallback * progressCallback)15132 ContinuousOperationResult ldomDocument::updateMap(CRTimerUtil & maxTime, LVDocViewCallback * progressCallback)
15133 {
15134     if ( !_cacheFile || !_mapped )
15135         return CR_DONE;
15136 
15137     if ( _cacheFileLeaveAsDirty ) {
15138         CRLog::info("requested to set cache file as dirty without any update");
15139         _cacheFile->setDirtyFlag(true);
15140         return CR_DONE;
15141     }
15142 
15143     if ( !_cacheFileStale) {
15144         CRLog::info("No change, cache file update not needed");
15145         return CR_DONE;
15146     }
15147     CRLog::info("Updating cache file");
15148 
15149     ContinuousOperationResult res = saveChanges(maxTime, progressCallback); // NOLINT: Call to virtual function during destruction
15150     if ( res==CR_ERROR )
15151     {
15152         CRLog::error("Error while saving changes to cache file");
15153         return CR_ERROR;
15154     }
15155 
15156     if ( res==CR_DONE ) {
15157         CRLog::info("Cache file updated successfully");
15158         dumpStatistics();
15159     }
15160     return res;
15161 }
15162 
15163 #endif
15164 
15165 static const char * doccache_magic = "CoolReader3 Document Cache Directory Index\nV1.00\n";
15166 
15167 /// document cache
15168 class ldomDocCacheImpl : public ldomDocCache
15169 {
15170     lString32 _cacheDir;
15171     lvsize_t _maxSize;
15172     lUInt32 _oldStreamSize;
15173     lUInt32 _oldStreamCRC;
15174 
15175     struct FileItem {
15176         lString32 filename;
15177         lUInt32 size;
15178     };
15179     LVPtrVector<FileItem> _files;
15180 public:
ldomDocCacheImpl(lString32 cacheDir,lvsize_t maxSize)15181     ldomDocCacheImpl( lString32 cacheDir, lvsize_t maxSize )
15182         : _cacheDir( cacheDir ), _maxSize( maxSize ), _oldStreamSize(0), _oldStreamCRC(0)
15183     {
15184         LVAppendPathDelimiter( _cacheDir );
15185         CRLog::trace("ldomDocCacheImpl(%s maxSize=%d)", LCSTR(_cacheDir), (int)maxSize);
15186     }
15187 
writeIndex()15188     bool writeIndex()
15189     {
15190         lString32 filename = _cacheDir + "cr3cache.inx";
15191         if (_oldStreamSize == 0)
15192         {
15193             LVStreamRef oldStream = LVOpenFileStream(filename.c_str(), LVOM_READ);
15194             if (!oldStream.isNull()) {
15195                 _oldStreamSize = (lUInt32)oldStream->GetSize();
15196                 _oldStreamCRC = (lUInt32)oldStream->getcrc32();
15197             }
15198         }
15199 
15200         // fill buffer
15201         SerialBuf buf( 16384, true );
15202         buf.putMagic( doccache_magic );
15203         lUInt32 start = buf.pos();
15204         int count = _files.length();
15205         buf << (lUInt32)count;
15206         for ( int i=0; i<count && !buf.error(); i++ ) {
15207             FileItem * item = _files[i];
15208             buf << item->filename;
15209             buf << item->size;
15210             CRLog::trace("cache item: %s %d", LCSTR(item->filename), (int)item->size);
15211         }
15212         buf.putCRC( buf.pos() - start );
15213         if ( buf.error() )
15214             return false;
15215         lUInt32 newCRC = buf.getCRC();
15216         lUInt32 newSize = buf.pos();
15217 
15218         // check to avoid rewritting of identical file
15219         if (newCRC != _oldStreamCRC || newSize != _oldStreamSize) {
15220             // changed: need to write
15221             CRLog::trace("Writing cache index");
15222             LVStreamRef stream = LVOpenFileStream(filename.c_str(), LVOM_WRITE);
15223             if ( !stream )
15224                 return false;
15225             if ( stream->Write( buf.buf(), buf.pos(), NULL )!=LVERR_OK )
15226                 return false;
15227             _oldStreamCRC = newCRC;
15228             _oldStreamSize = newSize;
15229         }
15230         return true;
15231     }
15232 
readIndex()15233     bool readIndex(  )
15234     {
15235         lString32 filename = _cacheDir + "cr3cache.inx";
15236         // read index
15237         lUInt32 totalSize = 0;
15238         LVStreamRef instream = LVOpenFileStream( filename.c_str(), LVOM_READ );
15239         if ( !instream.isNull() ) {
15240             LVStreamBufferRef sb = instream->GetReadBuffer(0, instream->GetSize() );
15241             if ( !sb )
15242                 return false;
15243             SerialBuf buf( sb->getReadOnly(), sb->getSize() );
15244             if ( !buf.checkMagic( doccache_magic ) ) {
15245                 CRLog::error("wrong cache index file format");
15246                 return false;
15247             }
15248 
15249             lUInt32 start = buf.pos();
15250             lUInt32 count;
15251             buf >> count;
15252             for (lUInt32 i=0; i < count && !buf.error(); i++) {
15253                 FileItem * item = new FileItem();
15254                 _files.add( item );
15255                 buf >> item->filename;
15256                 buf >> item->size;
15257                 CRLog::trace("cache %d: %s [%d]", i, UnicodeToUtf8(item->filename).c_str(), (int)item->size );
15258                 totalSize += item->size;
15259             }
15260             if ( !buf.checkCRC( buf.pos() - start ) ) {
15261                 CRLog::error("CRC32 doesn't match in cache index file");
15262                 return false;
15263             }
15264 
15265             if ( buf.error() )
15266                 return false;
15267 
15268             CRLog::info( "Document cache index file read ok, %d files in cache, %d bytes", _files.length(), totalSize );
15269             return true;
15270         } else {
15271             CRLog::error( "Document cache index file cannot be read" );
15272             return false;
15273         }
15274     }
15275 
15276     /// remove all .cr3 files which are not listed in index
removeExtraFiles()15277     bool removeExtraFiles( )
15278     {
15279         LVContainerRef container;
15280         container = LVOpenDirectory( _cacheDir.c_str(), U"*.cr3" );
15281         if ( container.isNull() ) {
15282             if ( !LVCreateDirectory( _cacheDir ) ) {
15283                 CRLog::error("Cannot create directory %s", UnicodeToUtf8(_cacheDir).c_str() );
15284                 return false;
15285             }
15286             container = LVOpenDirectory( _cacheDir.c_str(), U"*.cr3" );
15287             if ( container.isNull() ) {
15288                 CRLog::error("Cannot open directory %s", UnicodeToUtf8(_cacheDir).c_str() );
15289                 return false;
15290             }
15291         }
15292         for ( int i=0; i<container->GetObjectCount(); i++ ) {
15293             const LVContainerItemInfo * item = container->GetObjectInfo( i );
15294             if ( !item->IsContainer() ) {
15295                 lString32 fn = item->GetName();
15296                 if ( !fn.endsWith(".cr3") )
15297                     continue;
15298                 if ( findFileIndex(fn)<0 ) {
15299                     // delete file
15300                     CRLog::info("Removing cache file not specified in index: %s", UnicodeToUtf8(fn).c_str() );
15301                     if ( !LVDeleteFile( _cacheDir + fn ) ) {
15302                         CRLog::error("Error while removing cache file not specified in index: %s", UnicodeToUtf8(fn).c_str() );
15303                     }
15304                 }
15305             }
15306         }
15307         return true;
15308     }
15309 
15310     // remove all extra files to add new one of specified size
reserve(lvsize_t allocSize)15311     bool reserve( lvsize_t allocSize )
15312     {
15313         bool res = true;
15314         // remove extra files specified in list
15315         lvsize_t dirsize = allocSize;
15316         for ( int i=0; i<_files.length(); ) {
15317             if ( LVFileExists( _cacheDir + _files[i]->filename ) ) {
15318                 if ( (i>0 || allocSize>0) && dirsize+_files[i]->size > _maxSize ) {
15319                     if ( LVDeleteFile( _cacheDir + _files[i]->filename ) ) {
15320                         _files.erase(i, 1);
15321                     } else {
15322                         CRLog::error("Cannot delete cache file %s", UnicodeToUtf8(_files[i]->filename).c_str() );
15323                         dirsize += _files[i]->size;
15324                         res = false;
15325                         i++;
15326                     }
15327                 } else {
15328                     dirsize += _files[i]->size;
15329                     i++;
15330                 }
15331             } else {
15332                 CRLog::error("File %s is found in cache index, but does not exist", UnicodeToUtf8(_files[i]->filename).c_str() );
15333                 _files.erase(i, 1);
15334             }
15335         }
15336         return res;
15337     }
15338 
findFileIndex(lString32 filename)15339     int findFileIndex( lString32 filename )
15340     {
15341         for ( int i=0; i<_files.length(); i++ ) {
15342             if ( _files[i]->filename == filename )
15343                 return i;
15344         }
15345         return -1;
15346     }
15347 
moveFileToTop(lString32 filename,lUInt32 size)15348     bool moveFileToTop( lString32 filename, lUInt32 size )
15349     {
15350         int index = findFileIndex( filename );
15351         if ( index<0 ) {
15352             FileItem * item = new FileItem();
15353             item->filename = filename;
15354             item->size = size;
15355             _files.insert( 0, item );
15356         } else {
15357             _files.move( 0, index );
15358             _files[0]->size = size;
15359         }
15360         return writeIndex();
15361     }
15362 
init()15363     bool init()
15364     {
15365         CRLog::info("Initialize document cache in directory %s", UnicodeToUtf8(_cacheDir).c_str() );
15366         // read index
15367         if ( readIndex(  ) ) {
15368             // read successfully
15369             // remove files not specified in list
15370             removeExtraFiles( );
15371         } else {
15372             if ( !LVCreateDirectory( _cacheDir ) ) {
15373                 CRLog::error("Document Cache: cannot create cache directory %s, disabling cache", UnicodeToUtf8(_cacheDir).c_str() );
15374                 return false;
15375             }
15376             _files.clear();
15377 
15378         }
15379         reserve(0);
15380         if ( !writeIndex() )
15381             return false; // cannot write index: read only?
15382         return true;
15383     }
15384 
15385     /// remove all files
clear()15386     bool clear()
15387     {
15388         for ( int i=0; i<_files.length(); i++ )
15389             LVDeleteFile( _files[i]->filename );
15390         _files.clear();
15391         return writeIndex();
15392     }
15393 
15394     // dir/filename.{crc32}.cr3
makeFileName(lString32 filename,lUInt32 crc,lUInt32 docFlags)15395     lString32 makeFileName( lString32 filename, lUInt32 crc, lUInt32 docFlags )
15396     {
15397         lString32 fn;
15398         lString8 filename8 = UnicodeToTranslit(filename);
15399         bool lastUnderscore = false;
15400         int goodCount = 0;
15401         int badCount = 0;
15402         for (int i = 0; i < filename8.length(); i++) {
15403             lChar32 ch = filename8[i];
15404 
15405             if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') || ch == '.' || ch == '-') {
15406                 fn << ch;
15407                 lastUnderscore = false;
15408                 goodCount++;
15409             } else {
15410                 if (!lastUnderscore) {
15411                     fn << U"_";
15412                     lastUnderscore = true;
15413                 }
15414                 badCount++;
15415             }
15416         }
15417         if (goodCount < 2 || badCount > goodCount * 2)
15418             fn << "_noname";
15419         if (fn.length() > 25)
15420             fn = fn.substr(0, 12) + "-" + fn.substr(fn.length()-12, 12);
15421         char s[16];
15422         sprintf(s, ".%08x.%d.cr3", (unsigned)crc, (int)docFlags);
15423         return fn + lString32( s ); //_cacheDir +
15424     }
15425 
15426     /// open existing cache file stream
openExisting(lString32 filename,lUInt32 crc,lUInt32 docFlags,lString32 & cachePath)15427     LVStreamRef openExisting( lString32 filename, lUInt32 crc, lUInt32 docFlags, lString32 &cachePath )
15428     {
15429         lString32 fn = makeFileName( filename, crc, docFlags );
15430         CRLog::debug("ldomDocCache::openExisting(%s)", LCSTR(fn));
15431         // Try filename with ".keep" extension (that a user can manually add
15432         // to a .cr3 cache file, for it to no more be maintained by crengine
15433         // in its index, thus not subject to _maxSize enforcement, so sure
15434         // to not be deleted by crengine)
15435         lString32 fn_keep = _cacheDir + fn + ".keep";
15436         if ( LVFileExists(fn_keep) ) {
15437             LVStreamRef stream = LVOpenFileStream( fn_keep.c_str(), LVOM_APPEND|LVOM_FLAG_SYNC );
15438             if ( !stream.isNull() ) {
15439                 CRLog::info( "ldomDocCache::openExisting - opening user renamed cache file %s", UnicodeToUtf8(fn_keep).c_str() );
15440                 cachePath = fn_keep;
15441 #if ENABLED_BLOCK_WRITE_CACHE
15442                 stream = LVCreateBlockWriteStream( stream, WRITE_CACHE_BLOCK_SIZE, WRITE_CACHE_BLOCK_COUNT );
15443 #endif
15444                 return stream;
15445             }
15446         }
15447         LVStreamRef res;
15448         if ( findFileIndex( fn ) < 0 ) {
15449             CRLog::error( "ldomDocCache::openExisting - File %s is not found in cache index", UnicodeToUtf8(fn).c_str() );
15450             return res;
15451         }
15452         lString32 pathname = _cacheDir + fn;
15453         res = LVOpenFileStream( pathname.c_str(), LVOM_APPEND|LVOM_FLAG_SYNC );
15454         if ( !res ) {
15455             CRLog::error( "ldomDocCache::openExisting - File %s is listed in cache index, but cannot be opened", UnicodeToUtf8(fn).c_str() );
15456             return res;
15457         }
15458         cachePath = pathname;
15459 
15460 #if ENABLED_BLOCK_WRITE_CACHE
15461         res = LVCreateBlockWriteStream( res, WRITE_CACHE_BLOCK_SIZE, WRITE_CACHE_BLOCK_COUNT );
15462 #if TEST_BLOCK_STREAM
15463 
15464         LVStreamRef stream2 = LVOpenFileStream( (_cacheDir + fn + "_c").c_str(), LVOM_APPEND );
15465         if ( !stream2 ) {
15466             CRLog::error( "ldomDocCache::openExisting - file %s is cannot be created", UnicodeToUtf8(fn).c_str() );
15467             return stream2;
15468         }
15469         res = LVCreateCompareTestStream(res, stream2);
15470 #endif
15471 #endif
15472 
15473         lUInt32 fileSize = (lUInt32) res->GetSize();
15474         moveFileToTop( fn, fileSize );
15475         return res;
15476     }
15477 
15478     /// create new cache file
createNew(lString32 filename,lUInt32 crc,lUInt32 docFlags,lUInt32 fileSize,lString32 & cachePath)15479     LVStreamRef createNew( lString32 filename, lUInt32 crc, lUInt32 docFlags, lUInt32 fileSize, lString32 &cachePath )
15480     {
15481         lString32 fn = makeFileName( filename, crc, docFlags );
15482         LVStreamRef res;
15483         lString32 pathname = _cacheDir + fn;
15484         // If this cache filename exists with a ".keep" extension (manually
15485         // added by the user), and we were going to create a new one (because
15486         // this .keep is invalid, or cache file format version has changed),
15487         // remove it and create the new one with this same .keep extension,
15488         // so it stays (as wished by the user) not maintained by crengine.
15489         lString32 fn_keep = pathname + ".keep";
15490         if ( LVFileExists( fn_keep ) ) {
15491             LVDeleteFile( pathname ); // delete .cr3 if any
15492             LVDeleteFile( fn_keep ); // delete invalid .cr3.keep
15493             LVStreamRef stream = LVOpenFileStream( fn_keep.c_str(), LVOM_APPEND|LVOM_FLAG_SYNC );
15494             if ( !stream.isNull() ) {
15495                 CRLog::info( "ldomDocCache::createNew - re-creating user renamed cache file %s", UnicodeToUtf8(fn_keep).c_str() );
15496                 cachePath = fn_keep;
15497 #if ENABLED_BLOCK_WRITE_CACHE
15498                 stream = LVCreateBlockWriteStream( stream, WRITE_CACHE_BLOCK_SIZE, WRITE_CACHE_BLOCK_COUNT );
15499 #endif
15500                 return stream;
15501             }
15502         }
15503         if ( findFileIndex( pathname ) >= 0 )
15504             LVDeleteFile( pathname );
15505         reserve( fileSize/10 );
15506         //res = LVMapFileStream( (_cacheDir+fn).c_str(), LVOM_APPEND, fileSize );
15507         LVDeleteFile( pathname ); // try to delete, ignore errors
15508         res = LVOpenFileStream( pathname.c_str(), LVOM_APPEND|LVOM_FLAG_SYNC );
15509         if ( !res ) {
15510             CRLog::error( "ldomDocCache::createNew - file %s is cannot be created", UnicodeToUtf8(fn).c_str() );
15511             return res;
15512         }
15513         cachePath = pathname;
15514 #if ENABLED_BLOCK_WRITE_CACHE
15515         res = LVCreateBlockWriteStream( res, WRITE_CACHE_BLOCK_SIZE, WRITE_CACHE_BLOCK_COUNT );
15516 #if TEST_BLOCK_STREAM
15517         LVStreamRef stream2 = LVOpenFileStream( (pathname+U"_c").c_str(), LVOM_APPEND );
15518         if ( !stream2 ) {
15519             CRLog::error( "ldomDocCache::createNew - file %s is cannot be created", UnicodeToUtf8(fn).c_str() );
15520             return stream2;
15521         }
15522         res = LVCreateCompareTestStream(res, stream2);
15523 #endif
15524 #endif
15525         moveFileToTop( fn, fileSize );
15526         return res;
15527     }
15528 
~ldomDocCacheImpl()15529     virtual ~ldomDocCacheImpl()
15530     {
15531     }
15532 };
15533 
/// File-local cache implementation used by the static ldomDocCache methods;
/// NULL while the cache is not initialized (this is what enabled() tests).
static ldomDocCacheImpl * _cacheInstance = NULL;
15535 
init(lString32 cacheDir,lvsize_t maxSize)15536 bool ldomDocCache::init( lString32 cacheDir, lvsize_t maxSize )
15537 {
15538     if ( _cacheInstance )
15539         delete _cacheInstance;
15540     CRLog::info("Initialize document cache at %s (max size = %d)", UnicodeToUtf8(cacheDir).c_str(), (int)maxSize );
15541     _cacheInstance = new ldomDocCacheImpl( cacheDir, maxSize );
15542     if ( !_cacheInstance->init() ) {
15543         delete _cacheInstance;
15544         _cacheInstance = NULL;
15545         return false;
15546     }
15547     return true;
15548 }
15549 
close()15550 bool ldomDocCache::close()
15551 {
15552     if ( !_cacheInstance )
15553         return false;
15554     delete _cacheInstance;
15555     _cacheInstance = NULL;
15556     return true;
15557 }
15558 
15559 /// open existing cache file stream
openExisting(lString32 filename,lUInt32 crc,lUInt32 docFlags,lString32 & cachePath)15560 LVStreamRef ldomDocCache::openExisting( lString32 filename, lUInt32 crc, lUInt32 docFlags, lString32 &cachePath )
15561 {
15562     if ( !_cacheInstance )
15563         return LVStreamRef();
15564     return _cacheInstance->openExisting( filename, crc, docFlags, cachePath );
15565 }
15566 
15567 /// create new cache file
createNew(lString32 filename,lUInt32 crc,lUInt32 docFlags,lUInt32 fileSize,lString32 & cachePath)15568 LVStreamRef ldomDocCache::createNew( lString32 filename, lUInt32 crc, lUInt32 docFlags, lUInt32 fileSize, lString32 &cachePath )
15569 {
15570     if ( !_cacheInstance )
15571         return LVStreamRef();
15572     return _cacheInstance->createNew( filename, crc, docFlags, fileSize, cachePath );
15573 }
15574 
15575 /// delete all cache files
clear()15576 bool ldomDocCache::clear()
15577 {
15578     if ( !_cacheInstance )
15579         return false;
15580     return _cacheInstance->clear();
15581 }
15582 
15583 /// returns true if cache is enabled (successfully initialized)
enabled()15584 bool ldomDocCache::enabled()
15585 {
15586     return _cacheInstance!=NULL;
15587 }
15588 
15589 //void calcStyleHash( ldomNode * node, lUInt32 & value )
15590 //{
15591 //    if ( !node )
15592 //        return;
15593 //
15594 //    if ( node->isText() || node->getRendMethod()==erm_invisible ) {
15595 //        value = value * 75 + 1673251;
15596 //        return; // don't go through invisible nodes
15597 //    }
15598 //
15599 //    css_style_ref_t style = node->getStyle();
15600 //    font_ref_t font = node->getFont();
15601 //    lUInt32 styleHash = (!style) ? 4324324 : calcHash( style );
15602 //    lUInt32 fontHash = (!font) ? 256371 : calcHash( font );
15603 //    value = (value*75 + styleHash) * 75 + fontHash;
15604 //
15605 //    int cnt = node->getChildCount();
15606 //    for ( int i=0; i<cnt; i++ ) {
15607 //        calcStyleHash( node->getChildNode(i), value );
15608 //    }
15609 //}
15610 
15611 
15612 #if BUILD_LITE!=1
15613 
15614 /// save document formatting parameters after render
updateRenderContext()15615 void ldomDocument::updateRenderContext()
15616 {
15617     int dx = _page_width;
15618     int dy = _page_height;
15619     _nodeStyleHash = 0; // force recalculation by calcStyleHash()
15620     lUInt32 styleHash = calcStyleHash(_rendered);
15621     lUInt32 stylesheetHash = (((_stylesheet.getHash() * 31) + calcHash(_def_style))*31 + calcHash(_def_font));
15622     //calcStyleHash( getRootNode(), styleHash );
15623     _hdr.render_style_hash = styleHash;
15624     _hdr.stylesheet_hash = stylesheetHash;
15625     _hdr.render_dx = dx;
15626     _hdr.render_dy = dy;
15627     _hdr.render_docflags = _docFlags;
15628     _hdr.node_displaystyle_hash = _nodeDisplayStyleHashInitial; // we keep using the initial one
15629     CRLog::info("Updating render properties: styleHash=%x, stylesheetHash=%x, docflags=%x, width=%x, height=%x, nodeDisplayStyleHash=%x",
15630                 _hdr.render_style_hash, _hdr.stylesheet_hash, _hdr.render_docflags, _hdr.render_dx, _hdr.render_dy, _hdr.node_displaystyle_hash);
15631 }
15632 
15633 /// check document formatting parameters before render - whether we need to reformat; returns false if render is necessary
checkRenderContext()15634 bool ldomDocument::checkRenderContext()
15635 {
15636     bool res = true;
15637     ldomNode * node = getRootNode();
15638     if (node != NULL && node->getFont().isNull()) {
15639         // This may happen when epubfmt.cpp has called forceReinitStyles()
15640         // because the EPUB contains embedded fonts: a full nodes styles
15641         // re-init is needed to use the new fonts (only available at end
15642         // of loading)
15643         CRLog::info("checkRenderContext: style is not set for root node");
15644         res = false;
15645     }
15646     int dx = _page_width;
15647     int dy = _page_height;
15648     lUInt32 styleHash = calcStyleHash(_rendered);
15649     lUInt32 stylesheetHash = (((_stylesheet.getHash() * 31) + calcHash(_def_style))*31 + calcHash(_def_font));
15650     //calcStyleHash( getRootNode(), styleHash );
15651     if ( styleHash != _hdr.render_style_hash ) {
15652         CRLog::info("checkRenderContext: Style hash doesn't match %x!=%x", styleHash, _hdr.render_style_hash);
15653         res = false;
15654         if (_just_rendered_from_cache)
15655             printf("CRE WARNING: cached rendering is invalid (style hash mismatch): doing full rendering\n");
15656     } else if ( stylesheetHash != _hdr.stylesheet_hash ) {
15657         CRLog::info("checkRenderContext: Stylesheet hash doesn't match %x!=%x", stylesheetHash, _hdr.stylesheet_hash);
15658         res = false;
15659         if (_just_rendered_from_cache)
15660             printf("CRE WARNING: cached rendering is invalid (stylesheet hash mismatch): doing full rendering\n");
15661     } else if ( _docFlags != _hdr.render_docflags ) {
15662         CRLog::info("checkRenderContext: Doc flags don't match %x!=%x", _docFlags, _hdr.render_docflags);
15663         res = false;
15664         if (_just_rendered_from_cache)
15665             printf("CRE WARNING: cached rendering is invalid (doc flags mismatch): doing full rendering\n");
15666     } else if ( dx != (int)_hdr.render_dx ) {
15667         CRLog::info("checkRenderContext: Width doesn't match %x!=%x", dx, (int)_hdr.render_dx);
15668         res = false;
15669         if (_just_rendered_from_cache)
15670             printf("CRE WARNING: cached rendering is invalid (page width mismatch): doing full rendering\n");
15671     } else if ( dy != (int)_hdr.render_dy ) {
15672         CRLog::info("checkRenderContext: Page height doesn't match %x!=%x", dy, (int)_hdr.render_dy);
15673         res = false;
15674         if (_just_rendered_from_cache)
15675             printf("CRE WARNING: cached rendering is invalid (page height mismatch): doing full rendering\n");
15676     }
15677     // no need to check for _nodeDisplayStyleHash != _hdr.node_displaystyle_hash:
15678     // this is implicitely done by styleHash != _hdr.render_style_hash (whose _nodeDisplayStyleHash is a subset)
15679     _just_rendered_from_cache = false;
15680     if ( res ) {
15681 
15682         //if ( pages->length()==0 ) {
15683 //            _pagesData.reset();
15684 //            pages->deserialize( _pagesData );
15685         //}
15686 
15687         return true;
15688     }
15689 //    _hdr.render_style_hash = styleHash;
15690 //    _hdr.stylesheet_hash = stylesheetHash;
15691 //    _hdr.render_dx = dx;
15692 //    _hdr.render_dy = dy;
15693 //    _hdr.render_docflags = _docFlags;
15694 //    CRLog::info("New render properties: styleHash=%x, stylesheetHash=%x, docflags=%04x, width=%d, height=%d",
15695 //                _hdr.render_style_hash, _hdr.stylesheet_hash, _hdr.render_docflags, _hdr.render_dx, _hdr.render_dy);
15696     return false;
15697 }
15698 
15699 #endif
15700 
setStyleSheet(const char * css,bool replace)15701 void lxmlDocBase::setStyleSheet( const char * css, bool replace )
15702 {
15703     lString8 s(css);
15704 
15705     //CRLog::trace("lxmlDocBase::setStyleSheet(length:%d replace:%s css text hash: %x)", strlen(css), replace ? "yes" : "no", s.getHash());
15706     lUInt32 oldHash = _stylesheet.getHash();
15707     if ( replace ) {
15708         //CRLog::debug("cleaning stylesheet contents");
15709         _stylesheet.clear();
15710     }
15711     if ( css && *css ) {
15712         //CRLog::debug("appending stylesheet contents: \n%s", css);
15713         _stylesheet.parse( css, true );
15714         // We use override_important=true: we are the only code
15715         // that sets the main CSS (including style tweaks). We allow
15716         // any !important to override any previous !important.
15717         // Other calls to _stylesheet.parse() elsewhere are used to
15718         // include document embedded or inline CSS, with the default
15719         // of override_important=false, so they won't override
15720         // the ones we set here.
15721     }
15722     lUInt32 newHash = _stylesheet.getHash();
15723     if (oldHash != newHash) {
15724         CRLog::debug("New stylesheet hash: %08x", newHash);
15725     }
15726 }
15727 
15728 
15729 
15730 
15731 
15732 
15733 //=====================================================
15734 // ldomElement declaration placed here to hide DOM implementation
15735 // use ldomNode rich interface instead
15736 class tinyElement
15737 {
15738     friend struct ldomNode;
15739 private:
15740     ldomDocument * _document;
15741     ldomNode * _parentNode;
15742     lUInt16 _id;
15743     lUInt16 _nsid;
15744     LVArray < lInt32 > _children;
15745     ldomAttributeCollection _attrs;
15746     lvdom_element_render_method _rendMethod;
15747 public:
tinyElement(ldomDocument * document,ldomNode * parentNode,lUInt16 nsid,lUInt16 id)15748     tinyElement( ldomDocument * document, ldomNode * parentNode, lUInt16 nsid, lUInt16 id )
15749     : _document(document), _parentNode(parentNode), _id(id), _nsid(nsid), _rendMethod(erm_invisible)
15750     { _document->_tinyElementCount++; }
15751     /// destructor
~tinyElement()15752     ~tinyElement() { _document->_tinyElementCount--; }
15753 };
15754 
15755 
15756 #define NPELEM _data._elem_ptr
15757 #define NPTEXT _data._text_ptr._str
15758 
15759 //=====================================================
15760 
15761 /// minimize memory consumption
compact()15762 void tinyNodeCollection::compact()
15763 {
15764     _textStorage.compact(0xFFFFFF);
15765     _elemStorage.compact(0xFFFFFF);
15766     _rectStorage.compact(0xFFFFFF);
15767     _styleStorage.compact(0xFFFFFF);
15768 }
15769 
15770 /// allocate new tinyElement
allocTinyElement(ldomNode * parent,lUInt16 nsid,lUInt16 id)15771 ldomNode * tinyNodeCollection::allocTinyElement( ldomNode * parent, lUInt16 nsid, lUInt16 id )
15772 {
15773     ldomNode * node = allocTinyNode( ldomNode::NT_ELEMENT );
15774     tinyElement * elem = new tinyElement( (ldomDocument*)this, parent, nsid, id );
15775     node->NPELEM = elem;
15776     return node;
15777 }
15778 
readOnlyError()15779 static void readOnlyError()
15780 {
15781     crFatalError( 125, "Text node is persistent (read-only)! Call modify() to get r/w instance." );
15782 }
15783 
15784 //=====================================================
15785 
15786 // shortcut for dynamic element accessor
15787 #ifdef _DEBUG
15788   #define ASSERT_NODE_NOT_NULL \
15789     if ( isNull() ) \
15790 		crFatalError( 1313, "Access to null node" )
15791 #else
15792   #define ASSERT_NODE_NOT_NULL
15793 #endif
15794 
15795 /// returns node level, 0 is root node
getNodeLevel() const15796 lUInt8 ldomNode::getNodeLevel() const
15797 {
15798     const ldomNode * node = this;
15799     int level = 0;
15800     for ( ; node; node = node->getParentNode() )
15801         level++;
15802     return (lUInt8)level;
15803 }
15804 
onCollectionDestroy()15805 void ldomNode::onCollectionDestroy()
15806 {
15807     if ( isNull() )
15808         return;
15809     //CRLog::trace("ldomNode::onCollectionDestroy(%d) type=%d", this->_handle._dataIndex, TNTYPE);
15810     switch ( TNTYPE ) {
15811     case NT_TEXT:
15812         delete _data._text_ptr;
15813         _data._text_ptr = NULL;
15814         break;
15815     case NT_ELEMENT:
15816         // ???
15817 #if BUILD_LITE!=1
15818         getDocument()->clearNodeStyle( _handle._dataIndex );
15819 #endif
15820         delete NPELEM;
15821         NPELEM = NULL;
15822         break;
15823 #if BUILD_LITE!=1
15824     case NT_PTEXT:      // immutable (persistent) text node
15825         // do nothing
15826         break;
15827     case NT_PELEMENT:   // immutable (persistent) element node
15828         // do nothing
15829         break;
15830 #endif
15831     }
15832 }
15833 
destroy()15834 void ldomNode::destroy()
15835 {
15836     if ( isNull() )
15837         return;
15838     //CRLog::trace("ldomNode::destroy(%d) type=%d", this->_handle._dataIndex, TNTYPE);
15839     switch ( TNTYPE ) {
15840     case NT_TEXT:
15841         delete _data._text_ptr;
15842         break;
15843     case NT_ELEMENT:
15844         {
15845 #if BUILD_LITE!=1
15846             getDocument()->clearNodeStyle(_handle._dataIndex);
15847 #endif
15848             tinyElement * me = NPELEM;
15849             // delete children
15850             for ( int i=0; i<me->_children.length(); i++ ) {
15851                 ldomNode * child = getDocument()->getTinyNode(me->_children[i]);
15852                 if ( child )
15853                     child->destroy();
15854             }
15855             delete me;
15856             NPELEM = NULL;
15857         }
15858         delete NPELEM;
15859         break;
15860 #if BUILD_LITE!=1
15861     case NT_PTEXT:
15862         // disable removing from storage: to minimize modifications
15863         //_document->_textStorage.freeNode( _data._ptext_addr._addr );
15864         break;
15865     case NT_PELEMENT:   // immutable (persistent) element node
15866         {
15867             ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
15868             for ( int i=0; i<me->childCount; i++ )
15869                 getDocument()->getTinyNode( me->children[i] )->destroy();
15870             getDocument()->clearNodeStyle( _handle._dataIndex );
15871 //            getDocument()->_styles.release( _data._pelem._styleIndex );
15872 //            getDocument()->_fonts.release( _data._pelem._fontIndex );
15873 //            _data._pelem._styleIndex = 0;
15874 //            _data._pelem._fontIndex = 0;
15875             getDocument()->_elemStorage.freeNode( _data._pelem_addr );
15876         }
15877         break;
15878 #endif
15879     }
15880     getDocument()->recycleTinyNode( _handle._dataIndex );
15881 }
15882 
15883 /// returns index of child node by dataIndex
getChildIndex(lUInt32 dataIndex) const15884 int ldomNode::getChildIndex( lUInt32 dataIndex ) const
15885 {
15886     // was here and twice below: dataIndex &= 0xFFFFFFF0;
15887     // The lowest bits of a dataIndex carry properties about the node:
15888     //   bit 0: 0 = text node / 1 = element node
15889     //   bit 1: 0 = mutable node / 1 = immutable (persistent, cached)
15890     // (So, all Text nodes have an even dataIndex, and Element nodes
15891     // all have a odd dataIndex.)
15892     // This '& 0xFFFFFFF0' was to clear these properties so a same
15893     // node can be found if these properties change (mostly useful
15894     // with mutable<>persistent).
15895     // But text nodes and Element nodes use different independant counters
15896     // (see tinyNodeCollection::allocTinyNode(): _elemCount++, _textCount++)
15897     // and we may have a text node with dataIndex 8528, and an element
15898     // node with dataIndex 8529, that would be confused with each other
15899     // if we use 0xFFFFFFF0.
15900     // This could cause finding the wrong node, and strange side effects.
15901     // With '& 0xFFFFFFF1' keep the lowest bit.
15902     dataIndex &= 0xFFFFFFF1;
15903     ASSERT_NODE_NOT_NULL;
15904     int parentIndex = -1;
15905     switch ( TNTYPE ) {
15906     case NT_ELEMENT:
15907         {
15908             tinyElement * me = NPELEM;
15909             for ( int i=0; i<me->_children.length(); i++ ) {
15910                 if ( (me->_children[i] & 0xFFFFFFF1) == dataIndex ) {
15911                     // found
15912                     parentIndex = i;
15913                     break;
15914                 }
15915             }
15916         }
15917         break;
15918 #if BUILD_LITE!=1
15919     case NT_PELEMENT:
15920         {
15921             ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
15922             for ( int i=0; i<me->childCount; i++ ) {
15923                 if ( (me->children[i] & 0xFFFFFFF1) == dataIndex ) {
15924                     // found
15925                     parentIndex = i;
15926                     break;
15927                 }
15928             }
15929         }
15930         break;
15931     case NT_PTEXT:      // immutable (persistent) text node
15932 #endif
15933     case NT_TEXT:
15934         break;
15935     }
15936     return parentIndex;
15937 }
15938 
15939 /// returns index of node inside parent's child collection
getNodeIndex() const15940 int ldomNode::getNodeIndex() const
15941 {
15942     ASSERT_NODE_NOT_NULL;
15943     ldomNode * parent = getParentNode();
15944     if ( parent )
15945         return parent->getChildIndex( getDataIndex() );
15946     return 0;
15947 }
15948 
15949 /// returns true if node is document's root
isRoot() const15950 bool ldomNode::isRoot() const
15951 {
15952     ASSERT_NODE_NOT_NULL;
15953     switch ( TNTYPE ) {
15954     case NT_ELEMENT:
15955         return !NPELEM->_parentNode;
15956 #if BUILD_LITE!=1
15957     case NT_PELEMENT:   // immutable (persistent) element node
15958         {
15959              ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
15960              return me->parentIndex==0;
15961         }
15962         break;
15963     case NT_PTEXT:      // immutable (persistent) text node
15964         {
15965             return getDocument()->_textStorage.getParent( _data._ptext_addr )==0;
15966         }
15967 #endif
15968     case NT_TEXT:
15969         return _data._text_ptr->getParentIndex()==0;
15970     }
15971     return false;
15972 }
15973 
15974 /// call to invalidate cache if persistent node content is modified
modified()15975 void ldomNode::modified()
15976 {
15977 #if BUILD_LITE!=1
15978     if ( isPersistent() ) {
15979         if ( isElement() )
15980             getDocument()->_elemStorage.modified( _data._pelem_addr );
15981         else
15982             getDocument()->_textStorage.modified( _data._ptext_addr );
15983     }
15984 #endif
15985 }
15986 
15987 /// changes parent of item
setParentNode(ldomNode * parent)15988 void ldomNode::setParentNode( ldomNode * parent )
15989 {
15990     ASSERT_NODE_NOT_NULL;
15991 #ifdef TRACE_AUTOBOX
15992     if ( getParentNode()!=NULL && parent != NULL )
15993         CRLog::trace("Changing parent of %d from %d to %d", getDataIndex(), getParentNode()->getDataIndex(), parent->getDataIndex());
15994 #endif
15995     switch ( TNTYPE ) {
15996     case NT_ELEMENT:
15997         NPELEM->_parentNode = parent;
15998         break;
15999 #if BUILD_LITE!=1
16000     case NT_PELEMENT:   // immutable (persistent) element node
16001         {
16002             lUInt32 parentIndex = parent->_handle._dataIndex;
16003             ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16004             if ( me->parentIndex != (int)parentIndex ) {
16005                 me->parentIndex = parentIndex;
16006                 modified();
16007             }
16008         }
16009         break;
16010     case NT_PTEXT:      // immutable (persistent) text node
16011         {
16012             lUInt32 parentIndex = parent->_handle._dataIndex;
16013             getDocument()->_textStorage.setParent(_data._ptext_addr, parentIndex);
16014             //_data._ptext_addr._parentIndex = parentIndex;
16015             //_document->_textStorage.setTextParent( _data._ptext_addr._addr, parentIndex );
16016         }
16017         break;
16018 #endif
16019     case NT_TEXT:
16020         {
16021             lUInt32 parentIndex = parent->_handle._dataIndex;
16022             _data._text_ptr->setParentIndex( parentIndex );
16023         }
16024         break;
16025     }
16026 }
16027 
16028 /// returns dataIndex of node's parent, 0 if no parent
getParentIndex() const16029 int ldomNode::getParentIndex() const
16030 {
16031     ASSERT_NODE_NOT_NULL;
16032 
16033     switch ( TNTYPE ) {
16034     case NT_ELEMENT:
16035         return NPELEM->_parentNode ? NPELEM->_parentNode->getDataIndex() : 0;
16036 #if BUILD_LITE!=1
16037     case NT_PELEMENT:   // immutable (persistent) element node
16038         {
16039             ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16040             return me->parentIndex;
16041         }
16042         break;
16043     case NT_PTEXT:      // immutable (persistent) text node
16044         return getDocument()->_textStorage.getParent(_data._ptext_addr);
16045 #endif
16046     case NT_TEXT:
16047         return _data._text_ptr->getParentIndex();
16048     }
16049     return 0;
16050 }
16051 
16052 /// returns pointer to parent node, NULL if node has no parent
getParentNode() const16053 ldomNode * ldomNode::getParentNode() const
16054 {
16055     ASSERT_NODE_NOT_NULL;
16056     int parentIndex = 0;
16057     switch ( TNTYPE ) {
16058     case NT_ELEMENT:
16059         return NPELEM->_parentNode;
16060 #if BUILD_LITE!=1
16061     case NT_PELEMENT:   // immutable (persistent) element node
16062         {
16063             ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16064             parentIndex = me->parentIndex;
16065         }
16066         break;
16067     case NT_PTEXT:      // immutable (persistent) text node
16068         parentIndex = getDocument()->_textStorage.getParent(_data._ptext_addr);
16069         break;
16070 #endif
16071     case NT_TEXT:
16072         parentIndex = _data._text_ptr->getParentIndex();
16073         break;
16074     }
16075     return parentIndex ? getTinyNode(parentIndex) : NULL;
16076 }
16077 
16078 /// returns true child node is element
isChildNodeElement(lUInt32 index) const16079 bool ldomNode::isChildNodeElement( lUInt32 index ) const
16080 {
16081     ASSERT_NODE_NOT_NULL;
16082 #if BUILD_LITE!=1
16083     if ( !isPersistent() ) {
16084 #endif
16085         // element
16086         tinyElement * me = NPELEM;
16087         int n = me->_children[index];
16088         return ( (n & 1)==1 );
16089 #if BUILD_LITE!=1
16090     } else {
16091         // persistent element
16092         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16093         int n = me->children[index];
16094         return ( (n & 1)==1 );
16095     }
16096 #endif
16097 }
16098 
16099 /// returns true child node is text
isChildNodeText(lUInt32 index) const16100 bool ldomNode::isChildNodeText( lUInt32 index ) const
16101 {
16102     ASSERT_NODE_NOT_NULL;
16103 #if BUILD_LITE!=1
16104     if ( !isPersistent() ) {
16105 #endif
16106         // element
16107         tinyElement * me = NPELEM;
16108         int n = me->_children[index];
16109         return ( (n & 1)==0 );
16110 #if BUILD_LITE!=1
16111     } else {
16112         // persistent element
16113         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16114         int n = me->children[index];
16115         return ( (n & 1)==0 );
16116     }
16117 #endif
16118 }
16119 
16120 /// returns child node by index, NULL if node with this index is not element or nodeTag!=0 and element node name!=nodeTag
getChildElementNode(lUInt32 index,const lChar32 * nodeTag) const16121 ldomNode * ldomNode::getChildElementNode( lUInt32 index, const lChar32 * nodeTag ) const
16122 {
16123     lUInt16 nodeId = getDocument()->getElementNameIndex(nodeTag);
16124     return getChildElementNode( index, nodeId );
16125 }
16126 
16127 /// returns child node by index, NULL if node with this index is not element or nodeId!=0 and element node id!=nodeId
getChildElementNode(lUInt32 index,lUInt16 nodeId) const16128 ldomNode * ldomNode::getChildElementNode( lUInt32 index, lUInt16 nodeId ) const
16129 {
16130     ASSERT_NODE_NOT_NULL;
16131     ldomNode * res = NULL;
16132 #if BUILD_LITE!=1
16133     if ( !isPersistent() ) {
16134 #endif
16135         // element
16136         tinyElement * me = NPELEM;
16137         int n = me->_children[index];
16138         if ( (n & 1)==0 ) // not element
16139             return NULL;
16140         res = getTinyNode( n );
16141 #if BUILD_LITE!=1
16142     } else {
16143         // persistent element
16144         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16145         int n = me->children[index];
16146         if ( (n & 1)==0 ) // not element
16147             return NULL;
16148         res = getTinyNode( n );
16149     }
16150 #endif
16151     if ( res && nodeId!=0 && res->getNodeId()!=nodeId )
16152         res = NULL;
16153     return res;
16154 }
16155 
16156 /// returns child node by index
getChildNode(lUInt32 index) const16157 ldomNode * ldomNode::getChildNode( lUInt32 index ) const
16158 {
16159     ASSERT_NODE_NOT_NULL;
16160 #if BUILD_LITE!=1
16161     if ( !isPersistent() ) {
16162 #endif
16163         // element
16164         tinyElement * me = NPELEM;
16165         return getTinyNode( me->_children[index] );
16166 #if BUILD_LITE!=1
16167     } else {
16168         // persistent element
16169         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16170         return getTinyNode( me->children[index] );
16171     }
16172 #endif
16173 }
16174 
16175 /// returns element child count
getChildCount() const16176 int ldomNode::getChildCount() const
16177 {
16178     ASSERT_NODE_NOT_NULL;
16179     if ( !isElement() )
16180         return 0;
16181 #if BUILD_LITE!=1
16182     if ( !isPersistent() ) {
16183 #endif
16184         // element
16185         tinyElement * me = NPELEM;
16186         return me->_children.length();
16187 #if BUILD_LITE!=1
16188     } else {
16189         // persistent element
16190         // persistent element
16191         {
16192             ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16193 //            if ( me==NULL ) { // DEBUG
16194 //                me = _document->_elemStorage.getElem( _data._pelem_addr );
16195 //            }
16196             return me->childCount;
16197         }
16198     }
16199 #endif
16200 }
16201 
16202 /// returns element attribute count
getAttrCount() const16203 int ldomNode::getAttrCount() const
16204 {
16205     ASSERT_NODE_NOT_NULL;
16206     if ( !isElement() )
16207         return 0;
16208 #if BUILD_LITE!=1
16209     if ( !isPersistent() ) {
16210 #endif
16211         // element
16212         tinyElement * me = NPELEM;
16213         return me->_attrs.length();
16214 #if BUILD_LITE!=1
16215     } else {
16216         // persistent element
16217         {
16218             ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16219             return me->attrCount;
16220         }
16221     }
16222 #endif
16223 }
16224 
16225 /// returns attribute value by attribute name id and namespace id
getAttributeValue(lUInt16 nsid,lUInt16 id) const16226 const lString32 & ldomNode::getAttributeValue( lUInt16 nsid, lUInt16 id ) const
16227 {
16228     ASSERT_NODE_NOT_NULL;
16229     if ( !isElement() )
16230         return lString32::empty_str;
16231 #if BUILD_LITE!=1
16232     if ( !isPersistent() ) {
16233 #endif
16234         // element
16235         tinyElement * me = NPELEM;
16236         lUInt32 valueId = me->_attrs.get( nsid, id );
16237         if ( valueId==LXML_ATTR_VALUE_NONE )
16238             return lString32::empty_str;
16239         return getDocument()->getAttrValue(valueId);
16240 #if BUILD_LITE!=1
16241     } else {
16242         // persistent element
16243         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16244         lUInt32 valueId = me->getAttrValueId( nsid, id );
16245         if ( valueId==LXML_ATTR_VALUE_NONE )
16246             return lString32::empty_str;
16247         return getDocument()->getAttrValue(valueId);
16248     }
16249 #endif
16250 }
16251 
16252 /// returns attribute value by attribute name and namespace
getAttributeValue(const lChar32 * nsName,const lChar32 * attrName) const16253 const lString32 & ldomNode::getAttributeValue( const lChar32 * nsName, const lChar32 * attrName ) const
16254 {
16255     ASSERT_NODE_NOT_NULL;
16256     lUInt16 nsId = (nsName && nsName[0]) ? getDocument()->getNsNameIndex( nsName ) : LXML_NS_ANY;
16257     lUInt16 attrId = getDocument()->getAttrNameIndex( attrName );
16258     return getAttributeValue( nsId, attrId );
16259 }
16260 
16261 /// returns attribute value by attribute name and namespace
getAttributeValue(const lChar8 * nsName,const lChar8 * attrName) const16262 const lString32 & ldomNode::getAttributeValue( const lChar8 * nsName, const lChar8 * attrName ) const
16263 {
16264     ASSERT_NODE_NOT_NULL;
16265     lUInt16 nsId = (nsName && nsName[0]) ? getDocument()->getNsNameIndex( nsName ) : LXML_NS_ANY;
16266     lUInt16 attrId = getDocument()->getAttrNameIndex( attrName );
16267     return getAttributeValue( nsId, attrId );
16268 }
16269 
16270 /// returns attribute by index
getAttribute(lUInt32 index) const16271 const lxmlAttribute * ldomNode::getAttribute( lUInt32 index ) const
16272 {
16273     ASSERT_NODE_NOT_NULL;
16274     if ( !isElement() )
16275         return NULL;
16276 #if BUILD_LITE!=1
16277     if ( !isPersistent() ) {
16278 #endif
16279         // element
16280         tinyElement * me = NPELEM;
16281         return me->_attrs[index];
16282 #if BUILD_LITE!=1
16283     } else {
16284         // persistent element
16285         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16286         return me->attr( index );
16287     }
16288 #endif
16289 }
16290 
16291 /// returns true if element node has attribute with specified name id and namespace id
hasAttribute(lUInt16 nsid,lUInt16 id) const16292 bool ldomNode::hasAttribute( lUInt16 nsid, lUInt16 id ) const
16293 {
16294     ASSERT_NODE_NOT_NULL;
16295     if ( !isElement() )
16296         return false;
16297 #if BUILD_LITE!=1
16298     if ( !isPersistent() ) {
16299 #endif
16300         // element
16301         tinyElement * me = NPELEM;
16302         lUInt32 valueId = me->_attrs.get( nsid, id );
16303         return ( valueId!=LXML_ATTR_VALUE_NONE );
16304 #if BUILD_LITE!=1
16305     } else {
16306         // persistent element
16307         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16308         return (me->findAttr( nsid, id ) != NULL);
16309     }
16310 #endif
16311 }
16312 
16313 /// returns attribute name by index
getAttributeName(lUInt32 index) const16314 const lString32 & ldomNode::getAttributeName( lUInt32 index ) const
16315 {
16316     ASSERT_NODE_NOT_NULL;
16317     const lxmlAttribute * attr = getAttribute( index );
16318     if ( attr )
16319         return getDocument()->getAttrName( attr->id );
16320     return lString32::empty_str;
16321 }
16322 
16323 /// sets attribute value
setAttributeValue(lUInt16 nsid,lUInt16 id,const lChar32 * value)16324 void ldomNode::setAttributeValue( lUInt16 nsid, lUInt16 id, const lChar32 * value )
16325 {
16326     ASSERT_NODE_NOT_NULL;
16327     if ( !isElement() )
16328         return;
16329     lUInt32 valueIndex = getDocument()->getAttrValueIndex(value);
16330 #if BUILD_LITE!=1
16331     if ( isPersistent() ) {
16332         // persistent element
16333         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16334         lxmlAttribute * attr = me->findAttr( nsid, id );
16335         if ( attr ) {
16336             attr->index = valueIndex;
16337             modified();
16338             return;
16339         }
16340         // else: convert to modifable and continue as non-persistent
16341         modify();
16342     }
16343 #endif
16344     // element
16345     tinyElement * me = NPELEM;
16346     me->_attrs.set(nsid, id, valueIndex);
16347     if (nsid == LXML_NS_NONE)
16348         getDocument()->onAttributeSet( id, valueIndex, this );
16349 }
16350 
16351 /// returns attribute value by attribute name id, looking at children if needed
getFirstInnerAttributeValue(lUInt16 nsid,lUInt16 id) const16352 const lString32 & ldomNode::getFirstInnerAttributeValue( lUInt16 nsid, lUInt16 id ) const
16353 {
16354     ASSERT_NODE_NOT_NULL;
16355     if (hasAttribute(nsid, id))
16356         return getAttributeValue(nsid, id);
16357     ldomNode * n = (ldomNode *) this;
16358     if (n->isElement() && n->getChildCount() > 0) {
16359         int nextChildIndex = 0;
16360         n = n->getChildNode(nextChildIndex);
16361         while (true) {
16362             // Check only the first time we met a node (nextChildIndex == 0)
16363             // and not when we get back to it from a child to process next sibling
16364             if (nextChildIndex == 0) {
16365                 if (n->isElement() && n->hasAttribute(nsid, id))
16366                     return n->getAttributeValue(nsid, id);
16367             }
16368             // Process next child
16369             if (n->isElement() && nextChildIndex < n->getChildCount()) {
16370                 n = n->getChildNode(nextChildIndex);
16371                 nextChildIndex = 0;
16372                 continue;
16373             }
16374             // No more child, get back to parent and have it process our sibling
16375             nextChildIndex = n->getNodeIndex() + 1;
16376             n = n->getParentNode();
16377             if (!n) // back to root node
16378                 break;
16379             if (n == this && nextChildIndex >= n->getChildCount())
16380                 // back to this node, and done with its children
16381                 break;
16382         }
16383     }
16384     return lString32::empty_str;
16385 }
16386 
16387 /// returns element type structure pointer if it was set in document for this element name
getElementTypePtr()16388 const css_elem_def_props_t * ldomNode::getElementTypePtr()
16389 {
16390     ASSERT_NODE_NOT_NULL;
16391     if ( !isElement() )
16392         return NULL;
16393 #if BUILD_LITE!=1
16394     if ( !isPersistent() ) {
16395 #endif
16396         // element
16397         const css_elem_def_props_t * res = getDocument()->getElementTypePtr(NPELEM->_id);
16398 //        if ( res && res->is_object ) {
16399 //            CRLog::trace("Object found");
16400 //        }
16401         return res;
16402 #if BUILD_LITE!=1
16403     } else {
16404         // persistent element
16405         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16406         const css_elem_def_props_t * res = getDocument()->getElementTypePtr(me->id);
16407 //        if ( res && res->is_object ) {
16408 //            CRLog::trace("Object found");
16409 //        }
16410         return res;
16411     }
16412 #endif
16413 }
16414 
16415 /// returns element name id
getNodeId() const16416 lUInt16 ldomNode::getNodeId() const
16417 {
16418     ASSERT_NODE_NOT_NULL;
16419     if ( !isElement() )
16420         return 0;
16421 #if BUILD_LITE!=1
16422     if ( !isPersistent() ) {
16423         // element
16424 #endif
16425         return NPELEM->_id;
16426 #if BUILD_LITE!=1
16427     } else {
16428         // persistent element
16429         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16430         return me->id;
16431     }
16432 #endif
16433 }
16434 
16435 /// returns element namespace id
getNodeNsId() const16436 lUInt16 ldomNode::getNodeNsId() const
16437 {
16438     ASSERT_NODE_NOT_NULL;
16439     if ( !isElement() )
16440         return 0;
16441 #if BUILD_LITE!=1
16442     if ( !isPersistent() ) {
16443         // element
16444 #endif
16445         return NPELEM->_nsid;
16446 #if BUILD_LITE!=1
16447     } else {
16448         // persistent element
16449         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16450         return me->nsid;
16451     }
16452 #endif
16453 }
16454 
16455 /// replace element name id with another value
setNodeId(lUInt16 id)16456 void ldomNode::setNodeId( lUInt16 id )
16457 {
16458     ASSERT_NODE_NOT_NULL;
16459     if ( !isElement() )
16460         return;
16461 #if BUILD_LITE!=1
16462     if ( !isPersistent() ) {
16463         // element
16464 #endif
16465         NPELEM->_id = id;
16466 #if BUILD_LITE!=1
16467     } else {
16468         // persistent element
16469         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16470         me->id = id;
16471         modified();
16472     }
16473 #endif
16474 }
16475 
16476 /// returns element name
getNodeName() const16477 const lString32 & ldomNode::getNodeName() const
16478 {
16479     ASSERT_NODE_NOT_NULL;
16480     if ( !isElement() )
16481         return lString32::empty_str;
16482 #if BUILD_LITE!=1
16483     if ( !isPersistent() ) {
16484         // element
16485 #endif
16486         return getDocument()->getElementName(NPELEM->_id);
16487 #if BUILD_LITE!=1
16488     } else {
16489         // persistent element
16490         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16491         return getDocument()->getElementName(me->id);
16492     }
16493 #endif
16494 }
16495 
16496 /// returns element name
isNodeName(const char * s) const16497 bool ldomNode::isNodeName(const char * s) const
16498 {
16499     ASSERT_NODE_NOT_NULL;
16500     if ( !isElement() )
16501         return false;
16502     lUInt16 index = getDocument()->findElementNameIndex(s);
16503     if (!index)
16504         return false;
16505 #if BUILD_LITE!=1
16506     if ( !isPersistent() ) {
16507         // element
16508 #endif
16509         return index == NPELEM->_id;
16510 #if BUILD_LITE!=1
16511     } else {
16512         // persistent element
16513         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16514         return index == me->id;
16515     }
16516 #endif
16517 }
16518 
16519 /// returns element namespace name
getNodeNsName() const16520 const lString32 & ldomNode::getNodeNsName() const
16521 {
16522     ASSERT_NODE_NOT_NULL;
16523     if ( !isElement() )
16524         return lString32::empty_str;
16525 #if BUILD_LITE!=1
16526     if ( !isPersistent() ) {
16527 #endif
16528         // element
16529         return getDocument()->getNsName(NPELEM->_nsid);
16530 #if BUILD_LITE!=1
16531     } else {
16532         // persistent element
16533         ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16534         return getDocument()->getNsName(me->nsid);
16535     }
16536 #endif
16537 }
16538 
16539 
16540 
16541 /// returns text node text as wide string
getText(lChar32 blockDelimiter,int maxSize) const16542 lString32 ldomNode::getText( lChar32 blockDelimiter, int maxSize ) const
16543 {
16544     ASSERT_NODE_NOT_NULL;
16545     switch ( TNTYPE ) {
16546 #if BUILD_LITE!=1
16547     case NT_PELEMENT:
16548 #endif
16549     case NT_ELEMENT:
16550         {
16551             lString32 txt;
16552             unsigned cc = getChildCount();
16553             for ( unsigned i=0; i<cc; i++ ) {
16554                 ldomNode * child = getChildNode(i);
16555                 txt += child->getText(blockDelimiter, maxSize);
16556                 if (maxSize != 0 && txt.length() > maxSize)
16557                     break;
16558                 if (i >= cc - 1)
16559                     break;
16560 #if BUILD_LITE!=1
16561                 if ( blockDelimiter && child->isElement() ) {
16562                     if ( !child->getStyle().isNull() && child->getStyle()->display == css_d_block )
16563                         txt << blockDelimiter;
16564                 }
16565 #endif
16566             }
16567             return txt;
16568         }
16569         break;
16570 #if BUILD_LITE!=1
16571     case NT_PTEXT:
16572         return Utf8ToUnicode(getDocument()->_textStorage.getText( _data._ptext_addr ));
16573 #endif
16574     case NT_TEXT:
16575         return _data._text_ptr->getText32();
16576     }
16577     return lString32::empty_str;
16578 }
16579 
16580 /// returns text node text as utf8 string
getText8(lChar8 blockDelimiter,int maxSize) const16581 lString8 ldomNode::getText8( lChar8 blockDelimiter, int maxSize ) const
16582 {
16583     ASSERT_NODE_NOT_NULL;
16584     switch ( TNTYPE ) {
16585     case NT_ELEMENT:
16586 #if BUILD_LITE!=1
16587     case NT_PELEMENT:
16588         {
16589             lString8 txt;
16590             int cc = getChildCount();
16591             for (int i = 0; i < cc; i++) {
16592                 ldomNode * child = getChildNode(i);
16593                 txt += child->getText8(blockDelimiter, maxSize);
16594                 if (maxSize != 0 && txt.length() > maxSize)
16595                     break;
16596                 if (i >= getChildCount() - 1)
16597                     break;
16598                 if ( blockDelimiter && child->isElement() ) {
16599                     if ( child->getStyle()->display == css_d_block )
16600                         txt << blockDelimiter;
16601                 }
16602             }
16603             return txt;
16604         }
16605         break;
16606     case NT_PTEXT:
16607         return getDocument()->_textStorage.getText( _data._ptext_addr );
16608 #endif
16609     case NT_TEXT:
16610         return _data._text_ptr->getText();
16611     }
16612     return lString8::empty_str;
16613 }
16614 
16615 /// sets text node text as wide string
setText(lString32 str)16616 void ldomNode::setText( lString32 str )
16617 {
16618     ASSERT_NODE_NOT_NULL;
16619     switch ( TNTYPE ) {
16620     case NT_ELEMENT:
16621         readOnlyError();
16622         break;
16623 #if BUILD_LITE!=1
16624     case NT_PELEMENT:
16625         readOnlyError();
16626         break;
16627     case NT_PTEXT:
16628         {
16629             // convert persistent text to mutable
16630             lUInt32 parentIndex = getDocument()->_textStorage.getParent(_data._ptext_addr);
16631             getDocument()->_textStorage.freeNode( _data._ptext_addr );
16632             _data._text_ptr = new ldomTextNode( parentIndex, UnicodeToUtf8(str) );
16633             // change type from PTEXT to TEXT
16634             _handle._dataIndex = (_handle._dataIndex & ~0xF) | NT_TEXT;
16635         }
16636         break;
16637 #endif
16638     case NT_TEXT:
16639         {
16640             _data._text_ptr->setText( str );
16641         }
16642         break;
16643     }
16644 }
16645 
16646 /// sets text node text as utf8 string
setText8(lString8 utf8)16647 void ldomNode::setText8( lString8 utf8 )
16648 {
16649     ASSERT_NODE_NOT_NULL;
16650     switch ( TNTYPE ) {
16651     case NT_ELEMENT:
16652         readOnlyError();
16653         break;
16654 #if BUILD_LITE!=1
16655     case NT_PELEMENT:
16656         readOnlyError();
16657         break;
16658     case NT_PTEXT:
16659         {
16660             // convert persistent text to mutable
16661             lUInt32 parentIndex = getDocument()->_textStorage.getParent(_data._ptext_addr);
16662             getDocument()->_textStorage.freeNode( _data._ptext_addr );
16663             _data._text_ptr = new ldomTextNode( parentIndex, utf8 );
16664             // change type from PTEXT to TEXT
16665             _handle._dataIndex = (_handle._dataIndex & ~0xF) | NT_TEXT;
16666         }
16667         break;
16668 #endif
16669     case NT_TEXT:
16670         {
16671             _data._text_ptr->setText( utf8 );
16672         }
16673         break;
16674     }
16675 }
16676 
16677 #if BUILD_LITE!=1
16678 /// returns node absolute rectangle
getAbsRect(lvRect & rect,bool inner)16679 void ldomNode::getAbsRect( lvRect & rect, bool inner )
16680 {
16681     ASSERT_NODE_NOT_NULL;
16682     ldomNode * node = this;
16683     RenderRectAccessor fmt( node );
16684     rect.left = fmt.getX();
16685     rect.top = fmt.getY();
16686     rect.right = fmt.getWidth();
16687     rect.bottom = fmt.getHeight();
16688     if ( inner && RENDER_RECT_HAS_FLAG(fmt, INNER_FIELDS_SET) ) {
16689         // This flag is set only when in enhanced rendering mode, and
16690         // only on erm_final nodes.
16691         rect.left += fmt.getInnerX();     // add padding left
16692         rect.top += fmt.getInnerY();      // add padding top
16693         rect.right = fmt.getInnerWidth(); // replace by inner width
16694     }
16695     node = node->getParentNode();
16696     for (; node; node = node->getParentNode())
16697     {
16698         RenderRectAccessor fmt( node );
16699         rect.left += fmt.getX();
16700         rect.top += fmt.getY();
16701         if ( RENDER_RECT_HAS_FLAG(fmt, INNER_FIELDS_SET) ) {
16702             // getAbsRect() is mostly used on erm_final nodes. So,
16703             // if we meet another erm_final node in our parent, we are
16704             // probably an embedded floatBox or inlineBox. Embedded
16705             // floatBoxes or inlineBoxes are positioned according
16706             // to the inner LFormattedText, so we need to account
16707             // for these padding shifts.
16708             rect.left += fmt.getInnerX();     // add padding left
16709             rect.top += fmt.getInnerY();      // add padding top
16710         }
16711     }
16712     rect.bottom += rect.top;
16713     rect.right += rect.left;
16714 }
16715 
16716 /// returns render data structure
getRenderData(lvdomElementFormatRec & dst)16717 void ldomNode::getRenderData( lvdomElementFormatRec & dst)
16718 {
16719     ASSERT_NODE_NOT_NULL;
16720     if ( !isElement() ) {
16721         dst.clear();
16722         return;
16723     }
16724     getDocument()->_rectStorage.getRendRectData(_handle._dataIndex, &dst);
16725 }
16726 
16727 /// sets new value for render data structure
setRenderData(lvdomElementFormatRec & newData)16728 void ldomNode::setRenderData( lvdomElementFormatRec & newData)
16729 {
16730     ASSERT_NODE_NOT_NULL;
16731     if ( !isElement() )
16732         return;
16733     getDocument()->_rectStorage.setRendRectData(_handle._dataIndex, &newData);
16734 }
16735 
16736 /// sets node rendering structure pointer
clearRenderData()16737 void ldomNode::clearRenderData()
16738 {
16739     ASSERT_NODE_NOT_NULL;
16740     if ( !isElement() )
16741         return;
16742     lvdomElementFormatRec rec;
16743     getDocument()->_rectStorage.setRendRectData(_handle._dataIndex, &rec);
16744 }
16745 /// reset node rendering structure pointer for sub-tree
clearRenderDataRecursive()16746 void ldomNode::clearRenderDataRecursive()
16747 {
16748     ASSERT_NODE_NOT_NULL;
16749     if ( !isElement() )
16750         return;
16751     lvdomElementFormatRec rec;
16752     getDocument()->_rectStorage.setRendRectData(_handle._dataIndex, &rec);
16753     int cnt = getChildCount();
16754     for (int i=0; i<cnt; i++) {
16755         ldomNode * child = getChildNode( i );
16756         if ( child->isElement() ) {
16757             child->clearRenderDataRecursive();
16758         }
16759     }
16760 }
16761 #endif
16762 
16763 /// calls specified function recursively for all elements of DOM tree, children before parent
recurseElementsDeepFirst(void (* pFun)(ldomNode * node))16764 void ldomNode::recurseElementsDeepFirst( void (*pFun)( ldomNode * node ) )
16765 {
16766     ASSERT_NODE_NOT_NULL;
16767     if ( !isElement() )
16768         return;
16769     int cnt = getChildCount();
16770     for (int i=0; i<cnt; i++)
16771     {
16772         ldomNode * child = getChildNode( i );
16773         if ( child && child->isElement() )
16774         {
16775             child->recurseElementsDeepFirst( pFun );
16776         }
16777     }
16778     pFun( this );
16779 }
16780 
16781 #if BUILD_LITE!=1
updateRendMethod(ldomNode * node)16782 static void updateRendMethod( ldomNode * node )
16783 {
16784     node->initNodeRendMethod();
16785     // Also clean up node previous positionings (they were set while in
16786     // a previous page drawing phase), that could otherwise have negative
16787     // impact on the coming rendering (noticeable with table elements).
16788     RenderRectAccessor fmt( node );
16789     fmt.clear();
16790     fmt.push();
16791 }
16792 
16793 /// init render method for the whole subtree
initNodeRendMethodRecursive()16794 void ldomNode::initNodeRendMethodRecursive()
16795 {
16796     recurseElementsDeepFirst( updateRendMethod );
16797 }
16798 #endif
16799 
#if 0
// Disabled code: per-element style initialization callback. Its only visible use,
// a recurseElements() call in ldomNode::initNodeStyleRecursive(), is commented out.
static void updateStyleData( ldomNode * elem )
{
    const bool isDocFragment = ( elem->getNodeId()==el_DocFragment );
    if ( isDocFragment )
        elem->applyNodeStylesheet();
    elem->initNodeStyle();
}
#endif
16808 
16809 #if BUILD_LITE!=1
updateStyleDataRecursive(ldomNode * node,LVDocViewCallback * progressCallback,int & lastProgressPercent)16810 static void updateStyleDataRecursive( ldomNode * node, LVDocViewCallback * progressCallback, int & lastProgressPercent )
16811 {
16812     if ( !node->isElement() )
16813         return;
16814     bool styleSheetChanged = false;
16815 
16816     // DocFragment (for epub) and body (for html) may hold some stylesheet
16817     // as first child or a link to stylesheet file in attribute
16818     if ( node->getNodeId()==el_DocFragment || node->getNodeId()==el_body ) {
16819         styleSheetChanged = node->applyNodeStylesheet();
16820         // We don't have access to much metric to show the progress of
16821         // this recursive phase. Do that anyway as we progress among
16822         // the collection of DocFragments.
16823         if ( progressCallback && node->getNodeId()==el_DocFragment ) {
16824             int nbDocFragments = node->getParentNode()->getChildCount();
16825             if (nbDocFragments == 0) // should not happen (but avoid clang-tidy warning)
16826                 nbDocFragments = 1;
16827             int percent = 100 * node->getNodeIndex() / nbDocFragments;
16828             if ( percent != lastProgressPercent ) {
16829                 progressCallback->OnNodeStylesUpdateProgress( percent );
16830                 lastProgressPercent = percent;
16831             }
16832         }
16833     }
16834 
16835     node->initNodeStyle();
16836     int n = node->getChildCount();
16837     for ( int i=0; i<n; i++ ) {
16838         ldomNode * child = node->getChildNode(i);
16839         if ( child && child->isElement() )
16840             updateStyleDataRecursive( child, progressCallback, lastProgressPercent );
16841     }
16842     if ( styleSheetChanged )
16843         node->getDocument()->getStyleSheet()->pop();
16844 }
16845 
16846 /// init render method for the whole subtree
initNodeStyleRecursive(LVDocViewCallback * progressCallback)16847 void ldomNode::initNodeStyleRecursive( LVDocViewCallback * progressCallback )
16848 {
16849     if (progressCallback)
16850         progressCallback->OnNodeStylesUpdateStart();
16851     getDocument()->_fontMap.clear();
16852     int lastProgressPercent = -1;
16853     updateStyleDataRecursive( this, progressCallback, lastProgressPercent );
16854     //recurseElements( updateStyleData );
16855     if (progressCallback)
16856         progressCallback->OnNodeStylesUpdateEnd();
16857 }
16858 #endif
16859 
16860 /// calls specified function recursively for all elements of DOM tree
recurseElements(void (* pFun)(ldomNode * node))16861 void ldomNode::recurseElements( void (*pFun)( ldomNode * node ) )
16862 {
16863     ASSERT_NODE_NOT_NULL;
16864     if ( !isElement() )
16865         return;
16866     pFun( this );
16867     int cnt = getChildCount();
16868     for (int i=0; i<cnt; i++)
16869     {
16870         ldomNode * child = getChildNode( i );
16871         if ( child->isElement() )
16872         {
16873             child->recurseElements( pFun );
16874         }
16875     }
16876 }
16877 
16878 /// calls specified function recursively for all elements of DOM tree
recurseMatchingElements(void (* pFun)(ldomNode * node),bool (* matchFun)(ldomNode * node))16879 void ldomNode::recurseMatchingElements( void (*pFun)( ldomNode * node ), bool (*matchFun)( ldomNode * node ) )
16880 {
16881     ASSERT_NODE_NOT_NULL;
16882     if ( !isElement() )
16883         return;
16884     if ( !matchFun( this ) ) {
16885         return;
16886     }
16887     pFun( this );
16888     int cnt = getChildCount();
16889     for (int i=0; i<cnt; i++)
16890     {
16891         ldomNode * child = getChildNode( i );
16892         if ( child->isElement() )
16893         {
16894             child->recurseMatchingElements( pFun, matchFun );
16895         }
16896     }
16897 }
16898 
16899 /// calls specified function recursively for all nodes of DOM tree
recurseNodes(void (* pFun)(ldomNode * node))16900 void ldomNode::recurseNodes( void (*pFun)( ldomNode * node ) )
16901 {
16902     ASSERT_NODE_NOT_NULL;
16903     pFun( this );
16904     if ( isElement() )
16905     {
16906         int cnt = getChildCount();
16907         for (int i=0; i<cnt; i++)
16908         {
16909             ldomNode * child = getChildNode( i );
16910             child->recurseNodes( pFun );
16911         }
16912     }
16913 }
16914 
16915 /// returns first text child element
getFirstTextChild(bool skipEmpty)16916 ldomNode * ldomNode::getFirstTextChild(bool skipEmpty)
16917 {
16918     ASSERT_NODE_NOT_NULL;
16919     if ( isText() ) {
16920         if ( !skipEmpty )
16921             return this;
16922         lString32 txt = getText();
16923         bool nonSpaceFound = false;
16924         for ( int i=0; i<txt.length(); i++ ) {
16925             lChar32 ch = txt[i];
16926             if ( ch!=' ' && ch!='\t' && ch!='\r' && ch!='\n' ) {
16927                 nonSpaceFound = true;
16928                 break;
16929             }
16930         }
16931         if ( nonSpaceFound )
16932             return this;
16933         return NULL;
16934     }
16935     for ( int i=0; i<(int)getChildCount(); i++ ) {
16936         ldomNode * p = getChildNode(i)->getFirstTextChild(skipEmpty);
16937         if (p)
16938             return p;
16939     }
16940     return NULL;
16941 }
16942 
16943 /// returns last text child element
getLastTextChild()16944 ldomNode * ldomNode::getLastTextChild()
16945 {
16946     ASSERT_NODE_NOT_NULL;
16947     if ( isText() )
16948         return this;
16949     else {
16950         for ( int i=(int)getChildCount()-1; i>=0; i-- ) {
16951             ldomNode * p = getChildNode(i)->getLastTextChild();
16952             if (p)
16953                 return p;
16954         }
16955     }
16956     return NULL;
16957 }
16958 
16959 
16960 #if BUILD_LITE!=1
16961 /// find node by coordinates of point in formatted document
elementFromPoint(lvPoint pt,int direction,bool strict_bounds_checking)16962 ldomNode * ldomNode::elementFromPoint( lvPoint pt, int direction, bool strict_bounds_checking )
16963 {
16964     ASSERT_NODE_NOT_NULL;
16965     if ( !isElement() )
16966         return NULL;
16967     ldomNode * enode = this;
16968     lvdom_element_render_method rm = enode->getRendMethod();
16969     if ( rm == erm_invisible ) {
16970         return NULL;
16971     }
16972 
16973     if ( rm == erm_inline ) {
16974         // We shouldn't meet erm_inline here, as our purpose is to return
16975         // a final node (so, the container of inlines), and not look further
16976         // (it's ldomDocument::createXPointer(pt) job to look at this final
16977         // node rendered content to find the exact text node and char at pt).
16978         // Except in the "pt.y is inside the box bottom overflow" case below,
16979         // and that box is erm_final (see there for more comments).
16980         // We should navigate all the erm_inline nodes, looking for
16981         // non-erm_inline ones that may be in that overflow and containt pt.
16982         // erm_inline nodes don't have a RenderRectAccessor(), so their x/y
16983         // shifts are 0, and any inner block node had its RenderRectAccessor
16984         // x/y offsets positioned related to the final block. So, no need
16985         // to shift pt: just recursively call elementFromPoint() as-is,
16986         // and we'll be recursively navigating inline nodes here.
16987         int count = getChildCount();
16988         for ( int i=0; i<count; i++ ) {
16989             ldomNode * p = getChildNode( i );
16990             ldomNode * e = p->elementFromPoint( pt, direction );
16991             if ( e ) // found it!
16992                 return e;
16993         }
16994         return NULL; // nothing found
16995     }
16996 
16997     RenderRectAccessor fmt( this );
16998 
16999     if ( BLOCK_RENDERING_N(this, ENHANCED) ) {
17000         // In enhanced rendering mode, because of collapsing of vertical margins
17001         // and the fact that we did not update style margins to their computed
17002         // values, a children box with margins can overlap its parent box, if
17003         // the child bigger margin collapsed with the parent smaller margin.
17004         // So, if we ignore the margins, there can be holes along the vertical
17005         // axis (these holes are the collapsed margins). But the content boxes
17006         // (without margins) don't overlap.
17007         if ( direction >= PT_DIR_EXACT ) { // PT_DIR_EXACT or PT_DIR_SCAN_FORWARD*
17008             // We get the parent node's children in ascending order
17009             // It could just be:
17010             //   if ( pt.y >= fmt.getY() + fmt.getHeight() )
17011             //       // Box fully before pt.y: not a candidate, next one may be
17012             //       return NULL;
17013             // but, because of possible floats overflowing their container element,
17014             // and we want to check if pt is inside any of them, we directly
17015             // check with bottom overflow included (just to avoid 2 tests
17016             // in the most common case when there is no overflow).
17017             if ( pt.y >= fmt.getY() + fmt.getHeight() + fmt.getBottomOverflow() ) {
17018                 // Box (with overflow) fully before pt.y: not a candidate, next one may be
17019                 return NULL;
17020             }
17021             if ( pt.y >= fmt.getY() + fmt.getHeight() ) { // pt.y is inside the box bottom overflow
17022                 // Get back absolute coordinates of pt
17023                 lvRect rc;
17024                 getParentNode()->getAbsRect( rc );
17025                 lvPoint pt0 = lvPoint(rc.left+pt.x, rc.top+pt.y );
17026                 // Check each of this element's children if pt is inside it (so, we'll
17027                 // go by here for each of them that has some overflow too, and that
17028                 // contributed to making this element's overflow.)
17029                 // Note that if this node is erm_final, its bottom overflow must have
17030                 // been set by some inner embedded float. But this final block's children
17031                 // are erm_inline, and the float might be deep among inlines' children.
17032                 // erm_inline nodes don't have their RenderRectAccessor set, so the
17033                 // bottom overflow is not propagated thru them, and we would be in
17034                 // the above case ("Box (with overflow) fully before pt.y"), not
17035                 // looking at inlines' children. We handle this case above (at the
17036                 // start of this function) by looking at erm_inline's children for
17037                 // non-erm_inline nodes before checking any x/y or bottom overflow.
17038                 int count = getChildCount();
17039                 for ( int i=0; i<count; i++ ) {
17040                     ldomNode * p = getChildNode( i );
17041                     // Find an inner erm_final element that has pt in it: for now, it can
17042                     // only be a float. Use PT_DIR_EXACT to really check for x boundaries.
17043                     ldomNode * e = p->elementFromPoint( lvPoint(pt.x-fmt.getX(), pt.y-fmt.getY()), PT_DIR_EXACT );
17044                     if ( e ) {
17045                         // Just to be sure, as elementFromPoint() may be a bit fuzzy in its
17046                         // checks, double check that pt is really inside that e rect.
17047                         lvRect erc;
17048                         e->getAbsRect( erc );
17049                         if ( erc.isPointInside(pt0) ) {
17050                             return e; // return this inner erm_final
17051                         }
17052                     }
17053                 }
17054                 return NULL; // Nothing found in the overflow
17055             }
17056             // There is one special case to skip: floats that may have been
17057             // positioned after their normal y (because of clear:, or because
17058             // of not enough width). Their following non-float siblings (after
17059             // in the HTML/DOM tree) may have a lower fmt.getY().
17060             if ( isFloatingBox() && pt.y < fmt.getY() ) {
17061                 // Float starts after pt.y: next non-float siblings may contain pt.y
17062                 return NULL;
17063             }
17064             // When children of the parent node have been re-ordered, we can't
17065             // trust the ordering, and if pt.y is before fmt.getY(), we might
17066             // still find it in a next node that have been re-ordered before
17067             // this one for rendering.
17068             // Note: for now, happens only with re-ordered table rows, so
17069             // we're only ensuring it here for y. This check might have to
17070             // also be done elsewhere in this function when we use it for
17071             // other things.
17072             if ( strict_bounds_checking && pt.y < fmt.getY() ) {
17073                 // Box fully after pt.y: not a candidate, next one
17074                 // (if reordered) may be
17075                 return NULL;
17076             }
17077             // pt.y is inside the box (without overflows), go on with it.
17078             // Note: we don't check for next elements which may have a top
17079             // overflow and have pt.y inside it, because it would be a bit
17080             // more twisted here, and it's less common that floats overflow
17081             // their container's top (they need to have negative margins).
17082         }
17083         else { // PT_DIR_SCAN_BACKWARD*
17084             // We get the parent node's children in descending order
17085             if ( pt.y < fmt.getY() ) {
17086                 // Box fully after pt.y: not a candidate, next one may be
17087                 return NULL;
17088             }
17089             if ( strict_bounds_checking && pt.y >= fmt.getY() + fmt.getHeight() ) {
17090                 // Box fully before pt.y: not a candidate, next one
17091                 // (if reordered) may be
17092                 return NULL;
17093             }
17094         }
17095     }
17096     else {
17097         // In legacy rendering mode, all boxes (with their margins added) touch
17098         // each other, and the boxes of children are fully contained (with
17099         // their margins added) in their parent box.
17100 
17101         // Styles margins set on <TR>, <THEAD> and the like are ignored
17102         // by table layout algorithm (as per CSS specs)
17103         // (erm_table_row_group, erm_table_header_group, erm_table_footer_group, erm_table_row)
17104         bool ignore_margins = rm >= erm_table_row_group && rm <= erm_table_row;
17105 
17106         int top_margin = ignore_margins ? 0 : lengthToPx(enode->getStyle()->margin[2], fmt.getWidth(), enode->getFont()->getSize());
17107         if ( pt.y < fmt.getY() - top_margin) {
17108             if ( direction >= PT_DIR_SCAN_FORWARD && rm == erm_final )
17109                 return this;
17110             return NULL;
17111         }
17112         int bottom_margin = ignore_margins ? 0 : lengthToPx(enode->getStyle()->margin[3], fmt.getWidth(), enode->getFont()->getSize());
17113         if ( pt.y >= fmt.getY() + fmt.getHeight() + bottom_margin ) {
17114             if ( direction <= PT_DIR_SCAN_BACKWARD && rm == erm_final )
17115                 return this;
17116             return NULL;
17117         }
17118     }
17119 
17120     if ( direction == PT_DIR_EXACT ) {
17121         // (We shouldn't check for pt.x when we are given PT_DIR_SCAN_*.
17122         // In full text search, we might not find any and get locked
17123         // on some page.)
17124         if ( pt.x >= fmt.getX() + fmt.getWidth() ) {
17125             return NULL;
17126         }
17127         if ( pt.x < fmt.getX() ) {
17128             return NULL;
17129         }
17130         // We now do this above check in all cases.
17131         // Previously:
17132         //
17133         //   We also don't need to do it if pt.x=0, which is often used
17134         //   to get current page top or range xpointers.
17135         //   We are given a x>0 when tap/hold to highlight text or find
17136         //   a link, and checking x vs fmt.x and width allows for doing
17137         //   that correctly in 2nd+ table cells.
17138         //
17139         //   No need to check if ( pt.x < fmt.getX() ): we probably
17140         //   meet the multiple elements that can be formatted on a same
17141         //   line in the order they appear as children of their parent,
17142         //   we can simply just ignore those who end before our pt.x.
17143         //   But check x if we happen to be on a floating node (which,
17144         //   with float:right, can appear first in the DOM but be
17145         //   displayed at a higher x)
17146         //    if ( pt.x < fmt.getX() && enode->isFloatingBox() ) {
17147         //        return NULL;
17148         //    }
17149         // This is no more true, now that we support RTL tables and
17150         // we can meet cells in the reverse of their logical order.
17151         // We could add more conditions (like parentNode->getRendMethod()>=erm_table),
17152         // but let's just check this in all cases when direction=0.
17153     }
17154     if ( rm == erm_final ) {
17155         // Final node, that's what we looked for
17156         return this;
17157     }
17158     // Not a final node, but a block container node that must contain
17159     // the final node we look for: check its children.
17160     int count = getChildCount();
17161     strict_bounds_checking = RENDER_RECT_HAS_FLAG(fmt, CHILDREN_RENDERING_REORDERED);
17162     if ( direction >= PT_DIR_EXACT ) { // PT_DIR_EXACT or PT_DIR_SCAN_FORWARD*
17163         for ( int i=0; i<count; i++ ) {
17164             ldomNode * p = getChildNode( i );
17165             ldomNode * e = p->elementFromPoint( lvPoint(pt.x-fmt.getX(), pt.y-fmt.getY()), direction, strict_bounds_checking );
17166             if ( e )
17167                 return e;
17168         }
17169     } else {
17170         for ( int i=count-1; i>=0; i-- ) {
17171             ldomNode * p = getChildNode( i );
17172             ldomNode * e = p->elementFromPoint( lvPoint(pt.x-fmt.getX(), pt.y-fmt.getY()), direction, strict_bounds_checking );
17173             if ( e )
17174                 return e;
17175         }
17176     }
17177     return this;
17178 }
17179 
17180 /// find final node by coordinates of point in formatted document
finalBlockFromPoint(lvPoint pt)17181 ldomNode * ldomNode::finalBlockFromPoint( lvPoint pt )
17182 {
17183     ASSERT_NODE_NOT_NULL;
17184     ldomNode * elem = elementFromPoint( pt, PT_DIR_EXACT );
17185     if ( elem && elem->getRendMethod() == erm_final )
17186         return elem;
17187     return NULL;
17188 }
17189 #endif
17190 
17191 /// returns rendering method
getRendMethod()17192 lvdom_element_render_method ldomNode::getRendMethod()
17193 {
17194     ASSERT_NODE_NOT_NULL;
17195     if ( isElement() ) {
17196 #if BUILD_LITE!=1
17197         if ( !isPersistent() ) {
17198 #endif
17199             return NPELEM->_rendMethod;
17200 #if BUILD_LITE!=1
17201         } else {
17202             ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
17203             return (lvdom_element_render_method)me->rendMethod;
17204         }
17205 #endif
17206     }
17207     return erm_invisible;
17208 }
17209 
17210 /// sets rendering method
setRendMethod(lvdom_element_render_method method)17211 void ldomNode::setRendMethod( lvdom_element_render_method method )
17212 {
17213     ASSERT_NODE_NOT_NULL;
17214     if ( isElement() ) {
17215 #if BUILD_LITE!=1
17216         if ( !isPersistent() ) {
17217 #endif
17218             NPELEM->_rendMethod = method;
17219 #if BUILD_LITE!=1
17220         } else {
17221             ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
17222             if ( me->rendMethod != method ) {
17223                 me->rendMethod = (lUInt8)method;
17224                 modified();
17225             }
17226         }
17227 #endif
17228     }
17229 }
17230 
17231 #if BUILD_LITE!=1
17232 /// returns element style record
getStyle() const17233 css_style_ref_t ldomNode::getStyle() const
17234 {
17235     ASSERT_NODE_NOT_NULL;
17236     if ( !isElement() )
17237         return css_style_ref_t();
17238     css_style_ref_t res = getDocument()->getNodeStyle( _handle._dataIndex );
17239     return res;
17240 }
17241 
17242 /// returns element font
getFont()17243 font_ref_t ldomNode::getFont()
17244 {
17245     ASSERT_NODE_NOT_NULL;
17246     if ( !isElement() )
17247         return font_ref_t();
17248     return getDocument()->getNodeFont( _handle._dataIndex );
17249 }
17250 
17251 /// sets element font
setFont(font_ref_t font)17252 void ldomNode::setFont( font_ref_t font )
17253 {
17254     ASSERT_NODE_NOT_NULL;
17255     if  ( isElement() ) {
17256         getDocument()->setNodeFont( _handle._dataIndex, font );
17257     }
17258 }
17259 
17260 /// sets element style record
setStyle(css_style_ref_t & style)17261 void ldomNode::setStyle( css_style_ref_t & style )
17262 {
17263     ASSERT_NODE_NOT_NULL;
17264     if  ( isElement() ) {
17265         getDocument()->setNodeStyle( _handle._dataIndex, style );
17266     }
17267 }
17268 
initNodeFont()17269 bool ldomNode::initNodeFont()
17270 {
17271     if ( !isElement() )
17272         return false;
17273     lUInt16 style = getDocument()->getNodeStyleIndex( _handle._dataIndex );
17274     lUInt16 font = getDocument()->getNodeFontIndex( _handle._dataIndex );
17275     lUInt16 fntIndex = getDocument()->_fontMap.get( style );
17276     if ( fntIndex==0 ) {
17277         css_style_ref_t s = getDocument()->_styles.get( style );
17278         if ( s.isNull() ) {
17279             CRLog::error("style not found for index %d", style);
17280             s = getDocument()->_styles.get( style );
17281         }
17282         LVFontRef fnt = ::getFont(s.get(), getDocument()->getFontContextDocIndex());
17283         fntIndex = (lUInt16)getDocument()->_fonts.cache( fnt );
17284         if ( fnt.isNull() ) {
17285             CRLog::error("font not found for style!");
17286             return false;
17287         } else {
17288             getDocument()->_fontMap.set(style, fntIndex);
17289         }
17290         if ( font != 0 ) {
17291             if ( font!=fntIndex ) // ???
17292                 getDocument()->_fonts.release(font);
17293         }
17294         getDocument()->setNodeFontIndex( _handle._dataIndex, fntIndex);
17295         return true;
17296     } else {
17297         if ( font!=fntIndex )
17298             getDocument()->_fonts.addIndexRef( fntIndex );
17299     }
17300     if ( fntIndex<=0 ) {
17301         CRLog::error("font caching failed for style!");
17302         return false;;
17303     } else {
17304         getDocument()->setNodeFontIndex( _handle._dataIndex, fntIndex);
17305     }
17306     return true;
17307 }
17308 
initNodeStyle()17309 void ldomNode::initNodeStyle()
17310 {
17311     // assume all parent styles already initialized
17312     if ( !getDocument()->isDefStyleSet() )
17313         return;
17314     if ( isElement() ) {
17315         if ( isRoot() || getParentNode()->isRoot() )
17316         {
17317             setNodeStyle( this,
17318                 getDocument()->getDefaultStyle(),
17319                 getDocument()->getDefaultFont()
17320             );
17321         }
17322         else
17323         {
17324             ldomNode * parent = getParentNode();
17325 
17326             // DEBUG TEST
17327             if ( parent->getChildIndex( getDataIndex() )<0 ) {
17328                 CRLog::error("Invalid parent->child relation for nodes %d->%d", parent->getDataIndex(), getDataIndex() );
17329             }
17330 
17331 
17332             //lvdomElementFormatRec * parent_fmt = node->getParentNode()->getRenderData();
17333             css_style_ref_t style = parent->getStyle();
17334             LVFontRef font = parent->getFont();
17335 #if DEBUG_DOM_STORAGE==1
17336             if ( style.isNull() ) {
17337                 // for debugging
17338                 CRLog::error("NULL style is returned for node <%s> %d level=%d  "
17339                              "parent <%s> %d level=%d children %d childIndex=%d",
17340                              LCSTR(getNodeName()), getDataIndex(), getNodeLevel(),
17341                              LCSTR(parent->getNodeName()), parent->getDataIndex(),
17342                              parent->getNodeLevel(), parent->getChildCount(), parent->getChildIndex(getDataIndex()) );
17343 
17344                 style = parent->getStyle();
17345             }
17346 #endif
17347             setNodeStyle( this,
17348                 style,
17349                 font
17350                 );
17351 #if DEBUG_DOM_STORAGE==1
17352             if ( this->getStyle().isNull() ) {
17353                 CRLog::error("NULL style is set for <%s>", LCSTR(getNodeName()) );
17354                 style = this->getStyle();
17355             }
17356 #endif
17357         }
17358     }
17359 }
17360 #endif
17361 
isBoxingNode(bool orPseudoElem) const17362 bool ldomNode::isBoxingNode( bool orPseudoElem ) const
17363 {
17364     if( isElement() ) {
17365         lUInt16 id = getNodeId();
17366         if( id >= el_autoBoxing && id <= el_inlineBox ) {
17367             return true;
17368         }
17369         if ( orPseudoElem && id == el_pseudoElem ) {
17370             return true;
17371         }
17372     }
17373     return false;
17374 }
17375 
getUnboxedParent() const17376 ldomNode * ldomNode::getUnboxedParent() const
17377 {
17378     ldomNode * parent = getParentNode();
17379     while ( parent && parent->isBoxingNode() )
17380         parent = parent->getParentNode();
17381     return parent;
17382 }
17383 
17384 // The following 4 methods are mostly used when checking CSS siblings/child
17385 // rules and counting list items siblings: we have them skip pseudoElems by
17386 // using isBoxingNode(orPseudoElem=true).
getUnboxedFirstChild(bool skip_text_nodes) const17387 ldomNode * ldomNode::getUnboxedFirstChild( bool skip_text_nodes ) const
17388 {
17389     for ( int i=0; i<getChildCount(); i++ ) {
17390         ldomNode * child = getChildNode(i);
17391         if ( child && child->isBoxingNode(true) ) {
17392             child = child->getUnboxedFirstChild( skip_text_nodes );
17393             // (child will then be NULL if it was a pseudoElem)
17394         }
17395         if ( child && (!skip_text_nodes || !child->isText()) )
17396             return child;
17397     }
17398     return NULL;
17399 }
17400 
getUnboxedLastChild(bool skip_text_nodes) const17401 ldomNode * ldomNode::getUnboxedLastChild( bool skip_text_nodes ) const
17402 {
17403     for ( int i=getChildCount()-1; i>=0; i-- ) {
17404         ldomNode * child = getChildNode(i);
17405         if ( child && child->isBoxingNode(true) ) {
17406             child = child->getUnboxedLastChild( skip_text_nodes );
17407         }
17408         if ( child && (!skip_text_nodes || !child->isText()) )
17409             return child;
17410     }
17411     return NULL;
17412 }
17413 
17414 /* For reference, a non-recursive node subtree walker:
17415     ldomNode * n = topNode;
17416     if ( n && n->getChildCount() > 0 ) {
17417         int index = 0;
17418         n = n->getChildNode(index);
17419         while ( true ) {
17420             // Check the node only the first time we meet it (index == 0) and
17421             // not when we get back to it from a child to process next sibling
17422             if ( index == 0 ) {
17423                 // Check n, process it, return it...
17424             }
17425             // Process next child
17426             if ( index < n->getChildCount() ) {
17427                 n = n->getChildNode(index);
17428                 index = 0;
17429                 continue;
17430             }
17431             // No more child, get back to parent and have it process our sibling
17432             index = n->getNodeIndex() + 1;
17433             n = n->getParentNode();
17434             if ( n == topNode && index >= n->getChildCount() )
17435                 break; // back to top node and all its children visited
17436         }
17437     }
17438 */
17439 
getUnboxedNextSibling(bool skip_text_nodes) const17440 ldomNode * ldomNode::getUnboxedNextSibling( bool skip_text_nodes ) const
17441 {
17442     // We use a variation of the above non-recursive node subtree walker,
17443     // but with an arbitrary starting node (this) inside the unboxed_parent
17444     // tree, and checks to not walk down non-boxing nodes - but still
17445     // walking up any node (which ought to be a boxing node).
17446     ldomNode * unboxed_parent = getUnboxedParent(); // don't walk outside of it
17447     if ( !unboxed_parent )
17448         return NULL;
17449     ldomNode * n = (ldomNode *) this;
17450     int index = 0;
17451     bool node_entered = true; // bootstrap loop
17452     // We may meet a same node as 'n' multiple times:
17453     // - once with node_entered=false and index being its real position inside
17454     //   its parent children collection, and we'll be "entering" it
17455     // - once with node_entered=true and index=0, meaning we have "entered" it to
17456     //   check if it's a candidate, and to possibly go on checking its own children.
17457     // - once when back from its children, with node_entered=false and index
17458     //   being that previous child index + 1, to go process its next sibling
17459     //   (or parent if no more sibling)
17460     while ( true ) {
17461         // printf("      %s\n", LCSTR(ldomXPointer(n,0).toStringV1()));
17462         if ( node_entered && n != this ) { // Don't check the starting node
17463             // Check if this node is a candidate
17464             if ( n->isText() ) { // Text nodes are not boxing nodes
17465                 if ( !skip_text_nodes )
17466                     return n;
17467             }
17468             else if ( !n->isBoxingNode(true) ) // Not a boxing node nor pseudoElem
17469                 return n;
17470             // Otherwise, this node is a boxing node (or a text node or a pseudoElem
17471             // with no child, and we'll get back to its parent)
17472         }
17473         // Enter next node, and re-loop to have it checked
17474         // - if !node_entered : n is the parent and index points to the next child
17475         //   we want to check
17476         // - if n->isBoxingNode() (and node_entered=true, and index=0): enter the first
17477         //   child of this boxingNode (not if pseudoElem, that doesn't box anything)
17478         if ( (!node_entered || n->isBoxingNode()) && index < n->getChildCount() ) {
17479             n = n->getChildNode(index);
17480             index = 0;
17481             node_entered = true;
17482             continue;
17483         }
17484         // No more sibling/child to check, get back to parent and have it
17485         // process n's next sibling
17486         index = n->getNodeIndex() + 1;
17487         n = n->getParentNode();
17488         node_entered = false;
17489         if ( n == unboxed_parent && index >= n->getChildCount() ) {
17490             // back to real parent node and no more child to check
17491             break;
17492         }
17493     }
17494     return NULL;
17495 }
17496 
getUnboxedPrevSibling(bool skip_text_nodes) const17497 ldomNode * ldomNode::getUnboxedPrevSibling( bool skip_text_nodes ) const
17498 {
17499     // Similar to getUnboxedNextSibling(), but walking backward
17500     ldomNode * unboxed_parent = getUnboxedParent();
17501     if ( !unboxed_parent )
17502         return NULL;
17503     ldomNode * n = (ldomNode *) this;
17504     int index = 0;
17505     bool node_entered = true; // bootstrap loop
17506     while ( true ) {
17507         // printf("      %s\n", LCSTR(ldomXPointer(n,0).toStringV1()));
17508         if ( node_entered && n != this ) {
17509             if ( n->isText() ) {
17510                 if ( !skip_text_nodes )
17511                     return n;
17512             }
17513             else if ( !n->isBoxingNode(true) )
17514                 return n;
17515         }
17516         if ( (!node_entered || n->isBoxingNode()) && index >= 0 && index < n->getChildCount() ) {
17517             n = n->getChildNode(index);
17518             index = n->getChildCount() - 1;
17519             node_entered = true;
17520             continue;
17521         }
17522         index = n->getNodeIndex() - 1;
17523         n = n->getParentNode();
17524         node_entered = false;
17525         if ( n == unboxed_parent && index < 0 ) {
17526             break;
17527         }
17528     }
17529     return NULL;
17530 }
17531 
17532 /// for display:list-item node, get marker
getNodeListMarker(int & counterValue,lString32 & marker,int & markerWidth)17533 bool ldomNode::getNodeListMarker( int & counterValue, lString32 & marker, int & markerWidth )
17534 {
17535 #if BUILD_LITE!=1
17536     css_style_ref_t s = getStyle();
17537     marker.clear();
17538     markerWidth = 0;
17539     if ( s.isNull() )
17540         return false;
17541     css_list_style_type_t st = s->list_style_type;
17542     switch ( st ) {
17543     default:
17544         // treat default as disc
17545     case css_lst_disc:
17546         marker = U"\x2022"; // U"\x25CF" U"\x26AB" (medium circle) U"\x2981" (spot) U"\x2022" (bullet, small)
17547         break;
17548     case css_lst_circle:
17549         marker = U"\x25E6"; // U"\x25CB" U"\x26AA (medium) U"\25E6" (bullet) U"\x26AC (medium small)
17550         break;
17551     case css_lst_square:
17552         marker = U"\x25AA"; // U"\x25A0" U"\x25FE" (medium small) U"\x25AA" (small)
17553         break;
17554     case css_lst_none:
17555         // When css_lsp_inside, no space is used by the invisible marker
17556         if ( s->list_style_position != css_lsp_inside ) {
17557             marker = U"\x0020";
17558         }
17559         break;
17560     case css_lst_decimal:
17561     case css_lst_lower_roman:
17562     case css_lst_upper_roman:
17563     case css_lst_lower_alpha:
17564     case css_lst_upper_alpha:
17565         do {
17566             // If this element has a valid value then use it avoiding a walk.
17567             lString32 el_value = getAttributeValue(attr_value);
17568             if ( !el_value.empty() ) {
17569                 int el_ivalue;
17570                 if ( el_value.atoi(el_ivalue) ) {
17571                     counterValue = el_ivalue;
17572                     break;
17573                 }
17574             }
17575 
17576             // The UL > LI parent-child chain may have had some of our Boxing elements inserted
17577             ldomNode * parent = getUnboxedParent();
17578 
17579             // See if parent has a 'reversed' attribute.
17580             int increment = parent->hasAttribute(attr_reversed) ? -1 : +1;
17581 
17582             // If the caller passes in a non-zero counter then it is assumed
17583             // have been already calculated and have the value of the prior
17584             // element of a walk. There may be a redundant recalculation in
17585             // the case of zero.
17586             if ( counterValue != 0 ) {
17587                 counterValue += increment;
17588                 break;
17589             }
17590 
17591             // See if parent has a valid 'start' attribute.
17592             // https://www.w3.org/TR/html5/grouping-content.html#the-ol-element
17593             // "The start attribute, if present, must be a valid integer giving the ordinal value of the first list item."
17594             lString32 start_value = parent->getAttributeValue(attr_start);
17595             int istart;
17596             if ( !start_value.empty() && start_value.atoi(istart) )
17597                 counterValue = istart;
17598             else if ( increment > 0 )
17599                 counterValue = 1;
17600             else  {
17601                 // For a reversed ordering the default start is equal to the
17602                 // number of child elements.
17603                 counterValue = 0;
17604 
17605                 ldomNode * sibling = parent->getUnboxedFirstChild(true);
17606                 while ( sibling ) {
17607                     css_style_ref_t cs = sibling->getStyle();
17608                     if ( cs.isNull() ) { // Should not happen, but let's be sure
17609                         if ( sibling == this )
17610                             break;
17611                         sibling = sibling->getUnboxedNextSibling(true);
17612                         continue;
17613                     }
17614                     if ( cs->display != css_d_list_item_block && cs->display != css_d_list_item_legacy) {
17615                         // Alien element among list item nodes, skip it to not mess numbering
17616                         if ( sibling == this ) // Should not happen, but let's be sure
17617                             break;
17618                         sibling = sibling->getUnboxedNextSibling(true);
17619                         continue;
17620                     }
17621                     counterValue++;
17622                     sibling = sibling->getUnboxedNextSibling(true); // skip text nodes
17623                 }
17624             }
17625 
17626             // iterate parent's real children from start up to this node
17627             counterValue -= increment;
17628             ldomNode * sibling = parent->getUnboxedFirstChild(true);
17629             while ( sibling ) {
17630                 css_style_ref_t cs = sibling->getStyle();
17631                 if ( cs.isNull() ) { // Should not happen, but let's be sure
17632                     if ( sibling == this )
17633                         break;
17634                     sibling = sibling->getUnboxedNextSibling(true);
17635                     continue;
17636                 }
17637                 if ( cs->display != css_d_list_item_block && cs->display != css_d_list_item_legacy) {
17638                     // Alien element among list item nodes, skip it to not mess numbering
17639                     if ( sibling == this ) // Should not happen, but let's be sure
17640                         break;
17641                     sibling = sibling->getUnboxedNextSibling(true);
17642                     continue;
17643                 }
17644 
17645                 // Count advances irrespective of the list style.
17646                 counterValue += increment;
17647 
17648                 // See if it has a 'value' attribute that overrides the incremented value
17649                 // https://www.w3.org/TR/html5/grouping-content.html#the-li-element
17650                 // "The value attribute, if present, must be a valid integer giving the ordinal value of the list item."
17651                 lString32 value = sibling->getAttributeValue(attr_value);
17652                 if ( !value.empty() ) {
17653                     int ivalue;
17654                     if ( value.atoi(ivalue) )
17655                         counterValue = ivalue;
17656                 }
17657                 if ( sibling == this )
17658                     break;
17659                 sibling = sibling->getUnboxedNextSibling(true); // skip text nodes
17660             }
17661         } while (0);
17662 
17663         static const char * lower_roman[] = {"i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix",
17664                                              "x", "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix",
17665                                          "xx", "xxi", "xxii", "xxiii"};
17666         switch (st) {
17667         case css_lst_decimal:
17668             marker = lString32::itoa(counterValue);
17669             marker << '.';
17670             break;
17671         case css_lst_lower_roman:
17672             if (counterValue > 0 &&
17673                 counterValue - 1 < (int)(sizeof(lower_roman) / sizeof(lower_roman[0])))
17674                 marker = lString32(lower_roman[counterValue-1]);
17675             else
17676                 marker = lString32::itoa(counterValue); // fallback to simple counter
17677             marker << '.';
17678             break;
17679         case css_lst_upper_roman:
17680             if (counterValue > 0 &&
17681                 counterValue - 1 < (int)(sizeof(lower_roman) / sizeof(lower_roman[0])))
17682                 marker = lString32(lower_roman[counterValue-1]);
17683             else
17684                 marker = lString32::itoa(counterValue); // fallback to simple digital counter
17685             marker.uppercase();
17686             marker << '.';
17687             break;
17688         case css_lst_lower_alpha:
17689             if ( counterValue > 0 && counterValue<=26 )
17690                 marker.append(1, (lChar32)('a' + counterValue - 1));
17691             else
17692                 marker = lString32::itoa(counterValue); // fallback to simple digital counter
17693             marker << '.';
17694             break;
17695         case css_lst_upper_alpha:
17696             if ( counterValue > 0 && counterValue<=26 )
17697                 marker.append(1, (lChar32)('A' + counterValue - 1));
17698             else
17699                 marker = lString32::itoa(counterValue); // fallback to simple digital counter
17700             marker << '.';
17701             break;
17702         case css_lst_disc:
17703         case css_lst_circle:
17704         case css_lst_square:
17705         case css_lst_none:
17706         case css_lst_inherit:
17707             // do nothing
17708             break;
17709         }
17710         break;
17711     }
17712     bool res = false;
17713     if ( !marker.empty() ) {
17714         LVFontRef font = getFont();
17715         if ( !font.isNull() ) {
17716             TextLangCfg * lang_cfg = TextLangMan::getTextLangCfg( this );
17717             markerWidth = font->getTextWidth((marker + "  ").c_str(), marker.length()+2, lang_cfg) + font->getSize()/8;
17718             res = true;
17719         } else {
17720             marker.clear();
17721         }
17722     }
17723     return res;
17724 #else
17725     marker = cs32("*");
17726     return true;
17727 #endif
17728 }
17729 
17730 
17731 /// returns first child node
getFirstChild() const17732 ldomNode * ldomNode::getFirstChild() const
17733 {
17734     ASSERT_NODE_NOT_NULL;
17735     if  ( isElement() ) {
17736 #if BUILD_LITE!=1
17737         if ( !isPersistent() ) {
17738 #endif
17739             tinyElement * me = NPELEM;
17740             if ( me->_children.length() )
17741                 return getDocument()->getTinyNode(me->_children[0]);
17742 #if BUILD_LITE!=1
17743         } else {
17744             ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
17745             if ( me->childCount )
17746                 return getDocument()->getTinyNode(me->children[0]);
17747         }
17748 #endif
17749     }
17750     return NULL;
17751 }
17752 
17753 /// returns last child node
getLastChild() const17754 ldomNode * ldomNode::getLastChild() const
17755 {
17756     ASSERT_NODE_NOT_NULL;
17757     if  ( isElement() ) {
17758 #if BUILD_LITE!=1
17759         if ( !isPersistent() ) {
17760 #endif
17761             tinyElement * me = NPELEM;
17762             if ( me->_children.length() )
17763                 return getDocument()->getTinyNode(me->_children[me->_children.length()-1]);
17764 #if BUILD_LITE!=1
17765         } else {
17766             ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
17767             if ( me->childCount )
17768                 return getDocument()->getTinyNode(me->children[me->childCount-1]);
17769         }
17770 #endif
17771     }
17772     return NULL;
17773 }
17774 
17775 /// removes and deletes last child element
removeLastChild()17776 void ldomNode::removeLastChild()
17777 {
17778     ASSERT_NODE_NOT_NULL;
17779     if ( hasChildren() ) {
17780         ldomNode * lastChild = removeChild( getChildCount() - 1 );
17781         lastChild->destroy();
17782     }
17783 }
17784 
17785 /// add child
addChild(lInt32 childNodeIndex)17786 void ldomNode::addChild( lInt32 childNodeIndex )
17787 {
17788     ASSERT_NODE_NOT_NULL;
17789     if ( !isElement() )
17790         return;
17791     if ( isPersistent() )
17792         modify(); // convert to mutable element
17793     tinyElement * me = NPELEM;
17794     me->_children.add( childNodeIndex );
17795 }
17796 
17797 /// move range of children startChildIndex to endChildIndex inclusively to specified element
moveItemsTo(ldomNode * destination,int startChildIndex,int endChildIndex)17798 void ldomNode::moveItemsTo( ldomNode * destination, int startChildIndex, int endChildIndex )
17799 {
17800     ASSERT_NODE_NOT_NULL;
17801     if ( !isElement() )
17802         return;
17803     if ( isPersistent() )
17804         modify();
17805 
17806 #ifdef TRACE_AUTOBOX
17807     CRLog::debug( "moveItemsTo() invoked from %d to %d", getDataIndex(), destination->getDataIndex() );
17808 #endif
17809     //if ( getDataIndex()==INDEX2 || getDataIndex()==INDEX1) {
17810     //    CRLog::trace("nodes from element %d are being moved", getDataIndex());
17811     //}
17812 /*#ifdef _DEBUG
17813     if ( !_document->checkConsistency( false ) )
17814         CRLog::error("before moveItemsTo");
17815 #endif*/
17816     int len = endChildIndex - startChildIndex + 1;
17817     tinyElement * me = NPELEM;
17818     for ( int i=0; i<len; i++ ) {
17819         ldomNode * item = getChildNode( startChildIndex );
17820         //if ( item->getDataIndex()==INDEX2 || item->getDataIndex()==INDEX1 ) {
17821         //    CRLog::trace("node %d is being moved", item->getDataIndex() );
17822         //}
17823         me->_children.remove( startChildIndex ); // + i
17824         item->setParentNode(destination);
17825         destination->addChild( item->getDataIndex() );
17826     }
17827     // TODO: renumber rest of children in necessary
17828 /*#ifdef _DEBUG
17829     if ( !_document->checkConsistency( false ) )
17830         CRLog::error("after moveItemsTo");
17831 #endif*/
17832 
17833 }
17834 
17835 /// find child element by tag id
findChildElement(lUInt16 nsid,lUInt16 id,int index)17836 ldomNode * ldomNode::findChildElement( lUInt16 nsid, lUInt16 id, int index )
17837 {
17838     ASSERT_NODE_NOT_NULL;
17839     if ( !isElement() )
17840         return NULL;
17841     ldomNode * res = NULL;
17842     int k = 0;
17843     int childCount = getChildCount();
17844     for ( int i=0; i<childCount; i++ )
17845     {
17846         ldomNode * p = getChildNode( i );
17847         if ( !p->isElement() )
17848             continue;
17849         if ( p->getNodeId() == id && ( (p->getNodeNsId() == nsid) || (nsid==LXML_NS_ANY) ) )
17850         {
17851             if ( k==index || index==-1 ) {
17852                 res = p;
17853                 break;
17854             }
17855             k++;
17856         }
17857     }
17858     if (!res) //  || (index==-1 && k>1)  // DON'T CHECK WHETHER OTHER ELEMENTS EXIST
17859         return NULL;
17860     return res;
17861 }
17862 
17863 /// find child element by id path
findChildElement(lUInt16 idPath[])17864 ldomNode * ldomNode::findChildElement( lUInt16 idPath[] )
17865 {
17866     ASSERT_NODE_NOT_NULL;
17867     if ( !isElement() )
17868         return NULL;
17869     ldomNode * elem = this;
17870     for ( int i=0; idPath[i]; i++ ) {
17871         elem = elem->findChildElement( LXML_NS_ANY, idPath[i], -1 );
17872         if ( !elem )
17873             return NULL;
17874     }
17875     return elem;
17876 }
17877 
17878 /// inserts child element
insertChildElement(lUInt32 index,lUInt16 nsid,lUInt16 id)17879 ldomNode * ldomNode::insertChildElement( lUInt32 index, lUInt16 nsid, lUInt16 id )
17880 {
17881     ASSERT_NODE_NOT_NULL;
17882     if  ( isElement() ) {
17883         if ( isPersistent() )
17884             modify();
17885         tinyElement * me = NPELEM;
17886         if (index>(lUInt32)me->_children.length())
17887             index = me->_children.length();
17888         ldomNode * node = getDocument()->allocTinyElement( this, nsid, id );
17889         me->_children.insert( index, node->getDataIndex() );
17890         return node;
17891     }
17892     readOnlyError();
17893     return NULL;
17894 }
17895 
17896 /// inserts child element
insertChildElement(lUInt16 id)17897 ldomNode * ldomNode::insertChildElement( lUInt16 id )
17898 {
17899     ASSERT_NODE_NOT_NULL;
17900     if  ( isElement() ) {
17901         if ( isPersistent() )
17902             modify();
17903         ldomNode * node = getDocument()->allocTinyElement( this, LXML_NS_NONE, id );
17904         NPELEM->_children.insert( NPELEM->_children.length(), node->getDataIndex() );
17905         return node;
17906     }
17907     readOnlyError();
17908     return NULL;
17909 }
17910 
17911 /// inserts child text
insertChildText(lUInt32 index,const lString32 & value)17912 ldomNode * ldomNode::insertChildText( lUInt32 index, const lString32 & value )
17913 {
17914     ASSERT_NODE_NOT_NULL;
17915     if  ( isElement() ) {
17916         if ( isPersistent() )
17917             modify();
17918         tinyElement * me = NPELEM;
17919         if (index>(lUInt32)me->_children.length())
17920             index = me->_children.length();
17921 #if !defined(USE_PERSISTENT_TEXT) || BUILD_LITE==1
17922         ldomNode * node = getDocument()->allocTinyNode( NT_TEXT );
17923         lString8 s8 = UnicodeToUtf8(value);
17924         node->_data._text_ptr = new ldomTextNode(_handle._dataIndex, s8);
17925 #else
17926         ldomNode * node = getDocument()->allocTinyNode( NT_PTEXT );
17927         //node->_data._ptext_addr._parentIndex = _handle._dataIndex;
17928         lString8 s8 = UnicodeToUtf8(value);
17929         node->_data._ptext_addr = getDocument()->_textStorage.allocText( node->_handle._dataIndex, _handle._dataIndex, s8 );
17930 #endif
17931         me->_children.insert( index, node->getDataIndex() );
17932         return node;
17933     }
17934     readOnlyError();
17935     return NULL;
17936 }
17937 
17938 /// inserts child text
insertChildText(const lString32 & value)17939 ldomNode * ldomNode::insertChildText( const lString32 & value )
17940 {
17941     ASSERT_NODE_NOT_NULL;
17942     if  ( isElement() ) {
17943         if ( isPersistent() )
17944             modify();
17945         tinyElement * me = NPELEM;
17946 #if !defined(USE_PERSISTENT_TEXT) || BUILD_LITE==1
17947         ldomNode * node = getDocument()->allocTinyNode( NT_TEXT );
17948         lString8 s8 = UnicodeToUtf8(value);
17949         node->_data._text_ptr = new ldomTextNode(_handle._dataIndex, s8);
17950 #else
17951         ldomNode * node = getDocument()->allocTinyNode( NT_PTEXT );
17952         lString8 s8 = UnicodeToUtf8(value);
17953         node->_data._ptext_addr = getDocument()->_textStorage.allocText( node->_handle._dataIndex, _handle._dataIndex, s8 );
17954 #endif
17955         me->_children.insert( me->_children.length(), node->getDataIndex() );
17956         return node;
17957     }
17958     readOnlyError();
17959     return NULL;
17960 }
17961 
17962 /// inserts child text
insertChildText(const lString8 & s8,bool before_last_child)17963 ldomNode * ldomNode::insertChildText(const lString8 & s8, bool before_last_child)
17964 {
17965     ASSERT_NODE_NOT_NULL;
17966     if  ( isElement() ) {
17967         if ( isPersistent() )
17968             modify();
17969         tinyElement * me = NPELEM;
17970 #if !defined(USE_PERSISTENT_TEXT) || BUILD_LITE==1
17971         ldomNode * node = getDocument()->allocTinyNode( NT_TEXT );
17972         node->_data._text_ptr = new ldomTextNode(_handle._dataIndex, s8);
17973 #else
17974         ldomNode * node = getDocument()->allocTinyNode( NT_PTEXT );
17975         node->_data._ptext_addr = getDocument()->_textStorage.allocText( node->_handle._dataIndex, _handle._dataIndex, s8 );
17976 #endif
17977         int index = me->_children.length();
17978         if ( before_last_child && index > 0 )
17979             index--;
17980         me->_children.insert( index, node->getDataIndex() );
17981         return node;
17982     }
17983     readOnlyError();
17984     return NULL;
17985 }
17986 
17987 /// remove child
removeChild(lUInt32 index)17988 ldomNode * ldomNode::removeChild( lUInt32 index )
17989 {
17990     ASSERT_NODE_NOT_NULL;
17991     if  ( isElement() ) {
17992         if ( isPersistent() )
17993             modify();
17994         lUInt32 removedIndex = NPELEM->_children.remove(index);
17995         ldomNode * node = getTinyNode( removedIndex );
17996         return node;
17997     }
17998     readOnlyError();
17999     return NULL;
18000 }
18001 
18002 /// creates stream to read base64 encoded data from element
createBase64Stream()18003 LVStreamRef ldomNode::createBase64Stream()
18004 {
18005     ASSERT_NODE_NOT_NULL;
18006     if ( !isElement() )
18007         return LVStreamRef();
18008 #define DEBUG_BASE64_IMAGE 0
18009 #if DEBUG_BASE64_IMAGE==1
18010     lString32 fname = getAttributeValue( attr_id );
18011     lString8 fname8 = UnicodeToUtf8( fname );
18012     LVStreamRef ostream = LVOpenFileStream( fname.empty() ? U"image.png" : fname.c_str(), LVOM_WRITE );
18013     printf("createBase64Stream(%s)\n", fname8.c_str());
18014 #endif
18015     LVStream * stream = new LVBase64NodeStream( this );
18016     if ( stream->GetSize()==0 )
18017     {
18018 #if DEBUG_BASE64_IMAGE==1
18019         printf("    cannot create base64 decoder stream!!!\n");
18020 #endif
18021         delete stream;
18022         return LVStreamRef();
18023     }
18024     LVStreamRef istream( stream );
18025 
18026 #if DEBUG_BASE64_IMAGE==1
18027     LVPumpStream( ostream, istream );
18028     istream->SetPos(0);
18029 #endif
18030 
18031     return istream;
18032 }
18033 
18034 #if BUILD_LITE!=1
18035 
18036 class NodeImageProxy : public LVImageSource
18037 {
18038     ldomNode * _node;
18039     lString32 _refName;
18040     int _dx;
18041     int _dy;
18042 public:
NodeImageProxy(ldomNode * node,lString32 refName,int dx,int dy)18043     NodeImageProxy( ldomNode * node, lString32 refName, int dx, int dy )
18044         : _node(node), _refName(refName), _dx(dx), _dy(dy)
18045     {
18046 
18047     }
18048 
GetSourceNode()18049     virtual ldomNode * GetSourceNode()
18050     {
18051         return NULL;
18052     }
GetSourceStream()18053     virtual LVStream * GetSourceStream()
18054     {
18055         return NULL;
18056     }
18057 
Compact()18058     virtual void   Compact() { }
GetWidth()18059     virtual int    GetWidth() { return _dx; }
GetHeight()18060     virtual int    GetHeight() { return _dy; }
Decode(LVImageDecoderCallback * callback)18061     virtual bool   Decode( LVImageDecoderCallback * callback )
18062     {
18063         LVImageSourceRef img = _node->getDocument()->getObjectImageSource(_refName);
18064         if ( img.isNull() )
18065             return false;
18066         return img->Decode(callback);
18067     }
~NodeImageProxy()18068     virtual ~NodeImageProxy()
18069     {
18070 
18071     }
18072 };
18073 
18074 /// returns object image ref name
getObjectImageRefName(bool percentDecode)18075 lString32 ldomNode::getObjectImageRefName(bool percentDecode)
18076 {
18077     if (!isElement())
18078         return lString32::empty_str;
18079     //printf("ldomElement::getObjectImageSource() ... ");
18080     const css_elem_def_props_t * et = getDocument()->getElementTypePtr(getNodeId());
18081     if (!et || !et->is_object)
18082         return lString32::empty_str;
18083     lUInt16 hrefId = getDocument()->getAttrNameIndex("href");
18084     lUInt16 srcId = getDocument()->getAttrNameIndex("src");
18085     lUInt16 recIndexId = getDocument()->getAttrNameIndex("recindex");
18086     lString32 refName = getAttributeValue( getDocument()->getNsNameIndex("xlink"),
18087         hrefId );
18088 
18089     if ( refName.empty() )
18090         refName = getAttributeValue( getDocument()->getNsNameIndex("l"), hrefId );
18091     if ( refName.empty() )
18092         refName = getAttributeValue( LXML_NS_ANY, hrefId ); //LXML_NS_NONE
18093     if ( refName.empty() )
18094         refName = getAttributeValue( LXML_NS_ANY, srcId ); //LXML_NS_NONE
18095     if (refName.empty()) {
18096         lString32 recindex = getAttributeValue( LXML_NS_ANY, recIndexId );
18097         if (!recindex.empty()) {
18098             int n;
18099             if (recindex.atoi(n)) {
18100                 refName = lString32(MOBI_IMAGE_NAME_PREFIX) + fmt::decimal(n);
18101                 //CRLog::debug("get mobi image %s", LCSTR(refName));
18102             }
18103         }
18104 //        else {
18105 //            for (int k=0; k<getAttrCount(); k++) {
18106 //                CRLog::debug("attr %s=%s", LCSTR(getAttributeName(k)), LCSTR(getAttributeValue(getAttributeName(k).c_str())));
18107 //            }
18108 //        }
18109     }
18110     if ( refName.length()<2 )
18111         return lString32::empty_str;
18112     if (percentDecode)
18113         refName = DecodeHTMLUrlString(refName);
18114     return refName;
18115 }
18116 
18117 
18118 /// returns object image stream
getObjectImageStream()18119 LVStreamRef ldomNode::getObjectImageStream()
18120 {
18121     lString32 refName = getObjectImageRefName();
18122     if ( refName.empty() )
18123         return LVStreamRef();
18124     return getDocument()->getObjectImageStream( refName );
18125 }
18126 
18127 
18128 /// returns object image source
getObjectImageSource()18129 LVImageSourceRef ldomNode::getObjectImageSource()
18130 {
18131     lString32 refName = getObjectImageRefName(true);
18132     LVImageSourceRef ref;
18133     if ( refName.empty() )
18134         return ref;
18135     ref = getDocument()->getObjectImageSource( refName );
18136     if (ref.isNull()) {
18137         // try again without percent decoding (for fb3)
18138         refName = getObjectImageRefName(false);
18139         if ( refName.empty() )
18140             return ref;
18141         ref = getDocument()->getObjectImageSource( refName );
18142     }
18143     if ( !ref.isNull() ) {
18144         int dx = ref->GetWidth();
18145         int dy = ref->GetHeight();
18146         ref = LVImageSourceRef( new NodeImageProxy(this, refName, dx, dy) );
18147     } else {
18148         CRLog::error("ObjectImageSource cannot be opened by name %s", LCSTR(refName));
18149     }
18150 
18151     getDocument()->_urlImageMap.set( refName, ref );
18152     return ref;
18153 }
18154 
/// register embedded document fonts in font manager, if any exist in document
///
/// For each entry of _fontList:
///  - entries with a "res://" or "file://" url are registered as external fonts
///  - other entries are registered as document fonts (from _container); when
///    that fails, the font manager's face list is searched for a face whose
///    name contains the url (both compared with spaces removed and
///    lowercased), and an alias to that face is set up.
void ldomDocument::registerEmbeddedFonts()
{
    if (_fontList.empty())
        return;
    int list = _fontList.length();
    // 'x' accumulates, comma separated, the faces for which an alias was set below
    lString8 x=lString8("");
    // Faces known to the font manager, used for the alias fallback
    lString32Collection flist;
    fontMan->getFaceList(flist);
    int cnt = flist.length();
    for (int i = 0; i < list; i++) {
        LVEmbeddedFontDef *item = _fontList.get(i);
        lString32 url = item->getUrl();
        lString8 face = item->getFace();
        if (face.empty()) {
            // No face name on this entry: borrow the one of the first
            // following entry that has one
            for (int a=i+1;a<list;a++){
                lString8 tmp=_fontList.get(a)->getFace();
                if (!tmp.empty()) {face=tmp;break;}
            }
        }
        // Skip entries without url, and faces already aliased
        // (note: this is a substring search in the comma separated list 'x')
        if ((!x.empty() && x.pos(face)!=-1) || url.empty()) {
            continue;
        }
        if (url.startsWithNoCase(lString32("res://")) || url.startsWithNoCase(lString32("file://"))) {
            // Note: registration uses item->getFace(), not the possibly borrowed 'face'
            if (!fontMan->RegisterExternalFont(item->getUrl(), item->getFace(), item->getBold(), item->getItalic())) {
                //CRLog::error("Failed to register external font face: %s file: %s", item->getFace().c_str(), LCSTR(item->getUrl()));
            }
            continue;
        }
        else {
            if (!fontMan->RegisterDocumentFont(getDocIndex(), _container, item->getUrl(), item->getFace(), item->getBold(), item->getItalic())) {
                //CRLog::error("Failed to register document font face: %s file: %s", item->getFace().c_str(), LCSTR(item->getUrl()));
                // Fallback: look for a known face whose name contains the url
                lString32 fontface = lString32("");
                for (int j = 0; j < cnt; j = j + 1) {
                fontface = flist[j];
                // Remove all spaces from both strings before comparing.
                // NOTE(review): lString32("\0") is built from a C string starting
                // with NUL, so it is presumably an empty string - confirm.
                do { (fontface.replace(lString32(" "), lString32("\0"))); }
                while (fontface.pos(lString32(" ")) != -1);
                do { (url.replace(lString32(" "), lString32("\0"))); }
                while (url.pos(lString32(" ")) != -1);
                // NOTE(review): whether lowercase() modifies 'url' and 'fontface'
                // in place is not visible here - confirm.
                 if (fontface.lowercase().pos(url.lowercase()) != -1) {
                    if(fontMan->SetAlias(face, UnicodeToLocal(flist[j]), getDocIndex(),item->getBold(),item->getItalic())){
                    // Remember this face, so later entries with the same face are skipped
                    x.append(face).append(lString8(","));
                        CRLog::debug("font-face %s matches local font %s",face.c_str(),LCSTR(flist[j]));
                    break;}
                 }
                }
            }
        }
    }
}
/// unregister embedded document fonts in font manager, if any exist in document
/// (asks the font manager to unregister the document fonts associated
/// with this document's index, _docIndex)
void ldomDocument::unregisterEmbeddedFonts()
{
    fontMan->UnregisterDocumentFonts(_docIndex);
}
18210 
18211 /// returns object image stream
getObjectImageStream(lString32 refName)18212 LVStreamRef ldomDocument::getObjectImageStream( lString32 refName )
18213 {
18214     LVStreamRef ref;
18215     if ( refName.startsWith(lString32(BLOB_NAME_PREFIX)) ) {
18216         return _blobCache.getBlob(refName);
18217     }
18218     if ( refName.length() > 10 && refName[4] == ':' && refName.startsWith(lString32("...>
18220         lString32 data = refName.substr(0, 50);
18221         int pos = data.pos(U";base64,");
18222         if ( pos > 0 ) {
18223             lString8 b64data = UnicodeToLocal(refName.substr(pos+8));
18224             ref = LVStreamRef(new LVBase64Stream(b64data));
18225             return ref;
18226         }
18227     }
18228     if ( refName[0]!='#' ) {
18229         if ( !getContainer().isNull() ) {
18230             lString32 name = refName;
18231             if ( !getCodeBase().empty() )
18232                 name = getCodeBase() + refName;
18233             ref = getContainer()->OpenStream(name.c_str(), LVOM_READ);
18234             if ( ref.isNull() ) {
18235                 lString32 fname = getProps()->getStringDef( DOC_PROP_FILE_NAME, "" );
18236                 fname = LVExtractFilenameWithoutExtension(fname);
18237                 if ( !fname.empty() ) {
18238                     lString32 fn = fname + "_img";
18239 //                    if ( getContainer()->GetObjectInfo(fn) ) {
18240 
18241 //                    }
18242                     lString32 name = fn + "/" + refName;
18243                     if ( !getCodeBase().empty() )
18244                         name = getCodeBase() + name;
18245                     ref = getContainer()->OpenStream(name.c_str(), LVOM_READ);
18246                 }
18247             }
18248             if ( ref.isNull() )
18249                 CRLog::error("Cannot open stream by name %s", LCSTR(name));
18250         }
18251         return ref;
18252     }
18253     lUInt32 refValueId = findAttrValueIndex( refName.c_str() + 1 );
18254     if ( refValueId == (lUInt32)-1 ) {
18255         return ref;
18256     }
18257     ldomNode * objnode = getNodeById( refValueId );
18258     if ( !objnode || !objnode->isElement())
18259         return ref;
18260     ref = objnode->createBase64Stream();
18261     return ref;
18262 }
18263 
18264 /// returns object image source
getObjectImageSource(lString32 refName)18265 LVImageSourceRef ldomDocument::getObjectImageSource( lString32 refName )
18266 {
18267     LVStreamRef stream = getObjectImageStream( refName );
18268     if (stream.isNull())
18269          return LVImageSourceRef();
18270     return LVCreateStreamImageSource( stream );
18271 }
18272 
/// Forgets all the list numbering properties stored so far
/// (empties the 'lists' container filled by setNodeNumberingProps())
void ldomDocument::resetNodeNumberingProps()
{
    lists.clear();
}
18277 
/// Returns the list numbering properties stored in 'lists' for the node
/// with the given data index.
/// NOTE(review): presumably a null reference when nothing was stored for
/// that index - confirm against the 'lists' container implementation.
ListNumberingPropsRef ldomDocument::getNodeNumberingProps( lUInt32 nodeDataIndex )
{
    return lists.get(nodeDataIndex);
}
18282 
/// Stores in 'lists' the list numbering properties 'v' for the node with
/// the given data index, to be fetched later with getNodeNumberingProps()
void ldomDocument::setNodeNumberingProps( lUInt32 nodeDataIndex, ListNumberingPropsRef v )
{
    lists.set(nodeDataIndex, v);
}
18287 
18288 /// returns the sum of this node and its parents' top and bottom margins, borders and paddings
getSurroundingAddedHeight()18289 int ldomNode::getSurroundingAddedHeight()
18290 {
18291     int h = 0;
18292     ldomNode * n = this;
18293     while (true) {
18294         ldomNode * parent = n->getParentNode();
18295         lvdom_element_render_method rm = n->getRendMethod();
18296         if ( rm != erm_inline && rm != erm_invisible && rm != erm_killed) {
18297             // Add offset of border and padding
18298             int base_width = 0;
18299             if ( parent && !(parent->isNull()) ) {
18300                 // margins and padding in % are scaled according to parent's width
18301                 RenderRectAccessor fmt( parent );
18302                 base_width = fmt.getWidth();
18303             }
18304             int em = n->getFont()->getSize();
18305             css_style_ref_t style = n->getStyle();
18306             h += lengthToPx( style->margin[2], base_width, em );  // top margin
18307             h += lengthToPx( style->margin[3], base_width, em );  // bottom margin
18308             h += lengthToPx( style->padding[2], base_width, em ); // top padding
18309             h += lengthToPx( style->padding[3], base_width, em ); // bottom padding
18310             h += measureBorder(n, 0); // top border
18311             h += measureBorder(n, 2); // bottom border
18312         }
18313         if ( !parent || parent->isNull() )
18314             break;
18315         n = parent;
18316     }
18317     return h;
18318 }
18319 
/// formats final block
// 'frmtext' receives the formatted text object (a cached one when a
// reusable one is found, a newly formatted one otherwise).
// 'fmt' is the rect of the block node, and MUST have its width set
// (as ::renderFinalBlock( this, f.get(), fmt...) needs it to compute text-indent in %
// 'int width' is the available width for the inner content, and so
// caller must exclude block node padding from it.
// 'float_footprint' is the outer floats footprint: when provided, it is
// stored for this node; when NULL, it is restored from what was stored.
// Returns the height of the formatted content, or 0 when this node is not
// an element or is not rendered as erm_final.
int ldomNode::renderFinalBlock(  LFormattedTextRef & frmtext, RenderRectAccessor * fmt, int width, BlockFloatFootprint * float_footprint )
{
    ASSERT_NODE_NOT_NULL;
    if ( !isElement() )
        return 0;
    //CRLog::trace("renderFinalBlock()");
    CVRendBlockCache & cache = getDocument()->getRendBlockCache();
    LFormattedTextRef f;
    lvdom_element_render_method rm = getRendMethod();

    // First, look for an already formatted object in the cache
    if ( cache.get( this, f ) ) {
        if ( f->isReusable() ) {
            frmtext = f;
            if ( rm != erm_final )
                return 0;
            //RenderRectAccessor fmt( this );
            //CRLog::trace("Found existing formatted object for node #%08X", (lUInt32)this);
            // The height is taken from the provided rect, not recomputed
            return fmt->getHeight();
        }
        // Not reusable: remove it, just to be sure it's properly freed
        cache.remove( this );
    }
    f = getDocument()->createFormattedText();
    // Note: on this path, frmtext is left untouched for non-final nodes
    if ( rm != erm_final )
        return 0;

    /// Render whole node content as single formatted object

    // Get some properties cached in this node's RenderRectAccessor
    // and set the initial flags and lang_cfg (for/from the final node
    // itself) for renderFinalBlock(),
    int direction = RENDER_RECT_PTR_GET_DIRECTION(fmt);
    lUInt32 flags = styleToTextFmtFlags( true, getStyle(), 0, direction );
    int lang_node_idx = fmt->getLangNodeIndex();
    TextLangCfg * lang_cfg = TextLangMan::getTextLangCfg(lang_node_idx>0 ? getDocument()->getTinyNode(lang_node_idx) : NULL);

    // Add this node's inner content (text and children nodes) as source text
    // and image fragments into the empty LFormattedText object
    ::renderFinalBlock( this, f.get(), fmt, flags, 0, -1, lang_cfg );
    // We need to store this LFormattedTextRef in the cache for it to
    // survive when leaving this function (some callers do use it).
    cache.set( this, f );

    // Gather some outer properties and context, so we can format (render)
    // the inner content in that context.
    // This page_h we provide to f->Format() is only used to enforce a max height to images
    int page_h = getDocument()->getPageHeight();
    // Save or restore outer floats footprint (it is only provided
    // when rendering the document - when this is called to draw the
    // node, or search for text and links, we need to get it from
    // the cached RenderRectAccessor).
    BlockFloatFootprint restored_float_footprint; // (need to be available when we exit the else {})
    if (float_footprint) { // Save it in this node's RenderRectAccessor
        float_footprint->store( this );
    }
    else { // Restore it from this node's RenderRectAccessor
        float_footprint = &restored_float_footprint;
        float_footprint->restore( this, (lUInt16)width );
    }
    if ( !getDocument()->isRendered() ) {
        // Full rendering in progress: avoid some unneeded work that
        // is only needed when we'll be drawing the formatted text
        // (like aligning lines): this will mark it as not reusable, and
        // one that is on a page to be drawn will be reformatted.
        f->requestLightFormatting();
    }
    int usable_left_overflow = fmt->getUsableLeftOverflow();
    int usable_right_overflow = fmt->getUsableRightOverflow();

    // Note: some properties are set into LFormattedText by lvrend.cpp's renderFinalBlock(),
    // while some others are only passed below as parameters to LFormattedText->Format().
    // The former should logically be source inner content properties (strut, text indent)
    // while the latter should be formatting and outer context properties (block width,
    // page height...).
    // There might be a few drifts from that logic, or duplicates ('direction' is
    // passed both ways), that could need a little rework.

    // Format/render inner content: this makes lines and words, which are
    // cached into the LFormattedText and ready to be used for drawing
    // and text selection.
    // (width and page height are passed as lUInt16 values)
    int h = f->Format((lUInt16)width, (lUInt16)page_h, direction, usable_left_overflow, usable_right_overflow,
                            getDocument()->getHangingPunctiationEnabled(), float_footprint);
    frmtext = f;
    //CRLog::trace("Created new formatted object for node #%08X", (lUInt32)this);
    return h;
}
18411 
18412 /// formats final block again after change, returns true if size of block is changed
18413 /// (not used anywhere, not updated to use RENDER_RECT_HAS_FLAG(fmt, INNER_FIELDS_SET)
refreshFinalBlock()18414 bool ldomNode::refreshFinalBlock()
18415 {
18416     ASSERT_NODE_NOT_NULL;
18417     if ( getRendMethod() != erm_final )
18418         return false;
18419     // TODO: implement reformatting of one node
18420     CVRendBlockCache & cache = getDocument()->getRendBlockCache();
18421     cache.remove( this );
18422     RenderRectAccessor fmt( this );
18423     lvRect oldRect, newRect;
18424     fmt.getRect( oldRect );
18425     LFormattedTextRef txtform;
18426     int width = fmt.getWidth();
18427     renderFinalBlock( txtform, &fmt, width-measureBorder(this,1)-measureBorder(this,3)
18428          -lengthToPx(this->getStyle()->padding[0],fmt.getWidth(),this->getFont()->getSize())
18429          -lengthToPx(this->getStyle()->padding[1],fmt.getWidth(),this->getFont()->getSize()));
18430     fmt.getRect( newRect );
18431     if ( oldRect == newRect )
18432         return false;
18433     // TODO: relocate other blocks
18434     return true;
18435 }
18436 
18437 #endif
18438 
18439 /// replace node with r/o persistent implementation
persist()18440 ldomNode * ldomNode::persist()
18441 {
18442     ASSERT_NODE_NOT_NULL;
18443 #if BUILD_LITE!=1
18444     if ( !isPersistent() ) {
18445         if ( isElement() ) {
18446             // ELEM->PELEM
18447             tinyElement * elem = NPELEM;
18448             int attrCount = elem->_attrs.length();
18449             int childCount = elem->_children.length();
18450             _handle._dataIndex = (_handle._dataIndex & ~0xF) | NT_PELEMENT;
18451             _data._pelem_addr = getDocument()->_elemStorage.allocElem(_handle._dataIndex, elem->_parentNode ? elem->_parentNode->_handle._dataIndex : 0, elem->_children.length(), elem->_attrs.length() );
18452             ElementDataStorageItem * data = getDocument()->_elemStorage.getElem(_data._pelem_addr);
18453             data->nsid = elem->_nsid;
18454             data->id = elem->_id;
18455             lUInt16 * attrs = data->attrs();
18456             int i;
18457             for ( i=0; i<attrCount; i++ ) {
18458                 const lxmlAttribute * attr = elem->_attrs[i];
18459                 attrs[i * 4] = attr->nsid;     // namespace
18460                 attrs[i * 4 + 1] = attr->id;   // id
18461                 attrs[i * 4 + 2] = (lUInt16)(attr->index & 0xFFFF);// value lower 2-bytes
18462                 attrs[i * 4 + 3] = (lUInt16)(attr->index >> 16);// value higher 2-bytes
18463             }
18464             for ( i=0; i<childCount; i++ ) {
18465                 data->children[i] = elem->_children[i];
18466             }
18467             data->rendMethod = (lUInt8)elem->_rendMethod;
18468             delete elem;
18469         } else {
18470             // TEXT->PTEXT
18471             lString8 utf8 = _data._text_ptr->getText();
18472             lUInt32 parentIndex = _data._text_ptr->getParentIndex();
18473             delete _data._text_ptr;
18474             _handle._dataIndex = (_handle._dataIndex & ~0xF) | NT_PTEXT;
18475             _data._ptext_addr = getDocument()->_textStorage.allocText(_handle._dataIndex, parentIndex, utf8 );
18476             // change type
18477         }
18478     }
18479 #endif
18480     return this;
18481 }
18482 
18483 /// replace node with r/w implementation
modify()18484 ldomNode * ldomNode::modify()
18485 {
18486     ASSERT_NODE_NOT_NULL;
18487 #if BUILD_LITE!=1
18488     if ( isPersistent() ) {
18489         if ( isElement() ) {
18490             // PELEM->ELEM
18491             ElementDataStorageItem * data = getDocument()->_elemStorage.getElem(_data._pelem_addr);
18492             tinyElement * elem = new tinyElement(getDocument(), getParentNode(), data->nsid, data->id );
18493             for ( int i=0; i<data->childCount; i++ )
18494                 elem->_children.add( data->children[i] );
18495             for ( int i=0; i<data->attrCount; i++ )
18496                 elem->_attrs.add( data->attr(i) );
18497             _handle._dataIndex = (_handle._dataIndex & ~0xF) | NT_ELEMENT;
18498             elem->_rendMethod = (lvdom_element_render_method)data->rendMethod;
18499             getDocument()->_elemStorage.freeNode( _data._pelem_addr );
18500             NPELEM = elem;
18501         } else {
18502             // PTEXT->TEXT
18503             // convert persistent text to mutable
18504             lString8 utf8 = getDocument()->_textStorage.getText(_data._ptext_addr);
18505             lUInt32 parentIndex = getDocument()->_textStorage.getParent(_data._ptext_addr);
18506             getDocument()->_textStorage.freeNode( _data._ptext_addr );
18507             _data._text_ptr = new ldomTextNode( parentIndex, utf8 );
18508             // change type
18509             _handle._dataIndex = (_handle._dataIndex & ~0xF) | NT_TEXT;
18510         }
18511     }
18512 #endif
18513     return this;
18514 }
18515 
18516 
18517 /// dumps memory usage statistics to debug log
dumpStatistics()18518 void tinyNodeCollection::dumpStatistics()
18519 {
18520     CRLog::info("*** Document memory usage: "
18521                 "elements:%d, textNodes:%d, "
18522                 "ptext=("
18523                 "%d uncompressed), "
18524                 "ptelems=("
18525                 "%d uncompressed), "
18526                 "rects=("
18527                 "%d uncompressed), "
18528                 "nodestyles=("
18529                 "%d uncompressed), "
18530                 "styles:%d, fonts:%d, renderedNodes:%d, "
18531                 "totalNodes:%d(%dKb), mutableElements:%d(~%dKb)",
18532                 _elemCount, _textCount,
18533                 _textStorage.getUncompressedSize(),
18534                 _elemStorage.getUncompressedSize(),
18535                 _rectStorage.getUncompressedSize(),
18536                 _styleStorage.getUncompressedSize(),
18537                 _styles.length(), _fonts.length(),
18538 #if BUILD_LITE!=1
18539                 ((ldomDocument*)this)->_renderedBlockCache.length(),
18540 #else
18541                 0,
18542 #endif
18543                 _itemCount, _itemCount*16/1024,
18544                 _tinyElementCount, _tinyElementCount*(sizeof(tinyElement)+8*4)/1024 );
18545 }
getStatistics()18546 lString32 tinyNodeCollection::getStatistics()
18547 {
18548     lString32 s;
18549     s << "Elements: " << fmt::decimal(_elemCount) << ", " << fmt::decimal(_elemStorage.getUncompressedSize()/1024) << " KB\n";
18550     s << "Text nodes: " << fmt::decimal(_textCount) << ", " << fmt::decimal(_textStorage.getUncompressedSize()/1024) << " KB\n";
18551     s << "Styles: " << fmt::decimal(_styles.length()) << ", " << fmt::decimal(_styleStorage.getUncompressedSize()/1024) << " KB\n";
18552     s << "Font instances: " << fmt::decimal(_fonts.length()) << "\n";
18553     s << "Rects: " << fmt::decimal(_rectStorage.getUncompressedSize()/1024) << " KB\n";
18554     #if BUILD_LITE!=1
18555     s << "Cached rendered blocks: " << fmt::decimal(((ldomDocument*)this)->_renderedBlockCache.length()) << "\n";
18556     #endif
18557     s << "Total nodes: " << fmt::decimal(_itemCount) << ", " << fmt::decimal(_itemCount*16/1024) << " KB\n";
18558     s << "Mutable elements: " << fmt::decimal(_tinyElementCount) << ", " << fmt::decimal(_tinyElementCount*(sizeof(tinyElement)+8*4)/1024) << " KB";
18559     return s;
18560 }
18561 
18562 
18563 /// returns position pointer
getXPointer()18564 ldomXPointer LVTocItem::getXPointer()
18565 {
18566     if ( _position.isNull() && !_path.empty() ) {
18567         _position = _doc->createXPointer( _path );
18568         if ( _position.isNull() ) {
18569             CRLog::trace("TOC node is not found for path %s", LCSTR(_path) );
18570         } else {
18571             CRLog::trace("TOC node is found for path %s", LCSTR(_path) );
18572             // CRLog::trace("           gives xpointer: %s", UnicodeToLocal(_position.toString()).c_str());
18573         }
18574     }
18575     return _position;
18576 }
18577 
18578 /// returns position path
getPath()18579 lString32 LVTocItem::getPath()
18580 {
18581     if ( _path.empty() && !_position.isNull())
18582         _path = _position.toString();
18583     return _path;
18584 }
18585 
18586 /// returns Y position
getY()18587 int LVTocItem::getY()
18588 {
18589 #if BUILD_LITE!=1
18590     return getXPointer().toPoint().y;
18591 #else
18592     return 0;
18593 #endif
18594 }
18595 
18596 /// serialize to byte array (pointer will be incremented by number of bytes written)
serialize(SerialBuf & buf)18597 bool LVTocItem::serialize( SerialBuf & buf )
18598 {
18599 //    LVTocItem *     _parent;
18600 //    int             _level;
18601 //    int             _index;
18602 //    int             _page;
18603 //    int             _percent;
18604 //    lString32       _name;
18605 //    ldomXPointer    _position;
18606 //    LVPtrVector<LVTocItem> _children;
18607 
18608     buf << (lUInt32)_level << (lUInt32)_index << (lUInt32)_page << (lUInt32)_percent << (lUInt32)_children.length() << _name << getPath();
18609     if ( buf.error() )
18610         return false;
18611     for ( int i=0; i<_children.length(); i++ ) {
18612         _children[i]->serialize( buf );
18613         if ( buf.error() )
18614             return false;
18615     }
18616     return !buf.error();
18617 }
18618 
18619 /// deserialize from byte array (pointer will be incremented by number of bytes read)
deserialize(ldomDocument * doc,SerialBuf & buf)18620 bool LVTocItem::deserialize( ldomDocument * doc, SerialBuf & buf )
18621 {
18622     if ( buf.error() )
18623         return false;
18624     lInt32 childCount = 0;
18625     buf >> _level >> _index >> _page >> _percent >> childCount >> _name >> _path;
18626 //    CRLog::trace("[%d] %05d  %s  %s", _level, _page, LCSTR(_name), LCSTR(_path));
18627     if ( buf.error() )
18628         return false;
18629 //    if ( _level>0 ) {
18630 //        _position = doc->createXPointer( _path );
18631 //        if ( _position.isNull() ) {
18632 //            CRLog::error("Cannot find TOC node by path %s", LCSTR(_path) );
18633 //            buf.seterror();
18634 //            return false;
18635 //        }
18636 //    }
18637     for ( int i=0; i<childCount; i++ ) {
18638         LVTocItem * item = new LVTocItem(doc);
18639         if ( !item->deserialize( doc, buf ) ) {
18640             delete item;
18641             return false;
18642         }
18643         item->_parent = this;
18644         _children.add( item );
18645         if ( buf.error() )
18646             return false;
18647     }
18648     return true;
18649 }
18650 
18651 /// returns page number
18652 //int LVTocItem::getPageNum( LVRendPageList & pages )
18653 //{
18654 //    return getSectionPage( _position.getNode(), pages );
18655 //}
18656 
18657 
makeTocFromCrHintsOrHeadings(ldomNode * node,bool ensure_cr_hints)18658 static inline void makeTocFromCrHintsOrHeadings( ldomNode * node, bool ensure_cr_hints )
18659 {
18660     int level;
18661     if ( ensure_cr_hints ) {
18662         css_style_ref_t style = node->getStyle();
18663         if ( STYLE_HAS_CR_HINT(style, TOC_IGNORE) )
18664             return; // requested to be ignored via style tweaks
18665         if ( STYLE_HAS_CR_HINT(style, TOC_LEVELS_MASK) ) {
18666             if      ( STYLE_HAS_CR_HINT(style, TOC_LEVEL1) ) level = 1;
18667             else if ( STYLE_HAS_CR_HINT(style, TOC_LEVEL2) ) level = 2;
18668             else if ( STYLE_HAS_CR_HINT(style, TOC_LEVEL3) ) level = 3;
18669             else if ( STYLE_HAS_CR_HINT(style, TOC_LEVEL4) ) level = 4;
18670             else if ( STYLE_HAS_CR_HINT(style, TOC_LEVEL5) ) level = 5;
18671             else if ( STYLE_HAS_CR_HINT(style, TOC_LEVEL6) ) level = 6;
18672             else level = 7; // should not be reached
18673         }
18674         else if ( node->getNodeId() >= el_h1 && node->getNodeId() <= el_h6 )
18675             // el_h1 .. el_h6 are consecutive and ordered in include/fb2def.h
18676             level = node->getNodeId() - el_h1 + 1;
18677         else
18678             return;
18679     }
18680     else {
18681         if ( node->getNodeId() >= el_h1 && node->getNodeId() <= el_h6 )
18682             // el_h1 .. el_h6 are consecutive and ordered in include/fb2def.h
18683             level = node->getNodeId() - el_h1 + 1;
18684         else
18685             return;
18686     }
18687     lString32 title = removeSoftHyphens( node->getText(' ') );
18688     ldomXPointer xp = ldomXPointer(node, 0);
18689     LVTocItem * root = node->getDocument()->getToc();
18690     LVTocItem * parent = root;
18691     // Find adequate parent, or create intermediates
18692     int plevel = 1;
18693     while (plevel < level) {
18694         int nbc = parent->getChildCount();
18695         if (nbc) { // use the latest child
18696             parent = parent->getChild(nbc-1);
18697         }
18698         else {
18699             // If we'd like to stick it to the last parent found, even if
18700             // of wrong level, just do: break;
18701             // But it is cleaner to create intermediate(s)
18702             parent = parent->addChild(U"", xp, lString32::empty_str);
18703         }
18704         plevel++;
18705     }
18706     parent->addChild(title, xp, lString32::empty_str);
18707 }
18708 
makeTocFromHeadings(ldomNode * node)18709 static void makeTocFromHeadings( ldomNode * node )
18710 {
18711     makeTocFromCrHintsOrHeadings( node, false );
18712 }
18713 
makeTocFromCrHintsOrHeadings(ldomNode * node)18714 static void makeTocFromCrHintsOrHeadings( ldomNode * node )
18715 {
18716     makeTocFromCrHintsOrHeadings( node, true );
18717 }
18718 
makeTocFromDocFragments(ldomNode * node)18719 static void makeTocFromDocFragments( ldomNode * node )
18720 {
18721     if ( node->getNodeId() != el_DocFragment )
18722         return;
18723     // No title, and only level 1 with DocFragments
18724     ldomXPointer xp = ldomXPointer(node, 0);
18725     LVTocItem * root = node->getDocument()->getToc();
18726     root->addChild(U"", xp, lString32::empty_str);
18727 }
18728 
buildTocFromHeadings()18729 void ldomDocument::buildTocFromHeadings()
18730 {
18731     m_toc.clear();
18732     getRootNode()->recurseElements(makeTocFromHeadings);
18733 }
18734 
buildAlternativeToc()18735 void ldomDocument::buildAlternativeToc()
18736 {
18737     m_toc.clear();
18738     // Look first for style tweaks specified -cr-hint: toc-level1 ... toc-level6
18739     // and/or headings (H1...H6)
18740     getRootNode()->recurseElements(makeTocFromCrHintsOrHeadings);
18741     // If no heading or hints found, fall back to gathering DocFraments
18742     if ( !m_toc.getChildCount() )
18743         getRootNode()->recurseElements(makeTocFromDocFragments);
18744     // m_toc.setAlternativeTocFlag() uses the root toc item _page property
18745     // (never used for the root node) to store the fact this is an
18746     // alternatve TOC. This info can then be serialized to cache and
18747     // retrieved without any additional work or space overhead.
18748     m_toc.setAlternativeTocFlag();
18749     // cache file will have to be updated with the alt TOC
18750     setCacheFileStale(true);
18751     _toc_from_cache_valid = false; // to force update of page numbers
18752 }
18753 
18754 /// returns position pointer
getXPointer()18755 ldomXPointer LVPageMapItem::getXPointer()
18756 {
18757     if ( _position.isNull() && !_path.empty() ) {
18758         _position = _doc->createXPointer( _path );
18759         if ( _position.isNull() ) {
18760             CRLog::trace("LVPageMapItem node is not found for path %s", LCSTR(_path) );
18761         } else {
18762             CRLog::trace("LVPageMapItem node is found for path %s", LCSTR(_path) );
18763         }
18764     }
18765     return _position;
18766 }
18767 
18768 /// returns position path
getPath()18769 lString32 LVPageMapItem::getPath()
18770 {
18771     if ( _path.empty() && !_position.isNull())
18772         _path = _position.toString();
18773     return _path;
18774 }
18775 
18776 /// returns Y position
getDocY(bool refresh)18777 int LVPageMapItem::getDocY(bool refresh)
18778 {
18779 #if BUILD_LITE!=1
18780     if ( _doc_y < 0 || refresh )
18781         _doc_y = getXPointer().toPoint().y;
18782     if ( _doc_y < 0 && !_position.isNull() ) {
18783         // We got a xpointer, that did not resolve to a point.
18784         // It may be because the node it points to is invisible,
18785         // which may happen with pagebreak spans (that may not
18786         // be empty, and were set to "display: none").
18787         ldomXPointerEx xp = _position;
18788         if ( !xp.isVisible() ) {
18789             if ( xp.nextVisibleText() ) {
18790                 _doc_y = xp.toPoint().y;
18791             }
18792             else {
18793                 xp = _position;
18794                 if ( xp.prevVisibleText() ) {
18795                     _doc_y = xp.toPoint().y;
18796                 }
18797             }
18798         }
18799     }
18800     return _doc_y;
18801 #else
18802     return 0;
18803 #endif
18804 }
18805 
18806 /// serialize to byte array (pointer will be incremented by number of bytes written)
serialize(SerialBuf & buf)18807 bool LVPageMapItem::serialize( SerialBuf & buf )
18808 {
18809     buf << (lUInt32)_index << (lUInt32)_page << (lUInt32)_doc_y << _label << getPath();
18810     return !buf.error();
18811 }
18812 
18813 /// deserialize from byte array (pointer will be incremented by number of bytes read)
deserialize(ldomDocument * doc,SerialBuf & buf)18814 bool LVPageMapItem::deserialize( ldomDocument * doc, SerialBuf & buf )
18815 {
18816     if ( buf.error() )
18817         return false;
18818     buf >> _index >> _page >> _doc_y >> _label >> _path;
18819     return !buf.error();
18820 
18821 }
18822 /// serialize to byte array (pointer will be incremented by number of bytes written)
serialize(SerialBuf & buf)18823 bool LVPageMap::serialize( SerialBuf & buf )
18824 {
18825     buf << (lUInt32)_page_info_valid << (lUInt32)_children.length() << _source;
18826     if ( buf.error() )
18827         return false;
18828     for ( int i=0; i<_children.length(); i++ ) {
18829         _children[i]->serialize( buf );
18830         if ( buf.error() )
18831             return false;
18832     }
18833     return !buf.error();
18834 }
18835 
18836 /// deserialize from byte array (pointer will be incremented by number of bytes read)
deserialize(ldomDocument * doc,SerialBuf & buf)18837 bool LVPageMap::deserialize( ldomDocument * doc, SerialBuf & buf )
18838 {
18839     if ( buf.error() )
18840         return false;
18841     lUInt32 childCount = 0;
18842     lUInt32 pageInfoValid = 0;
18843     buf >> pageInfoValid >> childCount >> _source;
18844     if ( buf.error() )
18845         return false;
18846     _page_info_valid = (bool)pageInfoValid;
18847     for ( int i=0; i<childCount; i++ ) {
18848         LVPageMapItem * item = new LVPageMapItem(doc);
18849         if ( !item->deserialize( doc, buf ) ) {
18850             delete item;
18851             return false;
18852         }
18853         _children.add( item );
18854         if ( buf.error() )
18855             return false;
18856     }
18857     return true;
18858 }
18859 
18860 
18861 #if 0 && defined(_DEBUG)
18862 
18863 #define TEST_FILE_NAME "/tmp/test-cache-file.dat"
18864 
18865 #include <lvdocview.h>
18866 
18867 void testCacheFile()
18868 {
18869 #if BUILD_LITE!=1
18870     CRLog::info("Starting CacheFile unit test");
18871     lUInt8 data1[] = {'T', 'e', 's', 't', 'd', 'a', 't', 'a', 1, 2, 3, 4, 5, 6, 7};
18872     lUInt8 data2[] = {'T', 'e', 's', 't', 'd', 'a', 't', 'a', '2', 1, 2, 3, 4, 5, 6, 7};
18873     lUInt8 * buf1;
18874     lUInt8 * buf2;
18875     int sz1;
18876     int sz2;
18877     lString32 fn(TEST_FILE_NAME);
18878 
18879     {
18880         lUInt8 data1[] = {'T', 'e', 's', 't', 'D', 'a', 't', 'a', '1'};
18881         lUInt8 data2[] = {'T', 'e', 's', 't', 'D', 'a', 't', 'a', '2', 1, 2, 3, 4, 5, 6, 7};
18882         LVStreamRef s = LVOpenFileStream( fn.c_str(), LVOM_APPEND );
18883         s->SetPos(0);
18884         s->Write(data1, sizeof(data1), NULL);
18885         s->SetPos(4096);
18886         s->Write(data1, sizeof(data1), NULL);
18887         s->SetPos(8192);
18888         s->Write(data2, sizeof(data2), NULL);
18889         s->SetPos(4096);
18890         s->Write(data2, sizeof(data2), NULL);
18891         lUInt8 buf[16];
18892         s->SetPos(0);
18893         s->Read(buf, sizeof(data1), NULL);
18894         MYASSERT(!memcmp(buf, data1, sizeof(data1)), "read 1 content");
18895         s->SetPos(4096);
18896         s->Read(buf, sizeof(data2), NULL);
18897         MYASSERT(!memcmp(buf, data2, sizeof(data2)), "read 2 content");
18898 
18899         //return;
18900     }
18901 
18902     // write
18903     {
18904         CacheFile f;
18905         MYASSERT(f.open(cs32("/tmp/blabla-not-exits-file-name"))==false, "Wrong failed open result");
18906         MYASSERT(f.create( fn )==true, "new file created");
18907         MYASSERT(f.write(CBT_TEXT_DATA, 1, data1, sizeof(data1), true)==true, "write 1");
18908         MYASSERT(f.write(CBT_ELEM_DATA, 3, data2, sizeof(data2), false)==true, "write 2");
18909 
18910         MYASSERT(f.read(CBT_TEXT_DATA, 1, buf1, sz1)==true, "read 1");
18911         MYASSERT(f.read(CBT_ELEM_DATA, 3, buf2, sz2)==true, "read 2");
18912         MYASSERT(sz1==sizeof(data1), "read 1 size");
18913         MYASSERT(!memcmp(buf1, data1, sizeof(data1)), "read 1 content");
18914         MYASSERT(sz2==sizeof(data2), "read 2 size");
18915         MYASSERT(!memcmp(buf2, data2, sizeof(data2)), "read 2 content");
18916     }
18917     // write
18918     {
18919         CacheFile f;
18920         MYASSERT(f.open(fn)==true, "Wrong failed open result");
18921         MYASSERT(f.read(CBT_TEXT_DATA, 1, buf1, sz1)==true, "read 1");
18922         MYASSERT(f.read(CBT_ELEM_DATA, 3, buf2, sz2)==true, "read 2");
18923         MYASSERT(sz1==sizeof(data1), "read 1 size");
18924         MYASSERT(!memcmp(buf1, data1, sizeof(data1)), "read 1 content");
18925         MYASSERT(sz2==sizeof(data2), "read 2 size");
18926         MYASSERT(!memcmp(buf2, data2, sizeof(data2)), "read 2 content");
18927     }
18928 
18929     CRLog::info("Finished CacheFile unit test");
18930 #endif
18931 }
18932 
18933 #ifdef _WIN32
18934 #define TEST_FN_TO_OPEN "/projects/test/bibl.fb2.zip"
18935 #else
18936 #define TEST_FN_TO_OPEN "/home/lve/src/test/bibl.fb2.zip"
18937 #endif
18938 
18939 void runFileCacheTest()
18940 {
18941 #if BUILD_LITE!=1
18942     CRLog::info("====Cache test started =====");
18943 
18944     // init and clear cache
18945     ldomDocCache::init(cs32("/tmp/cr3cache"), 100);
18946     MYASSERT(ldomDocCache::enabled(), "clear cache");
18947 
18948     {
18949         CRLog::info("====Open document and save to cache=====");
18950         LVDocView view(4);
18951         view.Resize(600, 800);
18952         bool res = view.LoadDocument(TEST_FN_TO_OPEN);
18953         MYASSERT(res, "load document");
18954         view.getPageImage(0);
18955         view.getDocProps()->setInt(PROP_FORCED_MIN_FILE_SIZE_TO_CACHE, 30000);
18956         view.swapToCache();
18957         //MYASSERT(res, "swap to cache");
18958         view.getDocument()->dumpStatistics();
18959     }
18960     {
18961         CRLog::info("====Open document from cache=====");
18962         LVDocView view(4);
18963         view.Resize(600, 800);
18964         bool res = view.LoadDocument(TEST_FN_TO_OPEN);
18965         MYASSERT(res, "load document");
18966         view.getDocument()->dumpStatistics();
18967         view.getPageImage(0);
18968     }
18969     CRLog::info("====Cache test finished=====");
18970 #endif
18971 }
18972 
18973 void runBasicTinyDomUnitTests()
18974 {
18975     CRLog::info("==========================");
18976     CRLog::info("Starting tinyDOM unit test");
18977     ldomDocument * doc = new ldomDocument();
18978     ldomNode * root = doc->getRootNode();//doc->allocTinyElement( NULL, 0, 0 );
18979     MYASSERT(root!=NULL,"root != NULL");
18980 
18981     int el_p = doc->getElementNameIndex(U"p");
18982     int el_title = doc->getElementNameIndex(U"title");
18983     int el_strong = doc->getElementNameIndex(U"strong");
18984     int el_emphasis = doc->getElementNameIndex(U"emphasis");
18985     int attr_id = doc->getAttrNameIndex(U"id");
18986     int attr_name = doc->getAttrNameIndex(U"name");
18987     static lUInt16 path1[] = {el_title, el_p, 0};
18988     static lUInt16 path2[] = {el_title, el_p, el_strong, 0};
18989 
18990     CRLog::info("* simple DOM operations, tinyElement");
18991     MYASSERT(root->isRoot(),"root isRoot");
18992     MYASSERT(root->getParentNode()==NULL,"root parent is null");
18993     MYASSERT(root->getParentIndex()==0,"root parent index == 0");
18994     MYASSERT(root->getChildCount()==0,"empty root child count");
18995     ldomNode * el1 = root->insertChildElement(el_p);
18996     MYASSERT(root->getChildCount()==1,"root child count 1");
18997     MYASSERT(el1->getParentNode()==root,"element parent node");
18998     MYASSERT(el1->getParentIndex()==root->getDataIndex(),"element parent node index");
18999     MYASSERT(el1->getNodeId()==el_p, "node id");
19000     MYASSERT(el1->getNodeNsId()==LXML_NS_NONE, "node nsid");
19001     MYASSERT(!el1->isRoot(),"elem not isRoot");
19002     ldomNode * el2 = root->insertChildElement(el_title);
19003     MYASSERT(root->getChildCount()==2,"root child count 2");
19004     MYASSERT(el2->getNodeId()==el_title, "node id");
19005     MYASSERT(el2->getNodeNsId()==LXML_NS_NONE, "node nsid");
19006     lString32 nodename = el2->getNodeName();
19007     //CRLog::debug("node name: %s", LCSTR(nodename));
19008     MYASSERT(nodename==U"title","node name");
19009     ldomNode * el21 = el2->insertChildElement(el_p);
19010     MYASSERT(root->getNodeLevel()==1,"node level 1");
19011     MYASSERT(el2->getNodeLevel()==2,"node level 2");
19012     MYASSERT(el21->getNodeLevel()==3,"node level 3");
19013     MYASSERT(el21->getNodeIndex()==0,"node index single");
19014     MYASSERT(el1->getNodeIndex()==0,"node index first");
19015     MYASSERT(el2->getNodeIndex()==1,"node index last");
19016     MYASSERT(root->getNodeIndex()==0,"node index for root");
19017     MYASSERT(root->getFirstChild()==el1,"first child");
19018     MYASSERT(root->getLastChild()==el2,"last child");
19019     MYASSERT(el2->getFirstChild()==el21,"first single child");
19020     MYASSERT(el2->getLastChild()==el21,"last single child");
19021     MYASSERT(el21->getFirstChild()==NULL,"first child - no children");
19022     MYASSERT(el21->getLastChild()==NULL,"last child - no children");
19023     ldomNode * el0 = root->insertChildElement(1, LXML_NS_NONE, el_title);
19024     MYASSERT(el1->getNodeIndex()==0,"insert in the middle");
19025     MYASSERT(el0->getNodeIndex()==1,"insert in the middle");
19026     MYASSERT(el2->getNodeIndex()==2,"insert in the middle");
19027     MYASSERT(root->getChildNode(0)==el1,"child node 0");
19028     MYASSERT(root->getChildNode(1)==el0,"child node 1");
19029     MYASSERT(root->getChildNode(2)==el2,"child node 2");
19030     ldomNode * removedNode = root->removeChild( 1 );
19031     MYASSERT(removedNode==el0,"removed node");
19032     el0->destroy();
19033     MYASSERT(el0->isNull(),"destroyed node isNull");
19034     MYASSERT(root->getChildNode(0)==el1,"child node 0, after removal");
19035     MYASSERT(root->getChildNode(1)==el2,"child node 1, after removal");
19036     ldomNode * el02 = root->insertChildElement(5, LXML_NS_NONE, el_emphasis);
19037     MYASSERT(el02==el0,"removed node reusage");
19038 
19039     {
19040         ldomNode * f1 = root->findChildElement(path1);
19041         MYASSERT(f1==el21, "find 1 on mutable - is el21");
19042         MYASSERT(f1->getNodeId()==el_p, "find 1 on mutable");
19043         //ldomNode * f2 = root->findChildElement(path2);
19044         //MYASSERT(f2!=NULL, "find 2 on mutable - not null");
19045         //MYASSERT(f2==el21, "find 2 on mutable - is el21");
19046         //MYASSERT(f2->getNodeId()==el_strong, "find 2 on mutable");
19047     }
19048 
19049     CRLog::info("* simple DOM operations, mutable text");
19050     lString32 sampleText("Some sample text.");
19051     lString32 sampleText2("Some sample text 2.");
19052     lString32 sampleText3("Some sample text 3.");
19053     ldomNode * text1 = el1->insertChildText(sampleText);
19054     MYASSERT(text1->getText()==sampleText, "sample text 1 match unicode");
19055     MYASSERT(text1->getNodeLevel()==3,"text node level");
19056     MYASSERT(text1->getNodeIndex()==0,"text node index");
19057     MYASSERT(text1->isText(),"text node isText");
19058     MYASSERT(!text1->isElement(),"text node isElement");
19059     MYASSERT(!text1->isNull(),"text node isNull");
19060     ldomNode * text2 = el1->insertChildText(0, sampleText2);
19061     MYASSERT(text2->getNodeIndex()==0,"text node index, insert at beginning");
19062     MYASSERT(text2->getText()==sampleText2, "sample text 2 match unicode");
19063     MYASSERT(text2->getText8()==UnicodeToUtf8(sampleText2), "sample text 2 match utf8");
19064     text1->setText(sampleText2);
19065     MYASSERT(text1->getText()==sampleText2, "sample text 1 match unicode, changed");
19066     text1->setText8(UnicodeToUtf8(sampleText3));
19067     MYASSERT(text1->getText()==sampleText3, "sample text 1 match unicode, changed 8");
19068     MYASSERT(text1->getText8()==UnicodeToUtf8(sampleText3), "sample text 1 match utf8, changed");
19069 
19070     MYASSERT(el1->getFirstTextChild()==text2, "firstTextNode");
19071     MYASSERT(el1->getLastTextChild()==text1, "lastTextNode");
19072     MYASSERT(el21->getLastTextChild()==NULL, "lastTextNode NULL");
19073 
19074 #if BUILD_LITE!=1
19075     CRLog::info("* style cache");
19076     {
19077         css_style_ref_t style1;
19078         style1 = css_style_ref_t( new css_style_rec_t );
19079         style1->display = css_d_block;
19080         style1->white_space = css_ws_normal;
19081         style1->text_align = css_ta_left;
19082         style1->text_align_last = css_ta_left;
19083         style1->text_decoration = css_td_none;
19084         style1->text_transform = css_tt_none;
19085         style1->hyphenate = css_hyph_auto;
19086         style1->color.type = css_val_unspecified;
19087         style1->color.value = 0x000000;
19088         style1->background_color.type = css_val_unspecified;
19089         style1->background_color.value = 0xFFFFFF;
19090         style1->page_break_before = css_pb_auto;
19091         style1->page_break_after = css_pb_auto;
19092         style1->page_break_inside = css_pb_auto;
19093         style1->vertical_align.type = css_val_unspecified;
19094         style1->vertical_align.value = css_va_baseline;
19095         style1->font_family = css_ff_sans_serif;
19096         style1->font_size.type = css_val_px;
19097         style1->font_size.value = 24 << 8;
19098         style1->font_name = cs8("Arial");
19099         style1->font_weight = css_fw_400;
19100         style1->font_style = css_fs_normal;
19101         style1->font_features.type = css_val_unspecified;
19102         style1->font_features.value = 0;
19103         style1->text_indent.type = css_val_px;
19104         style1->text_indent.value = 0;
19105         style1->line_height.type = css_val_unspecified;
19106         style1->line_height.value = css_generic_normal; // line-height: normal
19107         style1->cr_hint.type = css_val_unspecified;
19108         style1->cr_hint.value = CSS_CR_HINT_NONE;
19109 
19110         css_style_ref_t style2;
19111         style2 = css_style_ref_t( new css_style_rec_t );
19112         style2->display = css_d_block;
19113         style2->white_space = css_ws_normal;
19114         style2->text_align = css_ta_left;
19115         style2->text_align_last = css_ta_left;
19116         style2->text_decoration = css_td_none;
19117         style2->text_transform = css_tt_none;
19118         style2->hyphenate = css_hyph_auto;
19119         style2->color.type = css_val_unspecified;
19120         style2->color.value = 0x000000;
19121         style2->background_color.type = css_val_unspecified;
19122         style2->background_color.value = 0xFFFFFF;
19123         style2->page_break_before = css_pb_auto;
19124         style2->page_break_after = css_pb_auto;
19125         style2->page_break_inside = css_pb_auto;
19126         style2->vertical_align.type = css_val_unspecified;
19127         style2->vertical_align.value = css_va_baseline;
19128         style2->font_family = css_ff_sans_serif;
19129         style2->font_size.type = css_val_px;
19130         style2->font_size.value = 24 << 8;
19131         style2->font_name = cs8("Arial");
19132         style2->font_weight = css_fw_400;
19133         style2->font_style = css_fs_normal;
19134         style2->font_features.type = css_val_unspecified;
19135         style2->font_features.value = 0;
19136         style2->text_indent.type = css_val_px;
19137         style2->text_indent.value = 0;
19138         style2->line_height.type = css_val_unspecified;
19139         style2->line_height.value = css_generic_normal; // line-height: normal
19140         style2->cr_hint.type = css_val_unspecified;
19141         style2->cr_hint.value = CSS_CR_HINT_NONE;
19142 
19143         css_style_ref_t style3;
19144         style3 = css_style_ref_t( new css_style_rec_t );
19145         style3->display = css_d_block;
19146         style3->white_space = css_ws_normal;
19147         style3->text_align = css_ta_right;
19148         style3->text_align_last = css_ta_left;
19149         style3->text_decoration = css_td_none;
19150         style3->text_transform = css_tt_none;
19151         style3->hyphenate = css_hyph_auto;
19152         style3->color.type = css_val_unspecified;
19153         style3->color.value = 0x000000;
19154         style3->background_color.type = css_val_unspecified;
19155         style3->background_color.value = 0xFFFFFF;
19156         style3->page_break_before = css_pb_auto;
19157         style3->page_break_after = css_pb_auto;
19158         style3->page_break_inside = css_pb_auto;
19159         style3->vertical_align.type = css_val_unspecified;
19160         style3->vertical_align.value = css_va_baseline;
19161         style3->font_family = css_ff_sans_serif;
19162         style3->font_size.type = css_val_px;
19163         style3->font_size.value = 24 << 8;
19164         style3->font_name = cs8("Arial");
19165         style3->font_weight = css_fw_400;
19166         style3->font_style = css_fs_normal;
19167         style3->font_features.type = css_val_unspecified;
19168         style3->font_features.value = 0;
19169         style3->text_indent.type = css_val_px;
19170         style3->text_indent.value = 0;
19171         style3->line_height.type = css_val_unspecified;
19172         style3->line_height.value = css_generic_normal; // line-height: normal
19173         style3->cr_hint.type = css_val_unspecified;
19174         style3->cr_hint.value = CSS_CR_HINT_NONE;
19175 
19176         el1->setStyle(style1);
19177         css_style_ref_t s1 = el1->getStyle();
19178         MYASSERT(!s1.isNull(), "style is set");
19179         el2->setStyle(style2);
19180         MYASSERT(*style1==*style2, "identical styles : == is true");
19181         MYASSERT(calcHash(*style1)==calcHash(*style2), "identical styles have the same hashes");
19182         MYASSERT(el1->getStyle().get()==el2->getStyle().get(), "identical styles reused");
19183         el21->setStyle(style3);
19184         MYASSERT(el1->getStyle().get()!=el21->getStyle().get(), "different styles not reused");
19185     }
19186 
19187     CRLog::info("* font cache");
19188     {
19189         font_ref_t font1 = fontMan->GetFont(24, 400, false, css_ff_sans_serif, cs8("DejaVu Sans"));
19190         font_ref_t font2 = fontMan->GetFont(24, 400, false, css_ff_sans_serif, cs8("DejaVu Sans"));
19191         font_ref_t font3 = fontMan->GetFont(28, 800, false, css_ff_serif, cs8("DejaVu Sans Condensed"));
19192         MYASSERT(el1->getFont().isNull(), "font is not set");
19193         el1->setFont(font1);
19194         MYASSERT(!el1->getFont().isNull(), "font is set");
19195         el2->setFont(font2);
19196         MYASSERT(*font1==*font2, "identical fonts : == is true");
19197         MYASSERT(calcHash(font1)==calcHash(font2), "identical styles have the same hashes");
19198         MYASSERT(el1->getFont().get()==el2->getFont().get(), "identical fonts reused");
19199         el21->setFont(font3);
19200         MYASSERT(el1->getFont().get()!=el21->getFont().get(), "different fonts not reused");
19201     }
19202 
19203     CRLog::info("* persistance test");
19204 
19205     el2->setAttributeValue(LXML_NS_NONE, attr_id, U"id1");
19206     el2->setAttributeValue(LXML_NS_NONE, attr_name, U"name1");
19207     MYASSERT(el2->getNodeId()==el_title, "mutable node id");
19208     MYASSERT(el2->getNodeNsId()==LXML_NS_NONE, "mutable node nsid");
19209     MYASSERT(el2->getAttributeValue(attr_id)==U"id1", "attr id1 mutable");
19210     MYASSERT(el2->getAttributeValue(attr_name)==U"name1", "attr name1 mutable");
19211     MYASSERT(el2->getAttrCount()==2, "attr count mutable");
19212     el2->persist();
19213     MYASSERT(el2->getAttributeValue(attr_id)==U"id1", "attr id1 pers");
19214     MYASSERT(el2->getAttributeValue(attr_name)==U"name1", "attr name1 pers");
19215     MYASSERT(el2->getNodeId()==el_title, "persistent node id");
19216     MYASSERT(el2->getNodeNsId()==LXML_NS_NONE, "persistent node nsid");
19217     MYASSERT(el2->getAttrCount()==2, "attr count persist");
19218 
19219     {
19220         ldomNode * f1 = root->findChildElement(path1);
19221         MYASSERT(f1==el21, "find 1 on mutable - is el21");
19222         MYASSERT(f1->getNodeId()==el_p, "find 1 on mutable");
19223     }
19224 
19225     el2->modify();
19226     MYASSERT(el2->getNodeId()==el_title, "mutable 2 node id");
19227     MYASSERT(el2->getNodeNsId()==LXML_NS_NONE, "mutable 2 node nsid");
19228     MYASSERT(el2->getAttributeValue(attr_id)==U"id1", "attr id1 mutable 2");
19229     MYASSERT(el2->getAttributeValue(attr_name)==U"name1", "attr name1 mutable 2");
19230     MYASSERT(el2->getAttrCount()==2, "attr count mutable 2");
19231 
19232     {
19233         ldomNode * f1 = root->findChildElement(path1);
19234         MYASSERT(f1==el21, "find 1 on mutable - is el21");
19235         MYASSERT(f1->getNodeId()==el_p, "find 1 on mutable");
19236     }
19237 
19238     CRLog::info("* convert to persistent");
19239     CRTimerUtil infinite;
19240     doc->persist(infinite);
19241     doc->dumpStatistics();
19242 
19243     MYASSERT(el21->getFirstChild()==NULL,"first child - no children");
19244     MYASSERT(el21->isPersistent(), "persistent before insertChildElement");
19245     ldomNode * el211 = el21->insertChildElement(el_strong);
19246     MYASSERT(!el21->isPersistent(), "mutable after insertChildElement");
19247     el211->persist();
19248     MYASSERT(el211->isPersistent(), "persistent before insertChildText");
19249     el211->insertChildText(cs32(U"bla bla bla"));
19250     el211->insertChildText(cs32(U"bla bla blaw"));
19251     MYASSERT(!el211->isPersistent(), "modifable after insertChildText");
19252     //el21->insertChildElement(el_strong);
19253     MYASSERT(el211->getChildCount()==2, "child count, in mutable");
19254     el211->persist();
19255     MYASSERT(el211->getChildCount()==2, "child count, in persistent");
19256     el211->modify();
19257     MYASSERT(el211->getChildCount()==2, "child count, in mutable again");
19258     CRTimerUtil infinite2;
19259     doc->persist(infinite2);
19260 
19261     ldomNode * f1 = root->findChildElement(path1);
19262     MYASSERT(f1->getNodeId()==el_p, "find 1");
19263     ldomNode * f2 = root->findChildElement(path2);
19264     MYASSERT(f2->getNodeId()==el_strong, "find 2");
19265     MYASSERT(f2 == el211, "find 2, ref");
19266 
19267 
19268     CRLog::info("* compacting");
19269     doc->compact();
19270     doc->dumpStatistics();
19271 #endif
19272 
19273     delete doc;
19274 
19275 
19276     CRLog::info("Finished tinyDOM unit test");
19277 
19278     CRLog::info("==========================");
19279 
19280 }
19281 
/// Manual smoke test for the CHM container reader.
///
/// Opens a CHM file from a hard-coded developer path, opens it as a CHM
/// container, opens "/index.html" inside it and reads two 1000-byte chunks
/// (first from offset 100, then from offset 0), tracing each chunk as text.
/// Every step is checked with MYASSERT.
///
/// The body is only compiled when CHM_SUPPORT_ENABLED==1 and BUILD_LITE!=1;
/// otherwise the function is empty.
void runCHMUnitTest()
{
#if CHM_SUPPORT_ENABLED==1 && BUILD_LITE!=1
    // Size of each chunk read from the item (and of the local buffer).
    const int CHUNK_SIZE = 1000;

    LVStreamRef chmFile = LVOpenFileStream("/home/lve/src/test/mysql.chm", LVOM_READ);
    MYASSERT( !chmFile.isNull(), "container stream opened" );
    CRLog::trace("runCHMUnitTest() -- file stream opened ok");

    LVContainerRef container = LVOpenCHMContainer( chmFile );
    MYASSERT( !container.isNull(), "container opened" );
    CRLog::trace("runCHMUnitTest() -- container opened ok");

    LVStreamRef item = container->OpenStream(U"/index.html", LVOM_READ);
    MYASSERT( !item.isNull(), "item opened" );
    CRLog::trace("runCHMUnitTest() -- index.html opened ok: size=%d", static_cast<int>(item->GetSize()));

    char chunk[CHUNK_SIZE];
    lvsize_t got = 0;

    // First pass: read a chunk starting at offset 100.
    MYASSERT( item->SetPos(100)==100, "SetPos()" );
    MYASSERT( item->Read(chunk, CHUNK_SIZE, &got)==LVERR_OK, "Read()" );
    MYASSERT( got==CHUNK_SIZE, "Read() -- bytesRead" );
    // The last byte is overwritten with a terminator so the chunk can be
    // logged through a %s format.
    chunk[CHUNK_SIZE - 1] = 0;
    CRLog::trace("CHM/index.html Contents 1000: %s", chunk);

    // Second pass: rewind and read a chunk from the very beginning.
    MYASSERT( item->SetPos(0)==0, "SetPos() 2" );
    MYASSERT( item->Read(chunk, CHUNK_SIZE, &got)==LVERR_OK, "Read() 2" );
    MYASSERT( got==CHUNK_SIZE, "Read() -- bytesRead 2" );
    chunk[CHUNK_SIZE - 1] = 0;
    CRLog::trace("CHM/index.html Contents 0: %s", chunk);
#endif
}
19311 
19312 static void makeTestFile( const char * fname, int size )
19313 {
19314     LVStreamRef s = LVOpenFileStream( fname, LVOM_WRITE );
19315     MYASSERT( !s.isNull(), "makeTestFile create" );
19316     int seed = 0;
19317     lUInt8 * buf = new lUInt8[size];
19318     for ( int i=0; i<size; i++ ) {
19319         buf[i] = (seed >> 9) & 255;
19320         seed = seed * 31 + 14323;
19321     }
19322     MYASSERT( s->Write(buf, size, NULL)==LVERR_OK, "makeTestFile write" );
19323     delete[] buf;
19324 }
19325 
19326 void runBlockWriteCacheTest()
19327 {
19328 
19329 
19330 
19331     int sz = 2000000;
19332     const char * fn1 = "/tmp/tf1.dat";
19333     const char * fn2 = "/tmp/tf2.dat";
19334     //makeTestFile( fn1, sz );
19335     //makeTestFile( fn2, sz );
19336 
19337     CRLog::debug("BlockCache test started");
19338 
19339     LVStreamRef s1 = LVOpenFileStream( fn1, LVOM_APPEND );
19340     LVStreamRef s2 =  LVCreateBlockWriteStream( LVOpenFileStream( fn2, LVOM_APPEND ), 0x8000, 16);
19341     MYASSERT(! s1.isNull(), "s1");
19342     MYASSERT(! s2.isNull(), "s2");
19343     LVStreamRef ss = LVCreateCompareTestStream(s1, s2);
19344     lUInt8 buf[0x100000];
19345     for ( int i=0; i<sizeof(buf); i++ ) {
19346         buf[i] = (lUInt8)(rand() & 0xFF);
19347     }
19348     //memset( buf, 0xAD, 1000000 );
19349     ss->SetPos( 0 );
19350     ss->Write( buf, 150, NULL );
19351     ss->SetPos( 0 );
19352     ss->Write( buf, 150, NULL );
19353     ss->SetPos( 0 );
19354     ss->Write( buf, 150, NULL );
19355 
19356 
19357     ss->SetPos( 1000 );
19358     ss->Read( buf, 5000, NULL );
19359     ss->SetPos( 100000 );
19360     ss->Read( buf+10000, 150000, NULL );
19361 
19362     ss->SetPos( 1000 );
19363     ss->Write( buf, 15000, NULL );
19364     ss->SetPos( 1000 );
19365     ss->Read( buf+100000, 15000, NULL );
19366     ss->Read( buf, 1000000, NULL );
19367 
19368 
19369     ss->SetPos( 1000 );
19370     ss->Write( buf, 15000, NULL );
19371     ss->Write( buf, 15000, NULL );
19372     ss->Write( buf, 15000, NULL );
19373     ss->Write( buf, 15000, NULL );
19374 
19375 
19376     ss->SetPos( 100000 );
19377     ss->Write( buf+15000, 150000, NULL );
19378     ss->SetPos( 100000 );
19379     ss->Read( buf+25000, 200000, NULL );
19380 
19381     ss->SetPos( 100000 );
19382     ss->Read( buf+55000, 200000, NULL );
19383 
19384     ss->SetPos( 100000 );
19385     ss->Write( buf+1000, 250000, NULL );
19386     ss->SetPos( 150000 );
19387     ss->Read( buf, 50000, NULL );
19388     ss->SetPos( 1000000 );
19389     ss->Write( buf, 500000, NULL );
19390     for ( int i=0; i<10; i++ )
19391         ss->Write( buf, 5000, NULL );
19392     ss->Read( buf, 50000, NULL );
19393 
19394     ss->SetPos( 5000000 );
19395     ss->Write( buf, 500000, NULL );
19396     ss->SetPos( 4800000 );
19397     ss->Read( buf, 500000, NULL );
19398 
19399     for ( int i=0; i<20; i++ ) {
19400         int op = (rand() & 15) < 5;
19401         long offset = (rand()&0x7FFFF);
19402         long foffset = (rand()&0x3FFFFF);
19403         long size = (rand()&0x3FFFF);
19404         ss->SetPos(foffset);
19405         if ( op==0 ) {
19406             // read
19407             ss->Read(buf+offset, size, NULL);
19408         } else {
19409             ss->Write(buf+offset, size, NULL);
19410         }
19411     }
19412 
19413     CRLog::debug("BlockCache test finished");
19414 
19415 }
19416 
19417 void runTinyDomUnitTests()
19418 {
19419     CRLog::info("runTinyDomUnitTests()");
19420     runBlockWriteCacheTest();
19421 
19422     runBasicTinyDomUnitTests();
19423 
19424     CRLog::info("==========================");
19425     testCacheFile();
19426 
19427     runFileCacheTest();
19428     CRLog::info("==========================");
19429 
19430 }
19431 
19432 #endif
19433