1 /*******************************************************
2
3 CoolReader Engine
4
5 lvtinydom.cpp: fast and compact XML DOM tree
6
7 (c) Vadim Lopatin, 2000-2011
8 This source code is distributed under the terms of
9 GNU General Public License
10 See LICENSE file for details
11
12 *******************************************************/
13
14
/// Change this in case of incompatible changes in XML parsing or DOM
// building that could result in XPATHs being different than previously
// (this could make saved bookmarks and highlights, made with a previous
// version, not found in the DOM built with a newer version).
// Users of this library can request the old behaviour by setting
// gDOMVersionRequested to an older version to request the old (possibly
// buggy) behaviour.
// The value is a date (YYYYMMDD); see the list of changes below.
#define DOM_VERSION_CURRENT 20200824

// Also defined in include/lvtinydom.h
// (first DOM version using normalized XPointers, see the 20200223 entry below)
#define DOM_VERSION_WITH_NORMALIZED_XPOINTERS 20200223
26
27 // Changes:
28 // 20100101 to 20180502: historical version
29 //
30 // 20180503: fixed <BR> that were previously converted to <P> because
31 // of a fix for "bad LIB.RU books" being applied in any case. This
32 // could prevent the style of the container to be applied on HTML
33 // sub-parts after a <BR>, or the style of <P> (with text-indent) to
34 // be applied after a <BR>.
35 //
36 // 20180524: changed default rendering of:
37 // <li> (and css 'display:list-item') from css_d_list_item_legacy to css_d_list_item_block
38 // <cite> from css_d_block to css_d_inline (inline in HTML, block in FB2, ensured by fb2.css)
39 // <style> from css_d_inline to css_d_none (invisible in HTML)
40 // Changed also the default display: value for base elements (and so
41 // for unknown elements) from css_d_inherit to css_d_inline, and disable
42 // inheritance for the display: property, as per specs.
43 // See https://developer.mozilla.org/en-US/docs/Web/CSS/display
44 // (Initial value: inline; Inherited: no)
45 //
46 // 20180528: clean epub.css from class name based declarations
47 // added support for style property -cr-ignore-if-dom-version-greater-or-equal: 20180528;
48 // to ignore the whole declaration with newer gDOMVersionRequested.
49 // Use it to keep class name based declarations that involve display:
50 // so to not break previous DOM
51 // Also: fb2def.h updates
52 // Changed some elements from XS_TAG1 to XS_TAG1T (<hr>, <ul>, <ol>,
53 // <dl>, <output>, <section>, <svg>), so any text node direct child is
54 // now displayed instead of just being dropped (all browsers do display
55 // such child text nodes).
56 // Also no more hide the <form> element content, as it may contain
57 // textual information.
58 // Also change <code> from 'white-space: pre' to 'normal', like browsers do
59 // Added missing block elements from HTML specs so they are correctly
60 // displayed as 'block' instead of the new default of 'inline'.
61 //
// (20190703: added support for CSS float: and clear: which may
// insert <floatBox> elements in the DOM tree. But as this is
// toggleable, and legacy rendering is available, no need to limit
// their support to some DOM_VERSION. So no bump needed.)
//
// (20200110: added support for CSS display: inline-block and inline-table,
// which may insert <inlineBox> elements in the DOM tree. But as this is
// toggleable, and legacy rendering is available, no need to limit
// their support to some DOM_VERSION. So no bump needed.)
71 //
72 // 20200223: normalized XPointers/XPATHs, by using createXPointerV2()
73 // and toStringV2(), that should ensure XPointers survive changes
74 // in style->display and the insertion or removal of autoBoxing,
75 // floatBox and inlineBox.
76 // (Older gDOMVersionRequested will keep using createXPointerV1()
77 // and toStringV1() to have non-normalized XPointers still working.)
// (20200223: added toggleable auto completion of incomplete tables by
// wrapping some elements in a new <tabularBox>.)
80 //
81 // 20200824: added more HTML5 elements, and HTML parser changes
82 // to be (only a little bit) more HTML5 conformant
83
/// Current DOM version (value of DOM_VERSION_CURRENT), made available to other translation units.
// The explicit 'extern' gives this const object external linkage: a const
// object defined at namespace scope has internal linkage by default in C++.
extern const int gDOMVersionCurrent = DOM_VERSION_CURRENT;
85
86
/// change in case of incompatible changes in swap/cache file format to avoid using incompatible swap file
// (this string is embedded in COMPRESSED_CACHE_FILE_MAGIC and UNCOMPRESSED_CACHE_FILE_MAGIC,
// from which CACHE_FILE_MAGIC_SIZE bytes are copied and compared: with the current
// 7 characters, these strings are exactly CACHE_FILE_MAGIC_SIZE bytes long, including
// the terminating NUL, so keep that length when bumping the version)
#define CACHE_FILE_FORMAT_VERSION "3.12.75"

/// increment following value to force re-formatting of old book after load
// (it is the initial value of the hash computed by calcGlobalSettingsHash())
#define FORMATTING_VERSION_ID 0x0026

#ifndef DOC_DATA_COMPRESSION_LEVEL
/// data compression level (0=no compression, 1=fast compression, 3=normal compression)
// Note: keep this at 1 or above, toggling between compression and no-compression
// can be done at run time by calling compressCachedData(false)
#define DOC_DATA_COMPRESSION_LEVEL 1 // 0, 1, 3 (0=no compression)
#endif

#ifndef STREAM_AUTO_SYNC_SIZE
// NOTE(review): presumably a byte count meant for CacheFile::setAutoSyncSize() - confirm at the call site
#define STREAM_AUTO_SYNC_SIZE 300000
#endif //STREAM_AUTO_SYNC_SIZE
103
//=====================================================
// Document data caching parameters
//=====================================================

#ifndef DOC_BUFFER_SIZE
#define DOC_BUFFER_SIZE 0x00A00000UL // default buffer size (10 MB)
#endif

#if DOC_BUFFER_SIZE >= 0x7FFFFFFFUL
#error DOC_BUFFER_SIZE value is too large. This results in integer overflow.
#endif

//--------------------------------------------------------
// cache memory sizes
// (each *_SPACE and *_TOTAL_SIZE value is a percentage of DOC_BUFFER_SIZE;
// note that these percentages add up to more than 100)
//--------------------------------------------------------
#ifndef ENABLED_BLOCK_WRITE_CACHE
#define ENABLED_BLOCK_WRITE_CACHE 1
#endif

#define WRITE_CACHE_TOTAL_SIZE (10*DOC_BUFFER_SIZE/100) // 10%

#define TEXT_CACHE_UNPACKED_SPACE (25*DOC_BUFFER_SIZE/100) // 25%
#define TEXT_CACHE_CHUNK_SIZE 0x008000 // 32K
#define ELEM_CACHE_UNPACKED_SPACE (45*DOC_BUFFER_SIZE/100) // 45%
#define ELEM_CACHE_CHUNK_SIZE 0x004000 // 16K
#define RECT_CACHE_UNPACKED_SPACE (45*DOC_BUFFER_SIZE/100) // 45%
#define RECT_CACHE_CHUNK_SIZE 0x00F000 // 60K (0xF000 = 61440 bytes)
#define STYLE_CACHE_UNPACKED_SPACE (10*DOC_BUFFER_SIZE/100) // 10%
#define STYLE_CACHE_CHUNK_SIZE 0x00C000 // 48K
133 //--------------------------------------------------------
134
// Per data kind compression switches
// NOTE(review): presumably given as the 'compress' argument of CacheFile::write() - confirm at the call sites
#define COMPRESS_NODE_DATA true
#define COMPRESS_NODE_STORAGE_DATA true
#define COMPRESS_MISC_DATA true
#define COMPRESS_PAGES_DATA true
#define COMPRESS_TOC_DATA true
#define COMPRESS_PAGEMAP_DATA true
#define COMPRESS_STYLE_DATA true

//#define CACHE_FILE_SECTOR_SIZE 4096
/// granularity of block positions and sizes in the cache file (initial value of CacheFile::_sectorSize)
// CacheFile::roundSector() rounds with a bit mask, which requires a power of two here
#define CACHE_FILE_SECTOR_SIZE 1024
#define CACHE_FILE_WRITE_BLOCK_PADDING 1

/// set to 1 to log storage reads/writes
#define DEBUG_DOM_STORAGE 0
//#define TRACE_AUTOBOX
/// set to 1 to enable crc check of all blocks of cache file on open
// (this is only the default: it can be changed at run time with enableCacheFileContentsValidation())
#ifndef ENABLE_CACHE_FILE_CONTENTS_VALIDATION
#define ENABLE_CACHE_FILE_CONTENTS_VALIDATION 1
#endif

// log2 of the number of items per chunk, see RECT_DATA_CHUNK_ITEMS and STYLE_DATA_CHUNK_ITEMS below
#define RECT_DATA_CHUNK_ITEMS_SHIFT 11
#define STYLE_DATA_CHUNK_ITEMS_SHIFT 12

// calculated parameters
#define WRITE_CACHE_BLOCK_SIZE 0x4000
#define WRITE_CACHE_BLOCK_COUNT (WRITE_CACHE_TOTAL_SIZE/WRITE_CACHE_BLOCK_SIZE)
#define TEST_BLOCK_STREAM 0

#define PACK_BUF_SIZE 0x10000
#define UNPACK_BUF_SIZE 0x40000

#define RECT_DATA_CHUNK_ITEMS (1<<RECT_DATA_CHUNK_ITEMS_SHIFT) // 2048 items
#define RECT_DATA_CHUNK_SIZE (RECT_DATA_CHUNK_ITEMS*sizeof(lvdomElementFormatRec))
#define RECT_DATA_CHUNK_MASK (RECT_DATA_CHUNK_ITEMS-1)

#define STYLE_DATA_CHUNK_ITEMS (1<<STYLE_DATA_CHUNK_ITEMS_SHIFT) // 4096 items
#define STYLE_DATA_CHUNK_SIZE (STYLE_DATA_CHUNK_ITEMS*sizeof(ldomNodeStyleInfo))
#define STYLE_DATA_CHUNK_MASK (STYLE_DATA_CHUNK_ITEMS-1)


#define STYLE_HASH_TABLE_SIZE 512
#define FONT_HASH_TABLE_SIZE 256
177
178
179 static const char COMPRESSED_CACHE_FILE_MAGIC[] = "CoolReader 3 Cache"
180 " File v" CACHE_FILE_FORMAT_VERSION ": "
181 "c0"
182 "m1"
183 "\n";
184
185 static const char UNCOMPRESSED_CACHE_FILE_MAGIC[] = "CoolReader 3 Cache"
186 " File v" CACHE_FILE_FORMAT_VERSION ": "
187 "c0"
188 "m0"
189 "\n";
190
191 #define CACHE_FILE_MAGIC_SIZE 40
192
/// Types of the blocks stored in a cache file.
// These values are stored in CacheFileItem::_dataType, so they are
// spelled out explicitly: do not renumber them.
enum CacheFileBlockType {
    CBT_FREE            = 0,
    CBT_INDEX           = 1,
    CBT_TEXT_DATA       = 2,
    CBT_ELEM_DATA       = 3,
    CBT_RECT_DATA       = 4,
    CBT_ELEM_STYLE_DATA = 5,
    CBT_MAPS_DATA       = 6,
    CBT_PAGE_DATA       = 7,
    CBT_PROP_DATA       = 8,
    CBT_NODE_INDEX      = 9,
    CBT_ELEM_NODE       = 10,
    CBT_TEXT_NODE       = 11,
    CBT_REND_PARAMS     = 12,
    CBT_TOC_DATA        = 13,
    CBT_PAGEMAP_DATA    = 14,
    CBT_STYLE_DATA      = 15,
    CBT_BLOB_INDEX      = 16,
    CBT_BLOB_DATA       = 17,
    CBT_FONT_DATA       = 18
};
214
215
216 #include <stdlib.h>
217 #include <string.h>
218 #include "../include/crsetup.h"
219 #include "../include/lvstring.h"
220 #include "../include/lvtinydom.h"
221 #include "../include/fb2def.h"
222 #if BUILD_LITE!=1
223 #include "../include/lvrend.h"
224 #include "../include/chmfmt.h"
225 #endif
226 #include "../include/crtest.h"
227 #include "../include/crlog.h"
228 #include <stddef.h>
229 #include <math.h>
230 #include <zlib.h>
231 #include <xxhash.h>
232 #include <lvtextfm.h>
233
// define to 1 to store new text nodes as persistent text, instead of as
// mutable text nodes (see class ldomTextNode below for the mutable kind)
#define USE_PERSISTENT_TEXT 1
236
237
// Cached data is compressed by default: this gives smaller cache files,
// at the cost of slower rendering and page turns with big documents.
static bool _compressCachedData = true;

/// enables or disables compression of cached data
// (this also selects which cache file magic is written and accepted,
// see SimpleCacheFileHeader and CacheFileHeader::validate())
void compressCachedData(bool enable)
{
    _compressCachedData = enable;
}
244
// With the default factor of 1, TEXT_CACHE_UNPACKED_SPACE & co defined
// above are used as is.
static float _storageMaxUncompressedSizeFactor = 1;

/// sets the factor stored in _storageMaxUncompressedSizeFactor
void setStorageMaxUncompressedSizeFactor(float factor)
{
    _storageMaxUncompressedSizeFactor = factor;
}
250
251 static bool _enableCacheFileContentsValidation = (bool)ENABLE_CACHE_FILE_CONTENTS_VALIDATION;
enableCacheFileContentsValidation(bool enable)252 void enableCacheFileContentsValidation(bool enable) {
253 _enableCacheFileContentsValidation = enable;
254 }
255
256 static int _nextDocumentIndex = 0;
257 ldomDocument * ldomNode::_documentInstances[MAX_DOCUMENT_INSTANCE_COUNT] = {NULL,};
258
259 /// adds document to list, returns ID of allocated document, -1 if no space in instance array
registerDocument(ldomDocument * doc)260 int ldomNode::registerDocument( ldomDocument * doc )
261 {
262 for ( int i=0; i<MAX_DOCUMENT_INSTANCE_COUNT; i++ ) {
263 if ( _nextDocumentIndex<0 || _nextDocumentIndex>=MAX_DOCUMENT_INSTANCE_COUNT )
264 _nextDocumentIndex = 0;
265 if ( _documentInstances[_nextDocumentIndex]==NULL) {
266 _documentInstances[_nextDocumentIndex] = doc;
267 CRLog::info("ldomNode::registerDocument() - new index = %d", _nextDocumentIndex);
268 return _nextDocumentIndex++;
269 }
270 _nextDocumentIndex++;
271 }
272 return -1;
273 }
274
275 /// removes document from list
unregisterDocument(ldomDocument * doc)276 void ldomNode::unregisterDocument( ldomDocument * doc )
277 {
278 for ( int i=0; i<MAX_DOCUMENT_INSTANCE_COUNT; i++ ) {
279 if ( _documentInstances[i]==doc ) {
280 CRLog::info("ldomNode::unregisterDocument() - for index %d", i);
281 _documentInstances[i] = NULL;
282 }
283 }
284 }
285
286 /// mutable text node
287 class ldomTextNode
288 {
289 lUInt32 _parentIndex;
290 lString8 _text;
291 public:
292
getParentIndex()293 lUInt32 getParentIndex()
294 {
295 return _parentIndex;
296 }
297
setParentIndex(lUInt32 n)298 void setParentIndex( lUInt32 n )
299 {
300 _parentIndex = n;
301 }
302
ldomTextNode(lUInt32 parentIndex,const lString8 & text)303 ldomTextNode( lUInt32 parentIndex, const lString8 & text )
304 : _parentIndex(parentIndex), _text(text)
305 {
306 }
307
getText()308 lString8 getText()
309 {
310 return _text;
311 }
312
getText32()313 lString32 getText32()
314 {
315 return Utf8ToUnicode(_text);
316 }
317
setText(const lString8 & s)318 void setText( const lString8 & s )
319 {
320 _text = s;
321 }
322
setText(const lString32 & s)323 void setText( const lString32 & s )
324 {
325 _text = UnicodeToUtf8(s);
326 }
327 };
328
/// Calls crFatalError() with code 1111 when the condition x is false.
// Wrapped in do { } while (0) so that it expands to a single statement:
// with a bare 'if', using LASSERT() as the body of an if/else would
// silently attach the 'else' to the macro's own 'if'.
#define LASSERT(x) \
    do { \
        if (!(x)) crFatalError(1111, "assertion failed: " #x); \
    } while (0)
331
332 //#define INDEX1 94
333 //#define INDEX2 96
334
335 //#define INDEX1 105
336 //#define INDEX2 106
337
/// pack data: compresses the bufsize bytes at buf, the result is returned via dstbuf and dstsize
// NOTE(review): defined outside of this part of the file - presumably dstbuf is
// allocated by the callee and has to be released by the caller; confirm against the definition
bool ldomPack( const lUInt8 * buf, int bufsize, lUInt8 * &dstbuf, lUInt32 & dstsize );
/// unpack data: uncompresses the compsize bytes at compbuf, the result is returned via dstbuf and dstsize
// NOTE(review): same remark as for ldomPack() about who owns dstbuf
bool ldomUnpack( const lUInt8 * compbuf, int compsize, lUInt8 * &dstbuf, lUInt32 & dstsize );
342
343
344 #if BUILD_LITE!=1
345
346 //static lUInt32 calcHash32( const lUInt8 * s, int len )
347 //{
348 // lUInt32 res = 0;
349 // for ( int i=0; i<len; i++ ) {
350 // // res*31 + s
351 // res = (((((((res<<1)+res)<<1)+res)<<1)+res)<<1)+res + s[i];
352 // }
353 // return res;
354 //}
355
356 // FNV 64bit hash function
357 // from http://isthe.com/chongo/tech/comp/fnv/#gcc-O3
358
359 //#define NO_FNV_GCC_OPTIMIZATION
360 /*#define FNV_64_PRIME ((lUInt64)0x100000001b3ULL)
361 static lUInt64 calcHash64( const lUInt8 * s, int len )
362 {
363 const lUInt8 * endp = s + len;
364 // 64 bit FNV hash function
365 lUInt64 hval = 14695981039346656037ULL;
366 for ( ; s<endp; s++ ) {
367 #if defined(NO_FNV_GCC_OPTIMIZATION)
368 hval *= FNV_64_PRIME;
369 #else *//* NO_FNV_GCC_OPTIMIZATION *//*
370 hval += (hval << 1) + (hval << 4) + (hval << 5) +
371 (hval << 7) + (hval << 8) + (hval << 40);
372 #endif *//* NO_FNV_GCC_OPTIMIZATION *//*
373 hval ^= *s;
374 }
375 return hval;
376 }*/
calcHash(const lUInt8 * s,int len)377 static lUInt32 calcHash(const lUInt8 * s, int len)
378 {
379 return XXH32(s,len,0);
380 }
calcGlobalSettingsHash(int documentId,bool already_rendered)381 lUInt32 calcGlobalSettingsHash(int documentId, bool already_rendered)
382 {
383 lUInt32 hash = FORMATTING_VERSION_ID;
384 hash = hash * 31 + (int)fontMan->GetShapingMode();
385 if (fontMan->GetKerning())
386 hash = hash * 75 + 1761;
387 hash = hash * 31 + fontMan->GetFontListHash(documentId);
388 hash = hash * 31 + (int)fontMan->GetHintingMode();
389 if ( LVRendGetFontEmbolden() )
390 hash = hash * 75 + 2384761;
391 hash = hash * 31 + fontMan->GetFallbackFontFaces().getHash();
392 hash = hash * 31 + gRenderDPI;
393 hash = hash * 31 + gRootFontSize;
394 // If not yet rendered (initial loading with XML parsing), we can
395 // ignore some global flags that have not yet produced any effect,
396 // so they can possibly be updated between loading and rendering
397 // without trigerring a drop of all the styles and rend methods
398 // set up in the XML loading phase. This is mostly only needed
399 // for TextLangMan::getHash(), as the lang can be set by frontend
400 // code after the loading phase, once the book language is known
401 // from its metadata, before the rendering that will use the
402 // language set. (We could ignore some of the other settings
403 // above if we ever need to reset them in between these phases;
404 // just be certain they are really not used in the first phase.)
405 if ( already_rendered ) {
406 hash = hash * 31 + TextLangMan::getHash();
407 hash = hash * 31 + HyphMan::getLeftHyphenMin();
408 hash = hash * 31 + HyphMan::getRightHyphenMin();
409 hash = hash * 31 + HyphMan::getTrustSoftHyphens();
410 }
411 return hash;
412 }
413
dumpRendMethods(ldomNode * node,lString32 prefix)414 static void dumpRendMethods( ldomNode * node, lString32 prefix )
415 {
416 lString32 name = prefix;
417 if ( node->isText() )
418 name << node->getText();
419 else
420 name << "<" << node->getNodeName() << "> " << fmt::decimal(node->getRendMethod());
421 CRLog::trace( "%s ",LCSTR(name) );
422 for ( int i=0; i<node->getChildCount(); i++ ) {
423 dumpRendMethods( node->getChildNode(i), prefix + " ");
424 }
425 }
426
427
428
429
430
431 #define CACHE_FILE_ITEM_MAGIC 0xC007B00C
432 struct CacheFileItem
433 {
434 lUInt32 _magic; // magic number
435 lUInt16 _dataType; // data type
436 lUInt16 _dataIndex; // additional data index, for internal usage for data type
437 int _blockIndex; // sequential number of block
438 int _blockFilePos; // start of block
439 int _blockSize; // size of block within file
440 int _dataSize; // used data size inside block (<= block size)
441 lUInt64 _dataHash; // additional hash of data
442 lUInt64 _packedHash; // additional hash of packed data
443 lUInt32 _uncompressedSize; // size of uncompressed block, if compression is applied, 0 if no compression
444 lUInt32 _padding; // explicite padding (this struct would be implicitely padded from 44 bytes to 48 bytes)
445 // so we can set this padding value to 0 (instead of some random data with implicite padding)
446 // in order to get reproducible (same file checksum) cache files when this gets serialized
validateCacheFileItem447 bool validate( int fsize )
448 {
449 if ( _magic!=CACHE_FILE_ITEM_MAGIC ) {
450 CRLog::error("CacheFileItem::validate: block magic doesn't match");
451 return false;
452 }
453 if ( _dataSize>_blockSize || _blockSize<0 || _dataSize<0 || _blockFilePos+_dataSize>fsize || _blockFilePos<CACHE_FILE_SECTOR_SIZE) {
454 CRLog::error("CacheFileItem::validate: invalid block size or position");
455 return false;
456 }
457 return true;
458 }
CacheFileItemCacheFileItem459 CacheFileItem()
460 {
461 }
CacheFileItemCacheFileItem462 CacheFileItem( lUInt16 dataType, lUInt16 dataIndex )
463 : _magic(CACHE_FILE_ITEM_MAGIC)
464 , _dataType(dataType) // data type
465 , _dataIndex(dataIndex) // additional data index, for internal usage for data type
466 , _blockIndex(0) // sequential number of block
467 , _blockFilePos(0) // start of block
468 , _blockSize(0) // size of block within file
469 , _dataSize(0) // used data size inside block (<= block size)
470 , _dataHash(0) // hash of data
471 , _packedHash(0) // additional hash of packed data
472 , _uncompressedSize(0) // size of uncompressed block, if compression is applied, 0 if no compression
473 , _padding(0) // (padding)
474 {
475 }
476 };
477
478
479 struct SimpleCacheFileHeader
480 {
481 char _magic[CACHE_FILE_MAGIC_SIZE] = { 0 }; // magic
482 lUInt32 _dirty;
483 lUInt32 _dom_version;
SimpleCacheFileHeaderSimpleCacheFileHeader484 SimpleCacheFileHeader( lUInt32 dirtyFlag, lUInt32 domVersion ) {
485 memcpy( _magic, _compressCachedData ? COMPRESSED_CACHE_FILE_MAGIC : UNCOMPRESSED_CACHE_FILE_MAGIC, CACHE_FILE_MAGIC_SIZE );
486 _dirty = dirtyFlag;
487 _dom_version = domVersion;
488 }
489 };
490
491 struct CacheFileHeader : public SimpleCacheFileHeader
492 {
493 lUInt32 _fsize;
494 // Padding to explicitly align the index block structure, and that can be
495 // be initialized to zero for reproducible file contents.
496 lUInt32 _padding;
497 CacheFileItem _indexBlock; // index array block parameters,
498 // duplicate of one of index records which contains
validateCacheFileHeader499 bool validate(lUInt32 domVersionRequested)
500 {
501 if (memcmp(_magic, _compressCachedData ? COMPRESSED_CACHE_FILE_MAGIC : UNCOMPRESSED_CACHE_FILE_MAGIC, CACHE_FILE_MAGIC_SIZE) != 0) {
502 CRLog::error("CacheFileHeader::validate: magic doesn't match");
503 return false;
504 }
505 if ( _dirty!=0 ) {
506 CRLog::error("CacheFileHeader::validate: dirty flag is set");
507 printf("CRE: ignoring cache file (marked as dirty)\n");
508 return false;
509 }
510 if ( _dom_version != domVersionRequested ) {
511 CRLog::error("CacheFileHeader::validate: DOM version mismatch");
512 printf("CRE: ignoring cache file (dom version mismatch)\n");
513 return false;
514 }
515 return true;
516 }
CacheFileHeaderCacheFileHeader517 CacheFileHeader( CacheFileItem * indexRec, int fsize, lUInt32 dirtyFlag, lUInt32 domVersion )
518 : SimpleCacheFileHeader(dirtyFlag, domVersion), _indexBlock(0,0)
519 , _padding(0)
520 {
521 if ( indexRec ) {
522 memcpy( &_indexBlock, indexRec, sizeof(CacheFileItem));
523 } else
524 memset( &_indexBlock, 0, sizeof(CacheFileItem));
525 _fsize = fsize;
526 }
527 };
528
529 /**
530 * Cache file implementation.
531 */
532 class CacheFile
533 {
534 int _sectorSize; // block position and size granularity
535 int _size;
536 bool _indexChanged;
537 bool _dirty;
538 lUInt32 _domVersion;
539 lString32 _cachePath;
540 LVStreamRef _stream; // file stream
541 LVPtrVector<CacheFileItem, true> _index; // full file block index
542 LVPtrVector<CacheFileItem, false> _freeIndex; // free file block index
543 LVHashTable<lUInt32, CacheFileItem*> _map; // hash map for fast search
544 // searches for existing block
545 CacheFileItem * findBlock( lUInt16 type, lUInt16 index );
546 // alocates block at index, reuses existing one, if possible
547 CacheFileItem * allocBlock( lUInt16 type, lUInt16 index, int size );
548 // mark block as free, for later reusing
549 void freeBlock( CacheFileItem * block );
550 // writes file header
551 bool updateHeader();
552 // writes index block
553 bool writeIndex();
554 // reads index from file
555 bool readIndex();
556 // reads all blocks of index and checks CRCs
557 bool validateContents();
558 public:
559 // return current file size
getSize()560 int getSize() { return _size; }
561 // create uninitialized cache file, call open or create to initialize
562 CacheFile(lUInt32 domVersion);
563 // free resources
564 ~CacheFile();
565 // try open existing cache file
566 bool open( lString32 filename );
567 // try open existing cache file from stream
568 bool open( LVStreamRef stream );
569 // create new cache file
570 bool create( lString32 filename );
571 // create new cache file in stream
572 bool create( LVStreamRef stream );
573 /// writes block to file
574 bool write( lUInt16 type, lUInt16 dataIndex, const lUInt8 * buf, int size, bool compress );
575 /// reads and allocates block in memory
576 bool read( lUInt16 type, lUInt16 dataIndex, lUInt8 * &buf, int &size );
577 /// reads and validates block
578 bool validate( CacheFileItem * block );
579 /// writes content of serial buffer
580 bool write( lUInt16 type, lUInt16 index, SerialBuf & buf, bool compress );
581 /// reads content of serial buffer
582 bool read( lUInt16 type, lUInt16 index, SerialBuf & buf );
583 /// writes content of serial buffer
write(lUInt16 type,SerialBuf & buf,bool compress)584 bool write( lUInt16 type, SerialBuf & buf, bool compress )
585 {
586 return write( type, 0, buf, compress);
587 }
588 /// reads content of serial buffer
read(lUInt16 type,SerialBuf & buf)589 bool read( lUInt16 type, SerialBuf & buf )
590 {
591 return read(type, 0, buf);
592 }
593 /// reads block as a stream
594 LVStreamRef readStream(lUInt16 type, lUInt16 index);
595
596 /// sets dirty flag value, returns true if value is changed
597 bool setDirtyFlag( bool dirty );
598 /// sets DOM version value, returns true if value is changed
599 bool setDOMVersion( lUInt32 domVersion );
600 // flushes index
601 bool flush( bool clearDirtyFlag, CRTimerUtil & maxTime );
roundSector(int n)602 int roundSector( int n )
603 {
604 return (n + (_sectorSize-1)) & ~(_sectorSize-1);
605 }
setAutoSyncSize(int sz)606 void setAutoSyncSize(int sz) {
607 _stream->setAutoSyncSize(sz);
608 }
setCachePath(const lString32 cachePath)609 void setCachePath(const lString32 cachePath) {
610 _cachePath = cachePath;
611 }
getCachePath()612 const lString32 getCachePath() {
613 return _cachePath;
614 }
615 };
616
617
618 // create uninitialized cache file, call open or create to initialize
CacheFile(lUInt32 domVersion)619 CacheFile::CacheFile(lUInt32 domVersion)
620 : _sectorSize( CACHE_FILE_SECTOR_SIZE ), _size(0), _indexChanged(false), _dirty(true), _domVersion(domVersion), _map(1024), _cachePath(lString32::empty_str)
621 {
622 }
623
624 // free resources
~CacheFile()625 CacheFile::~CacheFile()
626 {
627 if ( !_stream.isNull() ) {
628 // don't flush -- leave file dirty
629 //CRTimerUtil infinite;
630 //flush( true, infinite );
631 }
632 }
633
634 /// sets dirty flag value, returns true if value is changed
setDirtyFlag(bool dirty)635 bool CacheFile::setDirtyFlag( bool dirty )
636 {
637 if ( _dirty==dirty )
638 return false;
639 if ( !dirty ) {
640 CRLog::info("CacheFile::clearing Dirty flag");
641 _stream->Flush(true);
642 } else {
643 CRLog::info("CacheFile::setting Dirty flag");
644 }
645 _dirty = dirty;
646 SimpleCacheFileHeader hdr(_dirty?1:0, _domVersion);
647 _stream->SetPos(0);
648 lvsize_t bytesWritten = 0;
649 _stream->Write(&hdr, sizeof(hdr), &bytesWritten );
650 if ( bytesWritten!=sizeof(hdr) )
651 return false;
652 _stream->Flush(true);
653 //CRLog::trace("setDirtyFlag : hdr is saved with Dirty flag = %d", hdr._dirty);
654 return true;
655 }
656
setDOMVersion(lUInt32 domVersion)657 bool CacheFile::setDOMVersion( lUInt32 domVersion ) {
658 if ( _domVersion == domVersion )
659 return false;
660 CRLog::info("CacheFile::setting DOM version value");
661 _domVersion = domVersion;
662 SimpleCacheFileHeader hdr(_dirty?1:0, _domVersion);
663 _stream->SetPos(0);
664 lvsize_t bytesWritten = 0;
665 _stream->Write(&hdr, sizeof(hdr), &bytesWritten );
666 if ( bytesWritten!=sizeof(hdr) )
667 return false;
668 _stream->Flush(true);
669 //CRLog::trace("setDOMVersion : hdr is saved with DOM version = %u", hdr._domVersionRequested);
670 return true;
671 }
672
673 // flushes index
flush(bool clearDirtyFlag,CRTimerUtil & maxTime)674 bool CacheFile::flush( bool clearDirtyFlag, CRTimerUtil & maxTime )
675 {
676 if ( clearDirtyFlag ) {
677 //setDirtyFlag(true);
678 if ( !writeIndex() )
679 return false;
680 setDirtyFlag(false);
681 } else {
682 _stream->Flush(false, maxTime);
683 //CRLog::trace("CacheFile->flush() took %d ms ", (int)timer.elapsed());
684 }
685 return true;
686 }
687
688 // reads all blocks of index and checks CRCs
validateContents()689 bool CacheFile::validateContents()
690 {
691 CRLog::info("Started validation of cache file contents");
692 LVHashTable<lUInt32, CacheFileItem*>::pair * pair;
693 for ( LVHashTable<lUInt32, CacheFileItem*>::iterator p = _map.forwardIterator(); (pair=p.next())!=NULL; ) {
694 if ( pair->value->_dataType==CBT_INDEX )
695 continue;
696 if ( !validate(pair->value) ) {
697 CRLog::error("Contents validation is failed for block type=%d index=%d", (int)pair->value->_dataType, pair->value->_dataIndex );
698 return false;
699 }
700 }
701 CRLog::info("Finished validation of cache file contents -- successful");
702 return true;
703 }
704
705 // reads index from file
readIndex()706 bool CacheFile::readIndex()
707 {
708 CacheFileHeader hdr(NULL, _size, 0, 0);
709 _stream->SetPos(0);
710 lvsize_t bytesRead = 0;
711 _stream->Read(&hdr, sizeof(hdr), &bytesRead );
712 if ( bytesRead!=sizeof(hdr) )
713 return false;
714 CRLog::info("Header read: DirtyFlag=%d", hdr._dirty);
715 CRLog::info("Header read: DOM level=%u", hdr._dom_version);
716 if ( !hdr.validate(_domVersion) )
717 return false;
718 if ( (int)hdr._fsize > _size + 4096-1 ) {
719 CRLog::error("CacheFile::readIndex: file size doesn't match with header");
720 return false;
721 }
722 if ( !hdr._indexBlock._blockFilePos )
723 return true; // empty index is ok
724 if ( hdr._indexBlock._blockFilePos>=(int)hdr._fsize || hdr._indexBlock._blockFilePos+hdr._indexBlock._blockSize>(int)hdr._fsize+4096-1 ) {
725 CRLog::error("CacheFile::readIndex: Wrong index file position specified in header");
726 return false;
727 }
728 if ((int)_stream->SetPos(hdr._indexBlock._blockFilePos)!=hdr._indexBlock._blockFilePos ) {
729 CRLog::error("CacheFile::readIndex: cannot move file position to index block");
730 return false;
731 }
732 int count = hdr._indexBlock._dataSize / sizeof(CacheFileItem);
733 if ( count<0 || count>100000 ) {
734 CRLog::error("CacheFile::readIndex: invalid number of blocks in index");
735 return false;
736 }
737 CacheFileItem * index = new CacheFileItem[count];
738 bytesRead = 0;
739 lvsize_t sz = sizeof(CacheFileItem)*count;
740 _stream->Read(index, sz, &bytesRead );
741 if ( bytesRead!=sz )
742 return false;
743 // check CRC
744 lUInt32 hash = calcHash( (lUInt8*)index, sz );
745 if ( hdr._indexBlock._dataHash!=hash ) {
746 CRLog::error("CacheFile::readIndex: CRC doesn't match found %08x expected %08x", hash, hdr._indexBlock._dataHash);
747 delete[] index;
748 return false;
749 }
750 for ( int i=0; i<count; i++ ) {
751 if (index[i]._dataType == CBT_INDEX)
752 index[i] = hdr._indexBlock;
753 if ( !index[i].validate(_size) ) {
754 delete[] index;
755 return false;
756 }
757 CacheFileItem * item = new CacheFileItem();
758 memcpy(item, &index[i], sizeof(CacheFileItem));
759 _index.add( item );
760 lUInt32 key = ((lUInt32)item->_dataType)<<16 | item->_dataIndex;
761 if ( key==0 )
762 _freeIndex.add( item );
763 else
764 _map.set( key, item );
765 }
766 delete[] index;
767 CacheFileItem * indexitem = findBlock(CBT_INDEX, 0);
768 if ( !indexitem ) {
769 CRLog::error("CacheFile::readIndex: index block info doesn't match header");
770 return false;
771 }
772 _dirty = hdr._dirty ? true : false;
773 return true;
774 }
775
776 // writes index block
writeIndex()777 bool CacheFile::writeIndex()
778 {
779 if ( !_indexChanged )
780 return true; // no changes: no writes
781
782 if ( _index.length()==0 )
783 return updateHeader();
784
785 // create copy of index in memory
786 int count = _index.length();
787 CacheFileItem * indexItem = findBlock(CBT_INDEX, 0);
788 if (!indexItem) {
789 int sz = sizeof(CacheFileItem) * (count * 2 + 100);
790 allocBlock(CBT_INDEX, 0, sz);
791 indexItem = findBlock(CBT_INDEX, 0);
792 (void)indexItem; // silences clang warning
793 count = _index.length();
794 }
795 CacheFileItem * index = new CacheFileItem[count]();
796 int sz = count * sizeof(CacheFileItem);
797 for ( int i = 0; i < count; i++ ) {
798 memcpy( &index[i], _index[i], sizeof(CacheFileItem) );
799 if (index[i]._dataType == CBT_INDEX) {
800 index[i]._dataHash = 0;
801 index[i]._packedHash = 0;
802 index[i]._dataSize = 0;
803 }
804 }
805 bool res = write(CBT_INDEX, 0, (const lUInt8*)index, sz, false);
806 delete[] index;
807
808 indexItem = findBlock(CBT_INDEX, 0);
809 if ( !res || !indexItem ) {
810 CRLog::error("CacheFile::writeIndex: error while writing index!!!");
811 return false;
812 }
813
814 updateHeader();
815 _indexChanged = false;
816 return true;
817 }
818
819 // writes file header
updateHeader()820 bool CacheFile::updateHeader()
821 {
822 CacheFileItem * indexItem = NULL;
823 indexItem = findBlock(CBT_INDEX, 0);
824 CacheFileHeader hdr(indexItem, _size, _dirty?1:0, _domVersion);
825 _stream->SetPos(0);
826 lvsize_t bytesWritten = 0;
827 _stream->Write(&hdr, sizeof(hdr), &bytesWritten );
828 if ( bytesWritten!=sizeof(hdr) )
829 return false;
830 //CRLog::trace("updateHeader finished: Dirty flag = %d", hdr._dirty);
831 return true;
832 }
833
834 //
freeBlock(CacheFileItem * block)835 void CacheFile::freeBlock( CacheFileItem * block )
836 {
837 lUInt32 key = ((lUInt32)block->_dataType)<<16 | block->_dataIndex;
838 _map.remove(key);
839 block->_dataIndex = 0;
840 block->_dataType = 0;
841 block->_dataSize = 0;
842 _freeIndex.add( block );
843 }
844
845 /// reads block as a stream
readStream(lUInt16 type,lUInt16 index)846 LVStreamRef CacheFile::readStream(lUInt16 type, lUInt16 index)
847 {
848 CacheFileItem * block = findBlock(type, index);
849 if (block && block->_dataSize) {
850 #if 0
851 lUInt8 * buf = NULL;
852 int size = 0;
853 if (read(type, index, buf, size))
854 return LVCreateMemoryStream(buf, size);
855 #else
856 return LVStreamRef(new LVStreamFragment(_stream, block->_blockFilePos, block->_dataSize));
857 #endif
858 }
859 return LVStreamRef();
860 }
861
862 // searches for existing block
findBlock(lUInt16 type,lUInt16 index)863 CacheFileItem * CacheFile::findBlock( lUInt16 type, lUInt16 index )
864 {
865 lUInt32 key = ((lUInt32)type)<<16 | index;
866 CacheFileItem * existing = _map.get( key );
867 return existing;
868 }
869
870 // allocates index record for block, sets its new size
allocBlock(lUInt16 type,lUInt16 index,int size)871 CacheFileItem * CacheFile::allocBlock( lUInt16 type, lUInt16 index, int size )
872 {
873 lUInt32 key = ((lUInt32)type)<<16 | index;
874 CacheFileItem * existing = _map.get( key );
875 if ( existing ) {
876 if ( existing->_blockSize >= size ) {
877 if ( existing->_dataSize != size ) {
878 existing->_dataSize = size;
879 _indexChanged = true;
880 }
881 return existing;
882 }
883 // old block has not enough space: free it
884 freeBlock( existing );
885 existing = NULL;
886 }
887 // search for existing free block of proper size
888 int bestSize = -1;
889 //int bestIndex = -1;
890 for ( int i=0; i<_freeIndex.length(); i++ ) {
891 if ( _freeIndex[i] && (_freeIndex[i]->_blockSize>=size) && (bestSize==-1 || _freeIndex[i]->_blockSize<bestSize) ) {
892 bestSize = _freeIndex[i]->_blockSize;
893 //bestIndex = -1;
894 existing = _freeIndex[i];
895 }
896 }
897 if ( existing ) {
898 _freeIndex.remove( existing );
899 existing->_dataType = type;
900 existing->_dataIndex = index;
901 existing->_dataSize = size;
902 _map.set( key, existing );
903 _indexChanged = true;
904 return existing;
905 }
906 // allocate new block
907 CacheFileItem * block = new CacheFileItem( type, index );
908 _map.set( key, block );
909 block->_blockSize = roundSector(size);
910 block->_dataSize = size;
911 block->_blockIndex = _index.length();
912 _index.add(block);
913 block->_blockFilePos = _size;
914 _size += block->_blockSize;
915 _indexChanged = true;
916 // really, file size is not extended
917 return block;
918 }
919
920 /// reads and validates block
validate(CacheFileItem * block)921 bool CacheFile::validate( CacheFileItem * block )
922 {
923 lUInt8 * buf = NULL;
924 unsigned size = 0;
925
926 if ( (int)_stream->SetPos( block->_blockFilePos )!=block->_blockFilePos ) {
927 CRLog::error("CacheFile::validate: Cannot set position for block %d:%d of size %d", block->_dataType, block->_dataIndex, (int)size);
928 return false;
929 }
930
931 // read block from file
932 size = block->_dataSize;
933 buf = (lUInt8 *)malloc(size);
934 lvsize_t bytesRead = 0;
935 _stream->Read(buf, size, &bytesRead );
936 if ( bytesRead!=size ) {
937 CRLog::error("CacheFile::validate: Cannot read block %d:%d of size %d", block->_dataType, block->_dataIndex, (int)size);
938 free(buf);
939 return false;
940 }
941
942 // check CRC for file block
943 lUInt32 packedhash = calcHash( buf, size );
944 if ( packedhash!=block->_packedHash ) {
945 CRLog::error("CacheFile::validate: packed data CRC doesn't match for block %d:%d of size %d", block->_dataType, block->_dataIndex, (int)size);
946 free(buf);
947 return false;
948 }
949 free(buf);
950 return true;
951 }
952
953 // reads and allocates block in memory
read(lUInt16 type,lUInt16 dataIndex,lUInt8 * & buf,int & size)954 bool CacheFile::read( lUInt16 type, lUInt16 dataIndex, lUInt8 * &buf, int &size )
955 {
956 buf = NULL;
957 size = 0;
958 CacheFileItem * block = findBlock( type, dataIndex );
959 if ( !block ) {
960 CRLog::error("CacheFile::read: Block %d:%d not found in file", type, dataIndex);
961 return false;
962 }
963 if ( (int)_stream->SetPos( block->_blockFilePos )!=block->_blockFilePos )
964 return false;
965
966 // read block from file
967 size = block->_dataSize;
968 buf = (lUInt8 *)malloc(size);
969 lvsize_t bytesRead = 0;
970 _stream->Read(buf, size, &bytesRead );
971 if ( (int)bytesRead!=size ) {
972 CRLog::error("CacheFile::read: Cannot read block %d:%d of size %d, bytesRead=%d", type, dataIndex, (int)size, (int)bytesRead);
973 free(buf);
974 buf = NULL;
975 size = 0;
976 return false;
977 }
978
979 bool compress = block->_uncompressedSize!=0;
980
981 if ( compress ) {
982 // block is compressed
983
984 // check crc separately only for compressed data
985 lUInt32 packedhash = calcHash( buf, size );
986 if ( packedhash!=block->_packedHash ) {
987 CRLog::error("CacheFile::read: packed data CRC doesn't match for block %d:%d of size %d", type, dataIndex, (int)size);
988 free(buf);
989 buf = NULL;
990 size = 0;
991 return false;
992 }
993
994 // uncompress block data
995 lUInt8 * uncomp_buf = NULL;
996 lUInt32 uncomp_size = 0;
997 if ( ldomUnpack(buf, size, uncomp_buf, uncomp_size) && uncomp_size==block->_uncompressedSize ) {
998 free( buf );
999 buf = uncomp_buf;
1000 size = uncomp_size;
1001 } else {
1002 CRLog::error("CacheFile::read: error while uncompressing data for block %d:%d of size %d", type, dataIndex, (int)size);
1003 free(buf);
1004 buf = NULL;
1005 size = 0;
1006 return false;
1007 }
1008 }
1009
1010 // check CRC
1011 lUInt32 hash = calcHash( buf, size );
1012 if (hash != block->_dataHash) {
1013 CRLog::error("CacheFile::read: CRC doesn't match for block %d:%d of size %d", type, dataIndex, (int)size);
1014 free(buf);
1015 buf = NULL;
1016 size = 0;
1017 return false;
1018 }
1019 // Success. Don't forget to free allocated block externally
1020 return true;
1021 }
1022
/// Writes a data block to the cache file, optionally compressing it.
/// @param type      block type
/// @param dataIndex block index within that type
/// @param buf       data to store
/// @param size      number of bytes in buf
/// @param compress  request compression; ignored when _compressCachedData
///                  is false, and dropped when ldomPack() fails
/// @return true when the block is stored (or is already stored with the
///         same size and hash), false on allocation/seek/write failure
bool CacheFile::write( lUInt16 type, lUInt16 dataIndex, const lUInt8 * buf, int size, bool compress )
{
    // check whether data is changed
    lUInt32 newhash = calcHash( buf, size );
    CacheFileItem * existingblock = findBlock( type, dataIndex );

    if (existingblock) {
        // compare against the uncompressed size if the stored block is compressed,
        // otherwise against its plain data size
        bool sameSize = ((int)existingblock->_uncompressedSize==size) || (existingblock->_uncompressedSize==0 && (int)existingblock->_dataSize==size);
        if (sameSize && existingblock->_dataHash == newhash ) {
            // same size and same hash: nothing to write
            return true;
        }
    }

#if 0
    if (existingblock)
        CRLog::trace("* oldsz=%d oldhash=%08x", (int)existingblock->_uncompressedSize, (int)existingblock->_dataHash);
    CRLog::trace("* wr block t=%d[%d] sz=%d hash=%08x", type, dataIndex, size, newhash);
#endif
    setDirtyFlag(true);

    // stays 0 for blocks stored uncompressed (read() uses this to detect compression)
    lUInt32 uncompressedSize = 0;
    // for uncompressed blocks the packed hash equals the data hash
    lUInt64 newpackedhash = newhash;
    if (!_compressCachedData)
        compress = false;
    if ( compress ) {
        lUInt8 * dstbuf = NULL;
        lUInt32 dstsize = 0;
        if ( !ldomPack( buf, size, dstbuf, dstsize ) ) {
            // packing failed: store the data as is
            compress = false;
        } else {
            // from here on buf/size describe the packed buffer, which is
            // released with free() on every exit path below
            uncompressedSize = size;
            size = dstsize;
            buf = dstbuf;
            newpackedhash = calcHash( buf, size );
#if DEBUG_DOM_STORAGE==1
            //CRLog::trace("packed block %d:%d : %d to %d bytes (%d%%)", type, dataIndex, srcsize, dstsize, srcsize>0?(100*dstsize/srcsize):0 );
#endif
        }
    }

    CacheFileItem * block = NULL;
    if ( existingblock && existingblock->_dataSize>=size ) {
        // reuse existing block
        block = existingblock;
    } else {
        // allocate new block
        if ( existingblock )
            freeBlock( existingblock );
        block = allocBlock( type, dataIndex, size );
    }
    if ( !block )
    {
        // NOTE(review): the error-path frees are guarded by
        // DOC_DATA_COMPRESSION_LEVEL while the success-path free at the end
        // is not - confirm this asymmetry is intended
#if DOC_DATA_COMPRESSION_LEVEL!=0
        if ( compress ) {
            free( (void*)buf );
        }
#endif
        return false;
    }
    if ( (int)_stream->SetPos( block->_blockFilePos )!=block->_blockFilePos )
    {
#if DOC_DATA_COMPRESSION_LEVEL!=0
        if ( compress ) {
            free( (void*)buf );
        }
#endif
        return false;
    }
    // assert: size == block->_dataSize
    // actual writing of data
    block->_dataSize = size;
    lvsize_t bytesWritten = 0;
    _stream->Write(buf, size, &bytesWritten );
    if ( (int)bytesWritten!=size )
    {
#if DOC_DATA_COMPRESSION_LEVEL!=0
        if ( compress ) {
            free( (void*)buf );
        }
#endif
        return false;
    }
#if CACHE_FILE_WRITE_BLOCK_PADDING==1
    // pad the rest of the block with 0xFF bytes, but only when the block
    // lies within the last sector of the stream
    int paddingSize = block->_blockSize - size; //roundSector( size ) - size
    if ( paddingSize ) {
        if ((int)block->_blockFilePos + (int)block->_dataSize >= (int)_stream->GetSize() - _sectorSize) {
            LASSERT(size + paddingSize == block->_blockSize );
//            if (paddingSize > 16384) {
//                CRLog::error("paddingSize > 16384");
//            }
//            LASSERT(paddingSize <= 16384);
            lUInt8 tmp[16384];//paddingSize];
            memset(tmp, 0xFF, paddingSize < 16384 ? paddingSize : 16384);
            // write the padding in chunks of at most 16384 bytes;
            // the result of these writes is not checked
            do {
                int blkSize = paddingSize < 16384 ? paddingSize : 16384;
                _stream->Write(tmp, blkSize, &bytesWritten );
                paddingSize -= blkSize;
            } while (paddingSize > 0);
        }
    }
#endif
    //_stream->Flush(true);
    // update CRC
    block->_dataHash = newhash;
    block->_packedHash = newpackedhash;
    block->_uncompressedSize = uncompressedSize;

    if ( compress ) {
        // release the packed buffer produced by ldomPack()
        free( (void*)buf );
    }
    // index record changed (hashes/sizes)
    _indexChanged = true;

    //CRLog::error("CacheFile::write: block %d:%d (pos %ds, size %ds) is written (crc=%08x)", type, dataIndex, (int)block->_blockFilePos/_sectorSize, (int)(size+_sectorSize-1)/_sectorSize, block->_dataCRC);
    // success
    return true;
}
1140
1141 /// writes content of serial buffer
write(lUInt16 type,lUInt16 index,SerialBuf & buf,bool compress)1142 bool CacheFile::write( lUInt16 type, lUInt16 index, SerialBuf & buf, bool compress )
1143 {
1144 return write( type, index, buf.buf(), buf.pos(), compress );
1145 }
1146
1147 /// reads content of serial buffer
read(lUInt16 type,lUInt16 index,SerialBuf & buf)1148 bool CacheFile::read( lUInt16 type, lUInt16 index, SerialBuf & buf )
1149 {
1150 lUInt8 * tmp = NULL;
1151 int size = 0;
1152 bool res = read( type, index, tmp, size );
1153 if ( res ) {
1154 buf.set( tmp, size );
1155 }
1156 buf.setPos(0);
1157 return res;
1158 }
1159
1160 // try open existing cache file
open(lString32 filename)1161 bool CacheFile::open( lString32 filename )
1162 {
1163 LVStreamRef stream = LVOpenFileStream( filename.c_str(), LVOM_APPEND );
1164 if ( !stream ) {
1165 CRLog::error( "CacheFile::open: cannot open file %s", LCSTR(filename));
1166 return false;
1167 }
1168 crSetFileToRemoveOnFatalError(LCSTR(filename));
1169 return open(stream);
1170 }
1171
1172
1173 // try open existing cache file
open(LVStreamRef stream)1174 bool CacheFile::open( LVStreamRef stream )
1175 {
1176 _stream = stream;
1177 _size = _stream->GetSize();
1178 //_stream->setAutoSyncSize(STREAM_AUTO_SYNC_SIZE);
1179
1180 if ( !readIndex() ) {
1181 CRLog::error("CacheFile::open : cannot read index from file");
1182 return false;
1183 }
1184 if (_enableCacheFileContentsValidation && !validateContents() ) {
1185 CRLog::error("CacheFile::open : file contents validation failed");
1186 return false;
1187 }
1188 return true;
1189 }
1190
create(lString32 filename)1191 bool CacheFile::create( lString32 filename )
1192 {
1193 LVStreamRef stream = LVOpenFileStream( filename.c_str(), LVOM_APPEND );
1194 if ( _stream.isNull() ) {
1195 CRLog::error( "CacheFile::create: cannot create file %s", LCSTR(filename));
1196 return false;
1197 }
1198 crSetFileToRemoveOnFatalError(LCSTR(filename));
1199 return create(stream);
1200 }
1201
create(LVStreamRef stream)1202 bool CacheFile::create( LVStreamRef stream )
1203 {
1204 _stream = stream;
1205 //_stream->setAutoSyncSize(STREAM_AUTO_SYNC_SIZE);
1206 if ( _stream->SetPos(0)!=0 ) {
1207 CRLog::error( "CacheFile::create: cannot seek file");
1208 _stream.Clear();
1209 return false;
1210 }
1211
1212 _size = _sectorSize;
1213 LVArray<lUInt8> sector0(_sectorSize, 0);
1214 lvsize_t bytesWritten = 0;
1215 _stream->Write(sector0.get(), _sectorSize, &bytesWritten );
1216 if ( (int)bytesWritten!=_sectorSize ) {
1217 _stream.Clear();
1218 return false;
1219 }
1220 if (!updateHeader()) {
1221 _stream.Clear();
1222 return false;
1223 }
1224 return true;
1225 }
1226
1227 // BLOB storage
1228
1229 class ldomBlobItem {
1230 int _storageIndex;
1231 lString32 _name;
1232 int _size;
1233 lUInt8 * _data;
1234 public:
ldomBlobItem(lString32 name)1235 ldomBlobItem( lString32 name ) : _storageIndex(-1), _name(name), _size(0), _data(NULL) {
1236
1237 }
~ldomBlobItem()1238 ~ldomBlobItem() {
1239 if ( _data )
1240 delete[] _data;
1241 }
getSize()1242 int getSize() { return _size; }
getIndex()1243 int getIndex() { return _storageIndex; }
getData()1244 lUInt8 * getData() { return _data; }
getName()1245 lString32 getName() { return _name; }
setIndex(int index,int size)1246 void setIndex(int index, int size) {
1247 if ( _data )
1248 delete[] _data;
1249 _data = NULL;
1250 _storageIndex = index;
1251 _size = size;
1252 }
setData(const lUInt8 * data,int size)1253 void setData( const lUInt8 * data, int size ) {
1254 if ( _data )
1255 delete[] _data;
1256 if (data && size>0) {
1257 _data = new lUInt8[size];
1258 memcpy(_data, data, size);
1259 _size = size;
1260 } else {
1261 _data = NULL;
1262 _size = -1;
1263 }
1264 }
1265 };
1266
ldomBlobCache()1267 ldomBlobCache::ldomBlobCache() : _cacheFile(NULL), _changed(false)
1268 {
1269
1270 }
1271
1272 #define BLOB_INDEX_MAGIC "BLOBINDX"
1273
loadIndex()1274 bool ldomBlobCache::loadIndex()
1275 {
1276 bool res;
1277 SerialBuf buf(0,true);
1278 res = _cacheFile->read(CBT_BLOB_INDEX, buf);
1279 if (!res) {
1280 _list.clear();
1281 return true; // missing blob index: treat as empty list of blobs
1282 }
1283 if (!buf.checkMagic(BLOB_INDEX_MAGIC))
1284 return false;
1285 lUInt32 len;
1286 buf >> len;
1287 for ( lUInt32 i = 0; i<len; i++ ) {
1288 lString32 name;
1289 buf >> name;
1290 lUInt32 size;
1291 buf >> size;
1292 if (buf.error())
1293 break;
1294 ldomBlobItem * item = new ldomBlobItem(name);
1295 item->setIndex(i, size);
1296 _list.add(item);
1297 }
1298 res = !buf.error();
1299 return res;
1300 }
1301
saveIndex()1302 bool ldomBlobCache::saveIndex()
1303 {
1304 bool res;
1305 SerialBuf buf(0,true);
1306 buf.putMagic(BLOB_INDEX_MAGIC);
1307 lUInt32 len = _list.length();
1308 buf << len;
1309 for ( lUInt32 i = 0; i<len; i++ ) {
1310 ldomBlobItem * item = _list[i];
1311 buf << item->getName();
1312 buf << (lUInt32)item->getSize();
1313 }
1314 res = _cacheFile->write( CBT_BLOB_INDEX, buf, false );
1315 return res;
1316 }
1317
saveToCache(CRTimerUtil & timeout)1318 ContinuousOperationResult ldomBlobCache::saveToCache(CRTimerUtil & timeout)
1319 {
1320 if (!_list.length() || !_changed || _cacheFile==NULL)
1321 return CR_DONE;
1322 bool res = true;
1323 for ( int i=0; i<_list.length(); i++ ) {
1324 ldomBlobItem * item = _list[i];
1325 if ( item->getData() ) {
1326 res = _cacheFile->write(CBT_BLOB_DATA, i, item->getData(), item->getSize(), false) && res;
1327 if (res)
1328 item->setIndex(i, item->getSize());
1329 }
1330 if (timeout.expired())
1331 return CR_TIMEOUT;
1332 }
1333 res = saveIndex() && res;
1334 if ( res )
1335 _changed = false;
1336 return res ? CR_DONE : CR_ERROR;
1337 }
1338
setCacheFile(CacheFile * cacheFile)1339 void ldomBlobCache::setCacheFile( CacheFile * cacheFile )
1340 {
1341 _cacheFile = cacheFile;
1342 CRTimerUtil infinite;
1343 if (_list.empty())
1344 loadIndex();
1345 else
1346 saveToCache(infinite);
1347 }
1348
addBlob(const lUInt8 * data,int size,lString32 name)1349 bool ldomBlobCache::addBlob( const lUInt8 * data, int size, lString32 name )
1350 {
1351 CRLog::debug("ldomBlobCache::addBlob( %s, size=%d, [%02x,%02x,%02x,%02x] )", LCSTR(name), size, data[0], data[1], data[2], data[3]);
1352 int index = _list.length();
1353 ldomBlobItem * item = new ldomBlobItem(name);
1354 if (_cacheFile != NULL) {
1355 _cacheFile->write(CBT_BLOB_DATA, index, data, size, false);
1356 item->setIndex(index, size);
1357 } else {
1358 item->setData(data, size);
1359 }
1360 _list.add(item);
1361 _changed = true;
1362 return true;
1363 }
1364
getBlob(lString32 name)1365 LVStreamRef ldomBlobCache::getBlob( lString32 name )
1366 {
1367 ldomBlobItem * item = NULL;
1368 lUInt16 index = 0;
1369 for ( int i=0; i<_list.length(); i++ ) {
1370 if (_list[i]->getName() == name) {
1371 item = _list[i];
1372 index = i;
1373 break;
1374 }
1375 }
1376 if (item) {
1377 if (item->getData()) {
1378 // RAM
1379 return LVCreateMemoryStream(item->getData(), item->getSize(), true);
1380 } else {
1381 // CACHE FILE
1382 return _cacheFile->readStream(CBT_BLOB_DATA, index);
1383 }
1384 }
1385 return LVStreamRef();
1386 }
1387
1388 #if BUILD_LITE!=1
1389 //#define DEBUG_RENDER_RECT_ACCESS
1390 #ifdef DEBUG_RENDER_RECT_ACCESS
1391 static signed char render_rect_flags[200000]={0};
rr_lock(ldomNode * node)1392 static void rr_lock( ldomNode * node )
1393 {
1394 int index = node->getDataIndex()>>4;
1395 CRLog::debug("RenderRectAccessor(%d) lock", index );
1396 if ( render_rect_flags[index] )
1397 crFatalError(123, "render rect accessor: cannot get lock");
1398 render_rect_flags[index] = 1;
1399 }
rr_unlock(ldomNode * node)1400 static void rr_unlock( ldomNode * node )
1401 {
1402 int index = node->getDataIndex()>>4;
1403 CRLog::debug("RenderRectAccessor(%d) lock", index );
1404 if ( !render_rect_flags[index] )
1405 crFatalError(123, "render rect accessor: unlock w/o lock");
1406 render_rect_flags[index] = 0;
1407 }
1408 #endif
1409
RenderRectAccessor(ldomNode * node)1410 RenderRectAccessor::RenderRectAccessor( ldomNode * node )
1411 : _node(node), _modified(false), _dirty(false)
1412 {
1413 #ifdef DEBUG_RENDER_RECT_ACCESS
1414 rr_lock( _node );
1415 #endif
1416 _node->getRenderData(*this);
1417 }
1418
~RenderRectAccessor()1419 RenderRectAccessor::~RenderRectAccessor()
1420 {
1421 if ( _modified )
1422 _node->setRenderData(*this);
1423 #ifdef DEBUG_RENDER_RECT_ACCESS
1424 if ( !_dirty )
1425 rr_unlock( _node );
1426 #endif
1427 }
1428
clear()1429 void RenderRectAccessor::clear()
1430 {
1431 lvdomElementFormatRec::clear(); // will clear every field
1432 _modified = true;
1433 _dirty = false;
1434 }
1435
push()1436 void RenderRectAccessor::push()
1437 {
1438 if ( _modified ) {
1439 _node->setRenderData(*this);
1440 _modified = false;
1441 _dirty = true;
1442 #ifdef DEBUG_RENDER_RECT_ACCESS
1443 rr_unlock( _node );
1444 #endif
1445 }
1446 }
1447
setX(int x)1448 void RenderRectAccessor::setX( int x )
1449 {
1450 if ( _dirty ) {
1451 _dirty = false;
1452 _node->getRenderData(*this);
1453 #ifdef DEBUG_RENDER_RECT_ACCESS
1454 rr_lock( _node );
1455 #endif
1456 }
1457 if ( _x != x ) {
1458 _x = x;
1459 _modified = true;
1460 }
1461 }
setY(int y)1462 void RenderRectAccessor::setY( int y )
1463 {
1464 if ( _dirty ) {
1465 _dirty = false;
1466 _node->getRenderData(*this);
1467 #ifdef DEBUG_RENDER_RECT_ACCESS
1468 rr_lock( _node );
1469 #endif
1470 }
1471 if ( _y != y ) {
1472 _y = y;
1473 _modified = true;
1474 }
1475 }
setWidth(int w)1476 void RenderRectAccessor::setWidth( int w )
1477 {
1478 if ( _dirty ) {
1479 _dirty = false;
1480 _node->getRenderData(*this);
1481 #ifdef DEBUG_RENDER_RECT_ACCESS
1482 rr_lock( _node );
1483 #endif
1484 }
1485 if ( _width != w ) {
1486 _width = w;
1487 _modified = true;
1488 }
1489 }
setHeight(int h)1490 void RenderRectAccessor::setHeight( int h )
1491 {
1492 if ( _dirty ) {
1493 _dirty = false;
1494 _node->getRenderData(*this);
1495 #ifdef DEBUG_RENDER_RECT_ACCESS
1496 rr_lock( _node );
1497 #endif
1498 }
1499 if ( _height != h ) {
1500 _height = h;
1501 _modified = true;
1502 }
1503 }
1504
getX()1505 int RenderRectAccessor::getX()
1506 {
1507 if ( _dirty ) {
1508 _dirty = false;
1509 _node->getRenderData(*this);
1510 #ifdef DEBUG_RENDER_RECT_ACCESS
1511 rr_lock( _node );
1512 #endif
1513 }
1514 return _x;
1515 }
getY()1516 int RenderRectAccessor::getY()
1517 {
1518 if ( _dirty ) {
1519 _dirty = false;
1520 _node->getRenderData(*this);
1521 #ifdef DEBUG_RENDER_RECT_ACCESS
1522 rr_lock( _node );
1523 #endif
1524 }
1525 return _y;
1526 }
getWidth()1527 int RenderRectAccessor::getWidth()
1528 {
1529 if ( _dirty ) {
1530 _dirty = false;
1531 _node->getRenderData(*this);
1532 #ifdef DEBUG_RENDER_RECT_ACCESS
1533 rr_lock( _node );
1534 #endif
1535 }
1536 return _width;
1537 }
getHeight()1538 int RenderRectAccessor::getHeight()
1539 {
1540 if ( _dirty ) {
1541 _dirty = false;
1542 _node->getRenderData(*this);
1543 #ifdef DEBUG_RENDER_RECT_ACCESS
1544 rr_lock( _node );
1545 #endif
1546 }
1547 return _height;
1548 }
getRect(lvRect & rc)1549 void RenderRectAccessor::getRect( lvRect & rc )
1550 {
1551 if ( _dirty ) {
1552 _dirty = false;
1553 _node->getRenderData(*this);
1554 #ifdef DEBUG_RENDER_RECT_ACCESS
1555 rr_lock( _node );
1556 #endif
1557 }
1558 rc.left = _x;
1559 rc.top = _y;
1560 rc.right = _x + _width;
1561 rc.bottom = _y + _height;
1562 }
1563
setInnerX(int x)1564 void RenderRectAccessor::setInnerX( int x )
1565 {
1566 if ( _dirty ) {
1567 _dirty = false;
1568 _node->getRenderData(*this);
1569 #ifdef DEBUG_RENDER_RECT_ACCESS
1570 rr_lock( _node );
1571 #endif
1572 }
1573 if ( _inner_x != x ) {
1574 _inner_x = x;
1575 _modified = true;
1576 }
1577 }
setInnerY(int y)1578 void RenderRectAccessor::setInnerY( int y )
1579 {
1580 if ( _dirty ) {
1581 _dirty = false;
1582 _node->getRenderData(*this);
1583 #ifdef DEBUG_RENDER_RECT_ACCESS
1584 rr_lock( _node );
1585 #endif
1586 }
1587 if ( _inner_y != y ) {
1588 _inner_y = y;
1589 _modified = true;
1590 }
1591 }
setInnerWidth(int w)1592 void RenderRectAccessor::setInnerWidth( int w )
1593 {
1594 if ( _dirty ) {
1595 _dirty = false;
1596 _node->getRenderData(*this);
1597 #ifdef DEBUG_RENDER_RECT_ACCESS
1598 rr_lock( _node );
1599 #endif
1600 }
1601 if ( _inner_width != w ) {
1602 _inner_width = w;
1603 _modified = true;
1604 }
1605 }
getInnerX()1606 int RenderRectAccessor::getInnerX()
1607 {
1608 if ( _dirty ) {
1609 _dirty = false;
1610 _node->getRenderData(*this);
1611 #ifdef DEBUG_RENDER_RECT_ACCESS
1612 rr_lock( _node );
1613 #endif
1614 }
1615 return _inner_x;
1616 }
getInnerY()1617 int RenderRectAccessor::getInnerY()
1618 {
1619 if ( _dirty ) {
1620 _dirty = false;
1621 _node->getRenderData(*this);
1622 #ifdef DEBUG_RENDER_RECT_ACCESS
1623 rr_lock( _node );
1624 #endif
1625 }
1626 return _inner_y;
1627 }
getInnerWidth()1628 int RenderRectAccessor::getInnerWidth()
1629 {
1630 if ( _dirty ) {
1631 _dirty = false;
1632 _node->getRenderData(*this);
1633 #ifdef DEBUG_RENDER_RECT_ACCESS
1634 rr_lock( _node );
1635 #endif
1636 }
1637 return _inner_width;
1638 }
getUsableLeftOverflow()1639 int RenderRectAccessor::getUsableLeftOverflow()
1640 {
1641 if ( _dirty ) {
1642 _dirty = false;
1643 _node->getRenderData(*this);
1644 #ifdef DEBUG_RENDER_RECT_ACCESS
1645 rr_lock( _node );
1646 #endif
1647 }
1648 return _usable_left_overflow;
1649 }
getUsableRightOverflow()1650 int RenderRectAccessor::getUsableRightOverflow()
1651 {
1652 if ( _dirty ) {
1653 _dirty = false;
1654 _node->getRenderData(*this);
1655 #ifdef DEBUG_RENDER_RECT_ACCESS
1656 rr_lock( _node );
1657 #endif
1658 }
1659 return _usable_right_overflow;
1660 }
setUsableLeftOverflow(int dx)1661 void RenderRectAccessor::setUsableLeftOverflow( int dx )
1662 {
1663 if ( _dirty ) {
1664 _dirty = false;
1665 _node->getRenderData(*this);
1666 #ifdef DEBUG_RENDER_RECT_ACCESS
1667 rr_lock( _node );
1668 #endif
1669 }
1670 if ( dx < 0 ) dx = 0; // don't allow a negative value
1671 if ( _usable_left_overflow != dx ) {
1672 _usable_left_overflow = dx;
1673 _modified = true;
1674 }
1675 }
setUsableRightOverflow(int dx)1676 void RenderRectAccessor::setUsableRightOverflow( int dx )
1677 {
1678 if ( _dirty ) {
1679 _dirty = false;
1680 _node->getRenderData(*this);
1681 #ifdef DEBUG_RENDER_RECT_ACCESS
1682 rr_lock( _node );
1683 #endif
1684 }
1685 if ( dx < 0 ) dx = 0; // don't allow a negative value
1686 if ( _usable_right_overflow != dx ) {
1687 _usable_right_overflow = dx;
1688 _modified = true;
1689 }
1690 }
getTopOverflow()1691 int RenderRectAccessor::getTopOverflow()
1692 {
1693 if ( _dirty ) {
1694 _dirty = false;
1695 _node->getRenderData(*this);
1696 #ifdef DEBUG_RENDER_RECT_ACCESS
1697 rr_lock( _node );
1698 #endif
1699 }
1700 return _top_overflow;
1701 }
getBottomOverflow()1702 int RenderRectAccessor::getBottomOverflow()
1703 {
1704 if ( _dirty ) {
1705 _dirty = false;
1706 _node->getRenderData(*this);
1707 #ifdef DEBUG_RENDER_RECT_ACCESS
1708 rr_lock( _node );
1709 #endif
1710 }
1711 return _bottom_overflow;
1712 }
setTopOverflow(int dy)1713 void RenderRectAccessor::setTopOverflow( int dy )
1714 {
1715 if ( _dirty ) {
1716 _dirty = false;
1717 _node->getRenderData(*this);
1718 #ifdef DEBUG_RENDER_RECT_ACCESS
1719 rr_lock( _node );
1720 #endif
1721 }
1722 if ( dy < 0 ) dy = 0; // don't allow a negative value
1723 if ( _top_overflow != dy ) {
1724 _top_overflow = dy;
1725 _modified = true;
1726 }
1727 }
setBottomOverflow(int dy)1728 void RenderRectAccessor::setBottomOverflow( int dy )
1729 {
1730 if ( _dirty ) {
1731 _dirty = false;
1732 _node->getRenderData(*this);
1733 #ifdef DEBUG_RENDER_RECT_ACCESS
1734 rr_lock( _node );
1735 #endif
1736 }
1737 if ( dy < 0 ) dy = 0; // don't allow a negative value
1738 if ( _bottom_overflow != dy ) {
1739 _bottom_overflow = dy;
1740 _modified = true;
1741 }
1742 }
getBaseline()1743 int RenderRectAccessor::getBaseline()
1744 {
1745 if ( _dirty ) {
1746 _dirty = false;
1747 _node->getRenderData(*this);
1748 #ifdef DEBUG_RENDER_RECT_ACCESS
1749 rr_lock( _node );
1750 #endif
1751 }
1752 return _baseline;
1753 }
setBaseline(int baseline)1754 void RenderRectAccessor::setBaseline( int baseline )
1755 {
1756 if ( _dirty ) {
1757 _dirty = false;
1758 _node->getRenderData(*this);
1759 #ifdef DEBUG_RENDER_RECT_ACCESS
1760 rr_lock( _node );
1761 #endif
1762 }
1763 if ( _baseline != baseline ) {
1764 _baseline = baseline;
1765 _modified = true;
1766 }
1767 }
getListPropNodeIndex()1768 int RenderRectAccessor::getListPropNodeIndex()
1769 {
1770 if ( _dirty ) {
1771 _dirty = false;
1772 _node->getRenderData(*this);
1773 #ifdef DEBUG_RENDER_RECT_ACCESS
1774 rr_lock( _node );
1775 #endif
1776 }
1777 return _listprop_node_idx;
1778 }
setListPropNodeIndex(int idx)1779 void RenderRectAccessor::setListPropNodeIndex( int idx )
1780 {
1781 if ( _dirty ) {
1782 _dirty = false;
1783 _node->getRenderData(*this);
1784 #ifdef DEBUG_RENDER_RECT_ACCESS
1785 rr_lock( _node );
1786 #endif
1787 }
1788 if ( _listprop_node_idx != idx ) {
1789 _listprop_node_idx = idx;
1790 _modified = true;
1791 }
1792 }
getLangNodeIndex()1793 int RenderRectAccessor::getLangNodeIndex()
1794 {
1795 if ( _dirty ) {
1796 _dirty = false;
1797 _node->getRenderData(*this);
1798 #ifdef DEBUG_RENDER_RECT_ACCESS
1799 rr_lock( _node );
1800 #endif
1801 }
1802 return _lang_node_idx;
1803 }
setLangNodeIndex(int idx)1804 void RenderRectAccessor::setLangNodeIndex( int idx )
1805 {
1806 if ( _dirty ) {
1807 _dirty = false;
1808 _node->getRenderData(*this);
1809 #ifdef DEBUG_RENDER_RECT_ACCESS
1810 rr_lock( _node );
1811 #endif
1812 }
1813 if ( _lang_node_idx != idx ) {
1814 _lang_node_idx = idx;
1815 _modified = true;
1816 }
1817 }
getFlags()1818 unsigned short RenderRectAccessor::getFlags()
1819 {
1820 if ( _dirty ) {
1821 _dirty = false;
1822 _node->getRenderData(*this);
1823 #ifdef DEBUG_RENDER_RECT_ACCESS
1824 rr_lock( _node );
1825 #endif
1826 }
1827 return _flags;
1828 }
setFlags(unsigned short flags)1829 void RenderRectAccessor::setFlags( unsigned short flags )
1830 {
1831 if ( _dirty ) {
1832 _dirty = false;
1833 _node->getRenderData(*this);
1834 #ifdef DEBUG_RENDER_RECT_ACCESS
1835 rr_lock( _node );
1836 #endif
1837 }
1838 if ( _flags != flags ) {
1839 _flags = flags;
1840 _modified = true;
1841 }
1842 }
getTopRectsExcluded(int & lw,int & lh,int & rw,int & rh)1843 void RenderRectAccessor::getTopRectsExcluded( int & lw, int & lh, int & rw, int & rh )
1844 {
1845 if ( _dirty ) {
1846 _dirty = false;
1847 _node->getRenderData(*this);
1848 #ifdef DEBUG_RENDER_RECT_ACCESS
1849 rr_lock( _node );
1850 #endif
1851 }
1852 lw = _extra1 >> 16; // Both stored in a single int slot (widths are
1853 rw = _extra1 & 0xFFFF; // constrained to lUint16 in many other places)
1854 lh = _extra2;
1855 rh = _extra3;
1856 }
setTopRectsExcluded(int lw,int lh,int rw,int rh)1857 void RenderRectAccessor::setTopRectsExcluded( int lw, int lh, int rw, int rh )
1858 {
1859 if ( _dirty ) {
1860 _dirty = false;
1861 _node->getRenderData(*this);
1862 #ifdef DEBUG_RENDER_RECT_ACCESS
1863 rr_lock( _node );
1864 #endif
1865 }
1866 if ( _extra2 != lh || _extra3 != rh || (_extra1>>16) != lw || (_extra1&0xFFFF) != rw ) {
1867 _extra1 = (lw<<16) + rw;
1868 _extra2 = lh;
1869 _extra3 = rh;
1870 _modified = true;
1871 }
1872 }
getNextFloatMinYs(int & left,int & right)1873 void RenderRectAccessor::getNextFloatMinYs( int & left, int & right )
1874 {
1875 if ( _dirty ) {
1876 _dirty = false;
1877 _node->getRenderData(*this);
1878 #ifdef DEBUG_RENDER_RECT_ACCESS
1879 rr_lock( _node );
1880 #endif
1881 }
1882 left = _extra4;
1883 right = _extra5;
1884 }
setNextFloatMinYs(int left,int right)1885 void RenderRectAccessor::setNextFloatMinYs( int left, int right )
1886 {
1887 if ( _dirty ) {
1888 _dirty = false;
1889 _node->getRenderData(*this);
1890 #ifdef DEBUG_RENDER_RECT_ACCESS
1891 rr_lock( _node );
1892 #endif
1893 }
1894 if ( _extra4 != left || _extra5 != right ) {
1895 _extra4 = left;
1896 _extra5 = right;
1897 _modified = true;
1898 }
1899 }
getInvolvedFloatIds(int & float_count,lUInt32 * float_ids)1900 void RenderRectAccessor::getInvolvedFloatIds( int & float_count, lUInt32 * float_ids )
1901 {
1902 if ( _dirty ) {
1903 _dirty = false;
1904 _node->getRenderData(*this);
1905 #ifdef DEBUG_RENDER_RECT_ACCESS
1906 rr_lock( _node );
1907 #endif
1908 }
1909 float_count = _extra0;
1910 if (float_count > 0) float_ids[0] = _extra1;
1911 if (float_count > 1) float_ids[1] = _extra2;
1912 if (float_count > 2) float_ids[2] = _extra3;
1913 if (float_count > 3) float_ids[3] = _extra4;
1914 if (float_count > 4) float_ids[4] = _extra5;
1915 }
setInvolvedFloatIds(int float_count,lUInt32 * float_ids)1916 void RenderRectAccessor::setInvolvedFloatIds( int float_count, lUInt32 * float_ids )
1917 {
1918 if ( _dirty ) {
1919 _dirty = false;
1920 _node->getRenderData(*this);
1921 #ifdef DEBUG_RENDER_RECT_ACCESS
1922 rr_lock( _node );
1923 #endif
1924 }
1925 _extra0 = float_count;
1926 if (float_count > 0) _extra1 = float_ids[0];
1927 if (float_count > 1) _extra2 = float_ids[1];
1928 if (float_count > 2) _extra3 = float_ids[2];
1929 if (float_count > 3) _extra4 = float_ids[3];
1930 if (float_count > 4) _extra5 = float_ids[4];
1931 _modified = true;
1932 }
1933
1934 #endif
1935
1936
1937 class ldomPersistentText;
1938 class ldomPersistentElement;
1939
/// common header for data storage items
/// (base of TextDataStorageItem and ElementDataStorageItem, so these
/// fields come first in both)
// NOTE(review): presumably this layout is persisted in the cache file, so
// field order and sizes should not be changed - confirm before editing
struct DataStorageItemHeader {
    /// item type: LXML_TEXT_NODE, LXML_ELEMENT_NODE, LXML_NO_DATA
    lUInt16 type;
    /// size of item / 16
    lUInt16 sizeDiv16;
    /// data index of this node in document
    lInt32 dataIndex;
    /// data index of parent node in document, 0 means no parent
    lInt32 parentIndex;
};
1951
1952 /// text node storage implementation
1953 struct TextDataStorageItem : public DataStorageItemHeader {
1954 /// utf8 text length, characters
1955 lUInt16 length;
1956 /// utf8 text, w/o zero
1957 lChar8 text[2]; // utf8 text follows here, w/o zero byte at end
1958 /// return text
getTextTextDataStorageItem1959 inline lString32 getText() { return Utf8ToUnicode( text, length ); }
getText8TextDataStorageItem1960 inline lString8 getText8() { return lString8( text, length ); }
1961 };
1962
1963 /// element node data storage
1964 struct ElementDataStorageItem : public DataStorageItemHeader {
1965 lUInt16 id;
1966 lUInt16 nsid;
1967 lInt16 attrCount;
1968 lUInt8 rendMethod;
1969 lUInt8 reserved8;
1970 lInt32 childCount;
1971 lInt32 children[1];
attrsElementDataStorageItem1972 lUInt16 * attrs() { return (lUInt16 *)(children + childCount); }
attrElementDataStorageItem1973 lxmlAttribute * attr( int index ) { return (lxmlAttribute *)&(((lUInt16 *)(children + childCount))[index*4]); }
getAttrValueIdElementDataStorageItem1974 lUInt32 getAttrValueId( lUInt16 ns, lUInt16 id )
1975 {
1976 lUInt16 * a = attrs();
1977 for ( int i=0; i<attrCount; i++ ) {
1978 lxmlAttribute * attr = (lxmlAttribute *)(&a[i*4]);
1979 if ( !attr->compare( ns, id ) )
1980 continue;
1981 return attr->index;
1982 }
1983 return LXML_ATTR_VALUE_NONE;
1984 }
findAttrElementDataStorageItem1985 lxmlAttribute * findAttr( lUInt16 ns, lUInt16 id )
1986 {
1987 lUInt16 * a = attrs();
1988 for ( int i=0; i<attrCount; i++ ) {
1989 lxmlAttribute * attr = (lxmlAttribute *)(&a[i*4]);
1990 if ( attr->compare( ns, id ) )
1991 return attr;
1992 }
1993 return NULL;
1994 }
1995 // TODO: add items here
1996 //css_style_ref_t _style;
1997 //font_ref_t _font;
1998 };
1999
2000 #endif
2001
2002
2003 //=================================================================
2004 // tinyNodeCollection implementation
2005 //=================================================================
2006
tinyNodeCollection()2007 tinyNodeCollection::tinyNodeCollection()
2008 : _textCount(0)
2009 , _textNextFree(0)
2010 , _elemCount(0)
2011 , _elemNextFree(0)
2012 , _styles(STYLE_HASH_TABLE_SIZE)
2013 , _fonts(FONT_HASH_TABLE_SIZE)
2014 , _tinyElementCount(0)
2015 , _itemCount(0)
2016 #if BUILD_LITE!=1
2017 , _renderedBlockCache( 256 )
2018 , _cacheFile(NULL)
2019 , _cacheFileStale(true)
2020 , _cacheFileLeaveAsDirty(false)
2021 , _mapped(false)
2022 , _maperror(false)
2023 , _mapSavingStage(0)
2024 , _spaceWidthScalePercent(DEF_SPACE_WIDTH_SCALE_PERCENT)
2025 , _minSpaceCondensingPercent(DEF_MIN_SPACE_CONDENSING_PERCENT)
2026 , _unusedSpaceThresholdPercent(DEF_UNUSED_SPACE_THRESHOLD_PERCENT)
2027 , _maxAddedLetterSpacingPercent(DEF_MAX_ADDED_LETTER_SPACING_PERCENT)
2028 , _nodeStyleHash(0)
2029 , _nodeDisplayStyleHash(NODE_DISPLAY_STYLE_HASH_UNINITIALIZED)
2030 , _nodeDisplayStyleHashInitial(NODE_DISPLAY_STYLE_HASH_UNINITIALIZED)
2031 , _nodeStylesInvalidIfLoading(false)
2032 #endif
2033 , _textStorage(this, 't', (lUInt32)(TEXT_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), TEXT_CACHE_CHUNK_SIZE ) // persistent text node data storage
2034 , _elemStorage(this, 'e', (lUInt32)(ELEM_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), ELEM_CACHE_CHUNK_SIZE ) // persistent element data storage
2035 , _rectStorage(this, 'r', (lUInt32)(RECT_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), RECT_CACHE_CHUNK_SIZE ) // element render rect storage
2036 , _styleStorage(this, 's', (lUInt32)(STYLE_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), STYLE_CACHE_CHUNK_SIZE ) // element style info storage
2037 ,_docProps(LVCreatePropsContainer())
2038 ,_docFlags(DOC_FLAG_DEFAULTS)
2039 ,_fontMap(113)
2040 ,_hangingPunctuationEnabled(false)
2041 ,_renderBlockRenderingFlags(BLOCK_RENDERING_FLAGS_DEFAULT)
2042 ,_DOMVersionRequested(DOM_VERSION_CURRENT)
2043 ,_interlineScaleFactor(INTERLINE_SCALE_FACTOR_NO_SCALE)
2044 {
2045 memset( _textList, 0, sizeof(_textList) );
2046 memset( _elemList, 0, sizeof(_elemList) );
2047 // _docIndex assigned in ldomDocument constructor
2048 }
2049
tinyNodeCollection(tinyNodeCollection & v)2050 tinyNodeCollection::tinyNodeCollection( tinyNodeCollection & v )
2051 : _textCount(0)
2052 , _textNextFree(0)
2053 , _elemCount(0)
2054 , _elemNextFree(0)
2055 , _styles(STYLE_HASH_TABLE_SIZE)
2056 , _fonts(FONT_HASH_TABLE_SIZE)
2057 , _tinyElementCount(0)
2058 , _itemCount(0)
2059 #if BUILD_LITE!=1
2060 , _renderedBlockCache( 256 )
2061 , _cacheFile(NULL)
2062 , _cacheFileStale(true)
2063 , _cacheFileLeaveAsDirty(false)
2064 , _mapped(false)
2065 , _maperror(false)
2066 , _mapSavingStage(0)
2067 , _spaceWidthScalePercent(DEF_SPACE_WIDTH_SCALE_PERCENT)
2068 , _minSpaceCondensingPercent(DEF_MIN_SPACE_CONDENSING_PERCENT)
2069 , _unusedSpaceThresholdPercent(DEF_UNUSED_SPACE_THRESHOLD_PERCENT)
2070 , _maxAddedLetterSpacingPercent(DEF_MAX_ADDED_LETTER_SPACING_PERCENT)
2071 , _nodeStyleHash(0)
2072 , _nodeDisplayStyleHash(NODE_DISPLAY_STYLE_HASH_UNINITIALIZED)
2073 , _nodeDisplayStyleHashInitial(NODE_DISPLAY_STYLE_HASH_UNINITIALIZED)
2074 , _nodeStylesInvalidIfLoading(false)
2075 #endif
2076 , _textStorage(this, 't', (lUInt32)(TEXT_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), TEXT_CACHE_CHUNK_SIZE ) // persistent text node data storage
2077 , _elemStorage(this, 'e', (lUInt32)(ELEM_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), ELEM_CACHE_CHUNK_SIZE ) // persistent element data storage
2078 , _rectStorage(this, 'r', (lUInt32)(RECT_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), RECT_CACHE_CHUNK_SIZE ) // element render rect storage
2079 , _styleStorage(this, 's', (lUInt32)(STYLE_CACHE_UNPACKED_SPACE*_storageMaxUncompressedSizeFactor), STYLE_CACHE_CHUNK_SIZE ) // element style info storage
2080 ,_docProps(LVCreatePropsContainer())
2081 ,_docFlags(v._docFlags)
2082 ,_stylesheet(v._stylesheet)
2083 ,_fontMap(113)
2084 ,_hangingPunctuationEnabled(v._hangingPunctuationEnabled)
2085 ,_renderBlockRenderingFlags(v._renderBlockRenderingFlags)
2086 ,_DOMVersionRequested(v._DOMVersionRequested)
2087 ,_interlineScaleFactor(v._interlineScaleFactor)
2088 {
2089 memset( _textList, 0, sizeof(_textList) );
2090 memset( _elemList, 0, sizeof(_elemList) );
2091 // _docIndex assigned in ldomDocument constructor
2092 }
2093
setHangingPunctiationEnabled(bool value)2094 bool tinyNodeCollection::setHangingPunctiationEnabled(bool value) {
2095 if (_hangingPunctuationEnabled != value) {
2096 _hangingPunctuationEnabled = value;
2097 return true;
2098 }
2099 return false;
2100 }
2101
setRenderBlockRenderingFlags(lUInt32 flags)2102 bool tinyNodeCollection::setRenderBlockRenderingFlags(lUInt32 flags) {
2103 if (_renderBlockRenderingFlags != flags) {
2104 _renderBlockRenderingFlags = flags;
2105 // Check coherency and ensure dependencies of flags
2106 if (_renderBlockRenderingFlags & ~BLOCK_RENDERING_ENHANCED) // If any other flag is set,
2107 _renderBlockRenderingFlags |= BLOCK_RENDERING_ENHANCED; // set ENHANGED
2108 if (_renderBlockRenderingFlags & BLOCK_RENDERING_FLOAT_FLOATBOXES)
2109 _renderBlockRenderingFlags |= BLOCK_RENDERING_PREPARE_FLOATBOXES;
2110 if (_renderBlockRenderingFlags & BLOCK_RENDERING_PREPARE_FLOATBOXES)
2111 _renderBlockRenderingFlags |= BLOCK_RENDERING_WRAP_FLOATS;
2112 return true;
2113 }
2114 return false;
2115 }
2116
setDOMVersionRequested(lUInt32 version)2117 bool tinyNodeCollection::setDOMVersionRequested(lUInt32 version)
2118 {
2119 if (_DOMVersionRequested != version) {
2120 _DOMVersionRequested = version;
2121 return true;
2122 }
2123 return false;
2124 }
2125
setInterlineScaleFactor(int value)2126 bool tinyNodeCollection::setInterlineScaleFactor(int value) {
2127 if (_interlineScaleFactor != value) {
2128 _interlineScaleFactor = value;
2129 return true;
2130 }
2131 return false;
2132 }
2133
2134 #if BUILD_LITE!=1
openCacheFile()2135 bool tinyNodeCollection::openCacheFile()
2136 {
2137 if ( _cacheFile )
2138 return true;
2139 CacheFile * f = new CacheFile(_DOMVersionRequested);
2140 //lString32 cacheFileName("/tmp/cr3swap.tmp");
2141
2142 lString32 fname = getProps()->getStringDef( DOC_PROP_FILE_NAME, "noname" );
2143 //lUInt32 sz = (lUInt32)getProps()->getInt64Def(DOC_PROP_FILE_SIZE, 0);
2144 lUInt32 crc = (lUInt32)getProps()->getIntDef(DOC_PROP_FILE_CRC32, 0);
2145
2146 if ( !ldomDocCache::enabled() ) {
2147 CRLog::error("Cannot open cached document: cache dir is not initialized");
2148 delete f;
2149 return false;
2150 }
2151
2152 CRLog::info("ldomDocument::openCacheFile() - looking for cache file %s", UnicodeToUtf8(fname).c_str() );
2153
2154 lString32 cache_path;
2155 LVStreamRef map = ldomDocCache::openExisting( fname, crc, getPersistenceFlags(), cache_path );
2156 if ( map.isNull() ) {
2157 delete f;
2158 return false;
2159 }
2160 CRLog::info("ldomDocument::openCacheFile() - cache file found, trying to read index %s", UnicodeToUtf8(fname).c_str() );
2161
2162 if ( !f->open( map ) ) {
2163 delete f;
2164 return false;
2165 }
2166 CRLog::info("ldomDocument::openCacheFile() - index read successfully %s", UnicodeToUtf8(fname).c_str() );
2167 f->setCachePath(cache_path);
2168 _cacheFile = f;
2169 _textStorage.setCache( f );
2170 _elemStorage.setCache( f );
2171 _rectStorage.setCache( f );
2172 _styleStorage.setCache( f );
2173 _blobCache.setCacheFile( f );
2174 return true;
2175 }
2176
swapToCacheIfNecessary()2177 bool tinyNodeCollection::swapToCacheIfNecessary()
2178 {
2179 if ( !_cacheFile || _mapped || _maperror)
2180 return false;
2181 return createCacheFile();
2182 //return swapToCache();
2183 }
2184
createCacheFile()2185 bool tinyNodeCollection::createCacheFile()
2186 {
2187 if ( _cacheFile )
2188 return true;
2189 CacheFile * f = new CacheFile(_DOMVersionRequested);
2190 //lString32 cacheFileName("/tmp/cr3swap.tmp");
2191
2192 lString32 fname = getProps()->getStringDef( DOC_PROP_FILE_NAME, "noname" );
2193 lUInt32 sz = (lUInt32)getProps()->getInt64Def(DOC_PROP_FILE_SIZE, 0);
2194 lUInt32 crc = (lUInt32)getProps()->getIntDef(DOC_PROP_FILE_CRC32, 0);
2195
2196 if ( !ldomDocCache::enabled() ) {
2197 CRLog::error("Cannot swap: cache dir is not initialized");
2198 delete f;
2199 return false;
2200 }
2201
2202 CRLog::info("ldomDocument::createCacheFile() - initialized swapping of document %s to cache file", UnicodeToUtf8(fname).c_str() );
2203
2204 lString32 cache_path;
2205 LVStreamRef map = ldomDocCache::createNew( fname, crc, getPersistenceFlags(), sz, cache_path );
2206 if ( map.isNull() ) {
2207 delete f;
2208 return false;
2209 }
2210
2211 if ( !f->create( map ) ) {
2212 delete f;
2213 return false;
2214 }
2215 f->setCachePath(cache_path);
2216 _cacheFile = f;
2217 _mapped = true;
2218 _textStorage.setCache( f );
2219 _elemStorage.setCache( f );
2220 _rectStorage.setCache( f );
2221 _styleStorage.setCache( f );
2222 _blobCache.setCacheFile( f );
2223 setCacheFileStale(true);
2224 return true;
2225 }
2226
getCacheFilePath()2227 lString32 tinyNodeCollection::getCacheFilePath() {
2228 return _cacheFile != NULL ? _cacheFile->getCachePath() : lString32::empty_str;
2229 }
2230
clearNodeStyle(lUInt32 dataIndex)2231 void tinyNodeCollection::clearNodeStyle( lUInt32 dataIndex )
2232 {
2233 ldomNodeStyleInfo info;
2234 _styleStorage.getStyleData( dataIndex, &info );
2235 _styles.release( info._styleIndex );
2236 _fonts.release( info._fontIndex );
2237 info._fontIndex = info._styleIndex = 0;
2238 _styleStorage.setStyleData( dataIndex, &info );
2239 _nodeStyleHash = 0;
2240 }
2241
setNodeStyleIndex(lUInt32 dataIndex,lUInt16 index)2242 void tinyNodeCollection::setNodeStyleIndex( lUInt32 dataIndex, lUInt16 index )
2243 {
2244 ldomNodeStyleInfo info;
2245 _styleStorage.getStyleData( dataIndex, &info );
2246 if ( info._styleIndex!=index ) {
2247 info._styleIndex = index;
2248 _styleStorage.setStyleData( dataIndex, &info );
2249 _nodeStyleHash = 0;
2250 }
2251 }
2252
setNodeFontIndex(lUInt32 dataIndex,lUInt16 index)2253 void tinyNodeCollection::setNodeFontIndex( lUInt32 dataIndex, lUInt16 index )
2254 {
2255 ldomNodeStyleInfo info;
2256 _styleStorage.getStyleData( dataIndex, &info );
2257 if ( info._fontIndex!=index ) {
2258 info._fontIndex = index;
2259 _styleStorage.setStyleData( dataIndex, &info );
2260 _nodeStyleHash = 0;
2261 }
2262 }
2263
getNodeStyleIndex(lUInt32 dataIndex)2264 lUInt16 tinyNodeCollection::getNodeStyleIndex( lUInt32 dataIndex )
2265 {
2266 ldomNodeStyleInfo info;
2267 _styleStorage.getStyleData( dataIndex, &info );
2268 return info._styleIndex;
2269 }
2270
getNodeStyle(lUInt32 dataIndex)2271 css_style_ref_t tinyNodeCollection::getNodeStyle( lUInt32 dataIndex )
2272 {
2273 ldomNodeStyleInfo info;
2274 _styleStorage.getStyleData( dataIndex, &info );
2275 css_style_ref_t res = _styles.get( info._styleIndex );
2276 if (!res.isNull())
2277 _styles.addIndexRef(info._styleIndex);
2278 #if DEBUG_DOM_STORAGE==1
2279 if ( res.isNull() && info._styleIndex!=0 ) {
2280 CRLog::error("Null style returned for index %d", (int)info._styleIndex);
2281 }
2282 #endif
2283 return res;
2284 }
2285
getNodeFont(lUInt32 dataIndex)2286 font_ref_t tinyNodeCollection::getNodeFont( lUInt32 dataIndex )
2287 {
2288 ldomNodeStyleInfo info;
2289 _styleStorage.getStyleData( dataIndex, &info );
2290 return _fonts.get( info._fontIndex );
2291 }
2292
setNodeStyle(lUInt32 dataIndex,css_style_ref_t & v)2293 void tinyNodeCollection::setNodeStyle( lUInt32 dataIndex, css_style_ref_t & v )
2294 {
2295 ldomNodeStyleInfo info;
2296 _styleStorage.getStyleData( dataIndex, &info );
2297 _styles.cache( info._styleIndex, v );
2298 #if DEBUG_DOM_STORAGE==1
2299 if ( info._styleIndex==0 ) {
2300 CRLog::error("tinyNodeCollection::setNodeStyle() styleIndex is 0 after caching");
2301 }
2302 #endif
2303 _styleStorage.setStyleData( dataIndex, &info );
2304 _nodeStyleHash = 0;
2305 }
2306
setNodeFont(lUInt32 dataIndex,font_ref_t & v)2307 void tinyNodeCollection::setNodeFont( lUInt32 dataIndex, font_ref_t & v )
2308 {
2309 ldomNodeStyleInfo info;
2310 _styleStorage.getStyleData( dataIndex, &info );
2311 _fonts.cache( info._fontIndex, v );
2312 _styleStorage.setStyleData( dataIndex, &info );
2313 _nodeStyleHash = 0;
2314 }
2315
getNodeFontIndex(lUInt32 dataIndex)2316 lUInt16 tinyNodeCollection::getNodeFontIndex( lUInt32 dataIndex )
2317 {
2318 ldomNodeStyleInfo info;
2319 _styleStorage.getStyleData( dataIndex, &info );
2320 return info._fontIndex;
2321 }
2322
loadNodeData(lUInt16 type,ldomNode ** list,int nodecount)2323 bool tinyNodeCollection::loadNodeData(lUInt16 type, ldomNode ** list, int nodecount)
2324 {
2325 int count = ((nodecount + TNC_PART_LEN - 1) >> TNC_PART_SHIFT);
2326 for (lUInt16 i=0; i<count; i++) {
2327 int offs = i*TNC_PART_LEN;
2328 int sz = TNC_PART_LEN;
2329 if (offs + sz > nodecount) {
2330 sz = nodecount - offs;
2331 }
2332
2333 lUInt8 * p;
2334 int buflen;
2335 if (!_cacheFile->read( type, i, p, buflen ))
2336 return false;
2337 if (!p || (unsigned)buflen != sizeof(ldomNode) * sz)
2338 return false;
2339 ldomNode * buf = (ldomNode *)p;
2340 if (sz == TNC_PART_LEN)
2341 list[i] = buf;
2342 else {
2343 // buf contains `sz' ldomNode items
2344 // _elemList, _textList (as `list' argument) must always be TNC_PART_LEN size
2345 // add into `list' zero filled (TNC_PART_LEN - sz) items
2346 list[i] = (ldomNode *)realloc(buf, TNC_PART_LEN * sizeof(ldomNode));
2347 if (NULL == list[i]) {
2348 free(buf);
2349 CRLog::error("Not enough memory!");
2350 return false;
2351 }
2352 memset( list[i] + sz, 0, (TNC_PART_LEN - sz) * sizeof(ldomNode) );
2353 }
2354 for (int j=0; j<sz; j++) {
2355 list[i][j].setDocumentIndex( _docIndex );
2356 // validate loaded nodes: all non-null nodes should be marked as persistent, i.e. the actual node data: _data._pelem_addr, _data._ptext_addr,
2357 // NOT _data._elem_ptr, _data._text_ptr.
2358 // So we check this flag, but after setting document so that isNull() works correctly.
2359 // If the node is not persistent now, then _data._elem_ptr will be used, which then generate SEGFAULT.
2360 if (!list[i][j].isNull() && !list[i][j].isPersistent()) {
2361 CRLog::error("Invalid cached node, flag PERSISTENT are NOT set: segment=%d, index=%d", i, j);
2362 // list[i] will be freed in the caller method.
2363 return false;
2364 }
2365 if ( list[i][j].isElement() ) {
2366 // will be set by loadStyles/updateStyles
2367 //list[i][j]._data._pelem._styleIndex = 0;
2368 setNodeFontIndex( list[i][j]._handle._dataIndex, 0 );
2369 //list[i][j]._data._pelem._fontIndex = 0;
2370 }
2371 }
2372 }
2373 return true;
2374 }
2375
saveNodeData(lUInt16 type,ldomNode ** list,int nodecount)2376 bool tinyNodeCollection::saveNodeData( lUInt16 type, ldomNode ** list, int nodecount )
2377 {
2378 int count = ((nodecount+TNC_PART_LEN-1) >> TNC_PART_SHIFT);
2379 for (lUInt16 i=0; i<count; i++) {
2380 if (!list[i])
2381 continue;
2382 int offs = i*TNC_PART_LEN;
2383 int sz = TNC_PART_LEN;
2384 if (offs + sz > nodecount) {
2385 sz = nodecount - offs;
2386 }
2387 ldomNode buf[TNC_PART_LEN];
2388 memcpy(buf, list[i], sizeof(ldomNode) * sz);
2389 for (int j = 0; j < sz; j++) {
2390 buf[j].setDocumentIndex(_docIndex);
2391 // On 64bits builds, this serialized ldomNode may have some
2392 // random data at the end, for being:
2393 // union { [...] tinyElement * _elem_ptr; [...] lUInt32 _ptext_addr; [...] lUInt32 _nextFreeIndex }
2394 // To get "reproducible" cache files with a same file checksum, we'd
2395 // rather have the remains of the _elem_ptr sets to 0
2396 if (sizeof(int *) == 8) { // 64bits
2397 lUInt32 tmp = buf[j]._data._nextFreeIndex; // save 32bits part
2398 buf[j]._data._elem_ptr = 0; // clear 64bits area
2399 buf[j]._data._nextFreeIndex = tmp; // restore 32bits part
2400 }
2401 }
2402 if (!_cacheFile->write(type, i, (lUInt8*)buf, sizeof(ldomNode) * sz, COMPRESS_NODE_DATA))
2403 crFatalError(-1, "Cannot write node data");
2404 }
2405 return true;
2406 }
2407
2408 #define NODE_INDEX_MAGIC 0x19283746
saveNodeData()2409 bool tinyNodeCollection::saveNodeData()
2410 {
2411 SerialBuf buf(12, true);
2412 buf << (lUInt32)NODE_INDEX_MAGIC << (lUInt32)_elemCount << (lUInt32)_textCount;
2413 if ( !saveNodeData( CBT_ELEM_NODE, _elemList, _elemCount+1 ) )
2414 return false;
2415 if ( !saveNodeData( CBT_TEXT_NODE, _textList, _textCount+1 ) )
2416 return false;
2417 if ( !_cacheFile->write(CBT_NODE_INDEX, buf, COMPRESS_NODE_DATA) )
2418 return false;
2419 return true;
2420 }
2421
loadNodeData()2422 bool tinyNodeCollection::loadNodeData()
2423 {
2424 SerialBuf buf(0, true);
2425 if ( !_cacheFile->read((lUInt16)CBT_NODE_INDEX, buf) )
2426 return false;
2427 lUInt32 magic;
2428 lInt32 elemcount;
2429 lInt32 textcount;
2430 buf >> magic >> elemcount >> textcount;
2431 if ( magic != NODE_INDEX_MAGIC ) {
2432 return false;
2433 }
2434 if ( elemcount<=0 )
2435 return false;
2436 if ( textcount<=0 )
2437 return false;
2438 ldomNode * elemList[TNC_PART_COUNT] = { 0 };
2439 ldomNode * textList[TNC_PART_COUNT] = { 0 };
2440 if ( !loadNodeData( CBT_ELEM_NODE, elemList, elemcount+1 ) ) {
2441 for ( int i=0; i<TNC_PART_COUNT; i++ )
2442 if ( elemList[i] )
2443 free( elemList[i] );
2444 return false;
2445 }
2446 if ( !loadNodeData( CBT_TEXT_NODE, textList, textcount+1 ) ) {
2447 for ( int i=0; i<TNC_PART_COUNT; i++ )
2448 if ( textList[i] )
2449 free( textList[i] );
2450 // Also clean elemList previously successfully loaded, to avoid mem leak
2451 for ( int i=0; i<TNC_PART_COUNT; i++ )
2452 if ( elemList[i] )
2453 free( elemList[i] );
2454 return false;
2455 }
2456 for ( int i=0; i<TNC_PART_COUNT; i++ ) {
2457 if ( _elemList[i] )
2458 free( _elemList[i] );
2459 if ( _textList[i] )
2460 free( _textList[i] );
2461 }
2462 memcpy( _elemList, elemList, sizeof(elemList) );
2463 memcpy( _textList, textList, sizeof(textList) );
2464 _elemCount = elemcount;
2465 _textCount = textcount;
2466 return true;
2467 }
2468 #endif // BUILD_LITE!=1
2469
2470 /// get ldomNode instance pointer
getTinyNode(lUInt32 index)2471 ldomNode * tinyNodeCollection::getTinyNode( lUInt32 index )
2472 {
2473 if ( !index )
2474 return NULL;
2475 if ( index & 1 ) // element
2476 return &(_elemList[index>>TNC_PART_INDEX_SHIFT][(index>>4)&TNC_PART_MASK]);
2477 else // text
2478 return &(_textList[index>>TNC_PART_INDEX_SHIFT][(index>>4)&TNC_PART_MASK]);
2479 }
2480
2481 /// allocate new tiny node
allocTinyNode(int type)2482 ldomNode * tinyNodeCollection::allocTinyNode( int type )
2483 {
2484 ldomNode * res;
2485 if ( type & 1 ) {
2486 // allocate Element
2487 if ( _elemNextFree ) {
2488 // reuse existing free item
2489 int index = (_elemNextFree << 4) | type;
2490 res = getTinyNode(index);
2491 res->_handle._dataIndex = index;
2492 _elemNextFree = res->_data._nextFreeIndex;
2493 } else {
2494 // create new item
2495 _elemCount++;
2496 int idx = _elemCount >> TNC_PART_SHIFT;
2497 if (idx >= TNC_PART_COUNT)
2498 crFatalError(1003, "allocTinyNode: can't create any more element nodes (hard limit)");
2499 ldomNode * part = _elemList[idx];
2500 if ( !part ) {
2501 part = (ldomNode*)calloc(TNC_PART_LEN, sizeof(*part));
2502 _elemList[idx] = part;
2503 }
2504 res = &part[_elemCount & TNC_PART_MASK];
2505 res->setDocumentIndex( _docIndex );
2506 res->_handle._dataIndex = (_elemCount << 4) | type;
2507 }
2508 _itemCount++;
2509 } else {
2510 // allocate Text
2511 if ( _textNextFree ) {
2512 // reuse existing free item
2513 int index = (_textNextFree << 4) | type;
2514 res = getTinyNode(index);
2515 res->_handle._dataIndex = index;
2516 _textNextFree = res->_data._nextFreeIndex;
2517 } else {
2518 // create new item
2519 _textCount++;
2520 if (_textCount >= (TNC_PART_COUNT << TNC_PART_SHIFT))
2521 crFatalError(1003, "allocTinyNode: can't create any more text nodes (hard limit)");
2522 ldomNode * part = _textList[_textCount >> TNC_PART_SHIFT];
2523 if ( !part ) {
2524 part = (ldomNode*)calloc(TNC_PART_LEN, sizeof(*part));
2525 _textList[ _textCount >> TNC_PART_SHIFT ] = part;
2526 }
2527 res = &part[_textCount & TNC_PART_MASK];
2528 res->setDocumentIndex( _docIndex );
2529 res->_handle._dataIndex = (_textCount << 4) | type;
2530 }
2531 _itemCount++;
2532 }
2533 _nodeStyleHash = 0;
2534 return res;
2535 }
2536
recycleTinyNode(lUInt32 index)2537 void tinyNodeCollection::recycleTinyNode( lUInt32 index )
2538 {
2539 if ( index & 1 ) {
2540 // element
2541 index >>= 4;
2542 ldomNode * part = _elemList[index >> TNC_PART_SHIFT];
2543 ldomNode * p = &part[index & TNC_PART_MASK];
2544 p->_handle._dataIndex = 0; // indicates NULL node
2545 p->_data._nextFreeIndex = _elemNextFree;
2546 _elemNextFree = index;
2547 _itemCount--;
2548 } else {
2549 // text
2550 index >>= 4;
2551 ldomNode * part = _textList[index >> TNC_PART_SHIFT];
2552 ldomNode * p = &part[index & TNC_PART_MASK];
2553 p->_handle._dataIndex = 0; // indicates NULL node
2554 p->_data._nextFreeIndex = _textNextFree;
2555 _textNextFree = index;
2556 _itemCount--;
2557 }
2558 _nodeStyleHash = 0;
2559 }
2560
~tinyNodeCollection()2561 tinyNodeCollection::~tinyNodeCollection()
2562 {
2563 #if BUILD_LITE!=1
2564 if ( _cacheFile )
2565 delete _cacheFile;
2566 #endif
2567 // clear all elem parts
2568 for ( int partindex = 0; partindex<=(_elemCount>>TNC_PART_SHIFT); partindex++ ) {
2569 ldomNode * part = _elemList[partindex];
2570 if ( part ) {
2571 int n0 = TNC_PART_LEN * partindex;
2572 for ( int i=0; i<TNC_PART_LEN && n0+i<=_elemCount; i++ )
2573 part[i].onCollectionDestroy();
2574 free(part);
2575 _elemList[partindex] = NULL;
2576 }
2577 }
2578 // clear all text parts
2579 for ( int partindex = 0; partindex<=(_textCount>>TNC_PART_SHIFT); partindex++ ) {
2580 ldomNode * part = _textList[partindex];
2581 if ( part ) {
2582 int n0 = TNC_PART_LEN * partindex;
2583 for ( int i=0; i<TNC_PART_LEN && n0+i<=_textCount; i++ )
2584 part[i].onCollectionDestroy();
2585 free(part);
2586 _textList[partindex] = NULL;
2587 }
2588 }
2589 // document unregistered in ldomDocument destructor
2590 }
2591
2592 #if BUILD_LITE!=1
2593 /// put all objects into persistent storage
persist(CRTimerUtil & maxTime)2594 void tinyNodeCollection::persist( CRTimerUtil & maxTime )
2595 {
2596 CRLog::info("lxmlDocBase::persist() invoked - converting all nodes to persistent objects");
2597 // elements
2598 for ( int partindex = 0; partindex<=(_elemCount>>TNC_PART_SHIFT); partindex++ ) {
2599 ldomNode * part = _elemList[partindex];
2600 if ( part ) {
2601 int n0 = TNC_PART_LEN * partindex;
2602 for ( int i=0; i<TNC_PART_LEN && n0+i<=_elemCount; i++ )
2603 if ( !part[i].isNull() && !part[i].isPersistent() ) {
2604 part[i].persist();
2605 if (maxTime.expired())
2606 return;
2607 }
2608 }
2609 }
2610 //_cacheFile->flush(false); // intermediate flush
2611 if ( maxTime.expired() )
2612 return;
2613 // texts
2614 for ( int partindex = 0; partindex<=(_textCount>>TNC_PART_SHIFT); partindex++ ) {
2615 ldomNode * part = _textList[partindex];
2616 if ( part ) {
2617 int n0 = TNC_PART_LEN * partindex;
2618 for ( int i=0; i<TNC_PART_LEN && n0+i<=_textCount; i++ )
2619 if ( !part[i].isNull() && !part[i].isPersistent() ) {
2620 //CRLog::trace("before persist");
2621 part[i].persist();
2622 //CRLog::trace("after persist");
2623 if (maxTime.expired())
2624 return;
2625 }
2626 }
2627 }
2628 //_cacheFile->flush(false); // intermediate flush
2629 }
2630 #endif
2631
2632
2633 /*
2634
2635 Struct Node
2636 { document, nodeid&type, address }
2637
2638 Data Offset format
2639
2640 Chunk index, offset, type.
2641
2642 getDataPtr( lUInt32 address )
2643 {
2644 return (address & TYPE_MASK) ? textStorage.get( address & ~TYPE_MASK ) : elementStorage.get( address & ~TYPE_MASK );
2645 }
2646
2647 index->instance, data
2648 >
2649 [index] { vtable, doc, id, dataptr } // 16 bytes per node
2650
2651
2652 */
2653
2654
2655 /// saves all unsaved chunks to cache file
save(CRTimerUtil & maxTime)2656 bool ldomDataStorageManager::save( CRTimerUtil & maxTime )
2657 {
2658 bool res = true;
2659 #if BUILD_LITE!=1
2660 if ( !_cache )
2661 return true;
2662 for ( int i=0; i<_chunks.length(); i++ ) {
2663 if ( !_chunks[i]->save() ) {
2664 res = false;
2665 break;
2666 }
2667 //CRLog::trace("time elapsed: %d", (int)maxTime.elapsed());
2668 if (maxTime.expired())
2669 return res;
2670 // if ( (i&3)==3 && maxTime.expired() )
2671 // return res;
2672 }
2673 if (!maxTime.infinite())
2674 _cache->flush(false, maxTime); // intermediate flush
2675 if ( maxTime.expired() )
2676 return res;
2677 if ( !res )
2678 return false;
2679 // save chunk index
2680 int n = _chunks.length();
2681 SerialBuf buf(n*4+4, true);
2682 buf << (lUInt32)n;
2683 for ( int i=0; i<n; i++ ) {
2684 buf << (lUInt32)_chunks[i]->_bufpos;
2685 }
2686 res = _cache->write( cacheType(), 0xFFFF, buf, COMPRESS_NODE_STORAGE_DATA );
2687 if ( !res ) {
2688 CRLog::error("ldomDataStorageManager::save() - Cannot write chunk index");
2689 }
2690 #endif
2691 return res;
2692 }
2693
2694 /// load chunk index from cache file
load()2695 bool ldomDataStorageManager::load()
2696 {
2697 #if BUILD_LITE!=1
2698 if ( !_cache )
2699 return false;
2700 //load chunk index
2701 SerialBuf buf(0, true);
2702 if ( !_cache->read( cacheType(), 0xFFFF, buf ) ) {
2703 CRLog::error("ldomDataStorageManager::load() - Cannot read chunk index");
2704 return false;
2705 }
2706 lUInt32 n;
2707 buf >> n;
2708 if (n > 10000)
2709 return false; // invalid
2710 _recentChunk = NULL;
2711 _chunks.clear();
2712 lUInt32 compsize = 0;
2713 lUInt32 uncompsize = 0;
2714 for (lUInt32 i=0; i<n; i++ ) {
2715 buf >> uncompsize;
2716 if ( buf.error() ) {
2717 _chunks.clear();
2718 return false;
2719 }
2720 _chunks.add( new ldomTextStorageChunk( this, (lUInt16)i,compsize, uncompsize ) );
2721 }
2722 return true;
2723 #else
2724 return false;
2725 #endif
2726 }
2727
2728 /// get chunk pointer and update usage data
getChunk(lUInt32 address)2729 ldomTextStorageChunk * ldomDataStorageManager::getChunk( lUInt32 address )
2730 {
2731 ldomTextStorageChunk * chunk = _chunks[address>>16];
2732 if ( chunk!=_recentChunk ) {
2733 if ( chunk->_prevRecent )
2734 chunk->_prevRecent->_nextRecent = chunk->_nextRecent;
2735 if ( chunk->_nextRecent )
2736 chunk->_nextRecent->_prevRecent = chunk->_prevRecent;
2737 chunk->_prevRecent = NULL;
2738 if (((chunk->_nextRecent = _recentChunk)))
2739 _recentChunk->_prevRecent = chunk;
2740 _recentChunk = chunk;
2741 }
2742 chunk->ensureUnpacked();
2743 return chunk;
2744 }
2745
setCache(CacheFile * cache)2746 void ldomDataStorageManager::setCache( CacheFile * cache )
2747 {
2748 _cache = cache;
2749 }
2750
2751 /// type
cacheType()2752 lUInt16 ldomDataStorageManager::cacheType()
2753 {
2754 switch ( _type ) {
2755 case 't':
2756 return CBT_TEXT_DATA;
2757 case 'e':
2758 return CBT_ELEM_DATA;
2759 case 'r':
2760 return CBT_RECT_DATA;
2761 case 's':
2762 return CBT_ELEM_STYLE_DATA;
2763 }
2764 return 0;
2765 }
2766
2767 /// get or allocate space for element style data item
getStyleData(lUInt32 elemDataIndex,ldomNodeStyleInfo * dst)2768 void ldomDataStorageManager::getStyleData( lUInt32 elemDataIndex, ldomNodeStyleInfo * dst )
2769 {
2770 // assume storage has raw data chunks
2771 int index = elemDataIndex>>4; // element sequential index
2772 int chunkIndex = index >> STYLE_DATA_CHUNK_ITEMS_SHIFT;
2773 while ( _chunks.length() <= chunkIndex ) {
2774 //if ( _chunks.length()>0 )
2775 // _chunks[_chunks.length()-1]->compact();
2776 _chunks.add( new ldomTextStorageChunk(STYLE_DATA_CHUNK_SIZE, this, _chunks.length()) );
2777 getChunk( (_chunks.length()-1)<<16 );
2778 compact( 0 );
2779 }
2780 ldomTextStorageChunk * chunk = getChunk( chunkIndex<<16 );
2781 int offsetIndex = index & STYLE_DATA_CHUNK_MASK;
2782 chunk->getRaw( offsetIndex * sizeof(ldomNodeStyleInfo), sizeof(ldomNodeStyleInfo), (lUInt8 *)dst );
2783 }
2784
2785 /// set element style data item
setStyleData(lUInt32 elemDataIndex,const ldomNodeStyleInfo * src)2786 void ldomDataStorageManager::setStyleData( lUInt32 elemDataIndex, const ldomNodeStyleInfo * src )
2787 {
2788 // assume storage has raw data chunks
2789 int index = elemDataIndex>>4; // element sequential index
2790 int chunkIndex = index >> STYLE_DATA_CHUNK_ITEMS_SHIFT;
2791 while ( _chunks.length() <= chunkIndex ) {
2792 //if ( _chunks.length()>0 )
2793 // _chunks[_chunks.length()-1]->compact();
2794 _chunks.add( new ldomTextStorageChunk(STYLE_DATA_CHUNK_SIZE, this, _chunks.length()) );
2795 getChunk( (_chunks.length()-1)<<16 );
2796 compact( 0 );
2797 }
2798 ldomTextStorageChunk * chunk = getChunk( chunkIndex<<16 );
2799 int offsetIndex = index & STYLE_DATA_CHUNK_MASK;
2800 chunk->setRaw( offsetIndex * sizeof(ldomNodeStyleInfo), sizeof(ldomNodeStyleInfo), (const lUInt8 *)src );
2801 }
2802
2803
2804 /// get or allocate space for rect data item
getRendRectData(lUInt32 elemDataIndex,lvdomElementFormatRec * dst)2805 void ldomDataStorageManager::getRendRectData( lUInt32 elemDataIndex, lvdomElementFormatRec * dst )
2806 {
2807 // assume storage has raw data chunks
2808 int index = elemDataIndex>>4; // element sequential index
2809 int chunkIndex = index >> RECT_DATA_CHUNK_ITEMS_SHIFT;
2810 while ( _chunks.length() <= chunkIndex ) {
2811 //if ( _chunks.length()>0 )
2812 // _chunks[_chunks.length()-1]->compact();
2813 _chunks.add( new ldomTextStorageChunk(RECT_DATA_CHUNK_SIZE, this, _chunks.length()) );
2814 getChunk( (_chunks.length()-1)<<16 );
2815 compact( 0 );
2816 }
2817 ldomTextStorageChunk * chunk = getChunk( chunkIndex<<16 );
2818 int offsetIndex = index & RECT_DATA_CHUNK_MASK;
2819 chunk->getRaw( offsetIndex * sizeof(lvdomElementFormatRec), sizeof(lvdomElementFormatRec), (lUInt8 *)dst );
2820 }
2821
2822 /// set rect data item
setRendRectData(lUInt32 elemDataIndex,const lvdomElementFormatRec * src)2823 void ldomDataStorageManager::setRendRectData( lUInt32 elemDataIndex, const lvdomElementFormatRec * src )
2824 {
2825 // assume storage has raw data chunks
2826 int index = elemDataIndex>>4; // element sequential index
2827 int chunkIndex = index >> RECT_DATA_CHUNK_ITEMS_SHIFT;
2828 while ( _chunks.length() <= chunkIndex ) {
2829 //if ( _chunks.length()>0 )
2830 // _chunks[_chunks.length()-1]->compact();
2831 _chunks.add( new ldomTextStorageChunk(RECT_DATA_CHUNK_SIZE, this, _chunks.length()) );
2832 getChunk( (_chunks.length()-1)<<16 );
2833 compact( 0 );
2834 }
2835 ldomTextStorageChunk * chunk = getChunk( chunkIndex<<16 );
2836 int offsetIndex = index & RECT_DATA_CHUNK_MASK;
2837 chunk->setRaw( offsetIndex * sizeof(lvdomElementFormatRec), sizeof(lvdomElementFormatRec), (const lUInt8 *)src );
2838 }
2839
2840 #if BUILD_LITE!=1
allocText(lUInt32 dataIndex,lUInt32 parentIndex,const lString8 & text)2841 lUInt32 ldomDataStorageManager::allocText( lUInt32 dataIndex, lUInt32 parentIndex, const lString8 & text )
2842 {
2843 if ( !_activeChunk ) {
2844 _activeChunk = new ldomTextStorageChunk(this, _chunks.length());
2845 _chunks.add( _activeChunk );
2846 getChunk( (_chunks.length()-1)<<16 );
2847 compact( 0 );
2848 }
2849 int offset = _activeChunk->addText( dataIndex, parentIndex, text );
2850 if ( offset<0 ) {
2851 // no space in current chunk, add one more chunk
2852 //_activeChunk->compact();
2853 _activeChunk = new ldomTextStorageChunk(this, _chunks.length());
2854 _chunks.add( _activeChunk );
2855 getChunk( (_chunks.length()-1)<<16 );
2856 compact( 0 );
2857 offset = _activeChunk->addText( dataIndex, parentIndex, text );
2858 if ( offset<0 )
2859 crFatalError(1001, "Unexpected error while allocation of text");
2860 }
2861 return offset | (_activeChunk->getIndex()<<16);
2862 }
2863
allocElem(lUInt32 dataIndex,lUInt32 parentIndex,int childCount,int attrCount)2864 lUInt32 ldomDataStorageManager::allocElem( lUInt32 dataIndex, lUInt32 parentIndex, int childCount, int attrCount )
2865 {
2866 if ( !_activeChunk ) {
2867 _activeChunk = new ldomTextStorageChunk(this, _chunks.length());
2868 _chunks.add( _activeChunk );
2869 getChunk( (_chunks.length()-1)<<16 );
2870 compact( 0 );
2871 }
2872 int offset = _activeChunk->addElem( dataIndex, parentIndex, childCount, attrCount );
2873 if ( offset<0 ) {
2874 // no space in current chunk, add one more chunk
2875 //_activeChunk->compact();
2876 _activeChunk = new ldomTextStorageChunk(this, _chunks.length());
2877 _chunks.add( _activeChunk );
2878 getChunk( (_chunks.length()-1)<<16 );
2879 compact( 0 );
2880 offset = _activeChunk->addElem( dataIndex, parentIndex, childCount, attrCount );
2881 if ( offset<0 )
2882 crFatalError(1002, "Unexpected error while allocation of element");
2883 }
2884 return offset | (_activeChunk->getIndex()<<16);
2885 }
2886
2887 /// call to invalidate chunk if content is modified
modified(lUInt32 addr)2888 void ldomDataStorageManager::modified( lUInt32 addr )
2889 {
2890 ldomTextStorageChunk * chunk = getChunk(addr);
2891 chunk->modified();
2892 }
2893
2894 /// change node's parent
setParent(lUInt32 address,lUInt32 parent)2895 bool ldomDataStorageManager::setParent( lUInt32 address, lUInt32 parent )
2896 {
2897 ldomTextStorageChunk * chunk = getChunk(address);
2898 return chunk->setParent(address&0xFFFF, parent);
2899 }
2900
2901 /// free data item
freeNode(lUInt32 addr)2902 void ldomDataStorageManager::freeNode( lUInt32 addr )
2903 {
2904 ldomTextStorageChunk * chunk = getChunk(addr);
2905 chunk->freeNode(addr&0xFFFF);
2906 }
2907
2908
getText(lUInt32 address)2909 lString8 ldomDataStorageManager::getText( lUInt32 address )
2910 {
2911 ldomTextStorageChunk * chunk = getChunk(address);
2912 return chunk->getText(address&0xFFFF);
2913 }
2914
2915 /// get pointer to element data
getElem(lUInt32 addr)2916 ElementDataStorageItem * ldomDataStorageManager::getElem( lUInt32 addr )
2917 {
2918 ldomTextStorageChunk * chunk = getChunk(addr);
2919 return chunk->getElem(addr&0xFFFF);
2920 }
2921
2922 /// returns node's parent by address
getParent(lUInt32 addr)2923 lUInt32 ldomDataStorageManager::getParent( lUInt32 addr )
2924 {
2925 ldomTextStorageChunk * chunk = getChunk(addr);
2926 return chunk->getElem(addr&0xFFFF)->parentIndex;
2927 }
2928 #endif
2929
compact(int reservedSpace,const ldomTextStorageChunk * excludedChunk)2930 void ldomDataStorageManager::compact( int reservedSpace, const ldomTextStorageChunk* excludedChunk )
2931 {
2932 #if BUILD_LITE!=1
2933 if ( _uncompressedSize + reservedSpace > _maxUncompressedSize + _maxUncompressedSize/10 ) { // allow +10% overflow
2934 if (!_maxSizeReachedWarned) {
2935 // Log once to stdout that we reached maxUncompressedSize, so we can know
2936 // of this fact and consider it as a possible cause for crengine bugs
2937 CRLog::warn("Storage for %s reached max allowed uncompressed size (%u > %u)",
2938 (_type == 't' ? "TEXT NODES" : (_type == 'e' ? "ELEMENTS" : (_type == 'r' ? "RENDERED RECTS" : (_type == 's' ? "ELEMENTS' STYLE DATA" : "OTHER")))),
2939 _uncompressedSize, _maxUncompressedSize);
2940 CRLog::warn(" -> check settings.");
2941 _maxSizeReachedWarned = true; // warn only once
2942 }
2943 _owner->setCacheFileStale(true); // we may write: consider cache file stale
2944 // do compacting
2945 int sumsize = reservedSpace;
2946 for ( ldomTextStorageChunk * p = _recentChunk; p; p = p->_nextRecent ) {
2947 if ( (int)p->_bufsize + sumsize < _maxUncompressedSize ||
2948 (p==_activeChunk && reservedSpace<0xFFFFFFF) ||
2949 p == excludedChunk) {
2950 // fits
2951 sumsize += p->_bufsize;
2952 } else {
2953 if ( !_cache )
2954 _owner->createCacheFile();
2955 if ( _cache ) {
2956 if ( !p->swapToCache(true) ) {
2957 crFatalError(111, "Swap file writing error!");
2958 }
2959 }
2960 }
2961 }
2962
2963 }
2964 #endif
2965 }
2966
2967 // max 512K of uncompressed data (~8 chunks)
2968 #define DEF_MAX_UNCOMPRESSED_SIZE 0x80000
ldomDataStorageManager(tinyNodeCollection * owner,char type,lUInt32 maxUnpackedSize,lUInt32 chunkSize)2969 ldomDataStorageManager::ldomDataStorageManager( tinyNodeCollection * owner, char type, lUInt32 maxUnpackedSize, lUInt32 chunkSize )
2970 : _owner( owner )
2971 , _activeChunk(NULL)
2972 , _recentChunk(NULL)
2973 , _cache(NULL)
2974 , _uncompressedSize(0)
2975 , _maxUncompressedSize(maxUnpackedSize)
2976 , _chunkSize(chunkSize)
2977 , _type(type)
2978 , _maxSizeReachedWarned(false)
2979 {
2980 }
2981
~ldomDataStorageManager()2982 ldomDataStorageManager::~ldomDataStorageManager()
2983 {
2984 }
2985
2986 /// create chunk to be read from cache file
ldomTextStorageChunk(ldomDataStorageManager * manager,lUInt16 index,lUInt32 compsize,lUInt32 uncompsize)2987 ldomTextStorageChunk::ldomTextStorageChunk(ldomDataStorageManager * manager, lUInt16 index, lUInt32 compsize, lUInt32 uncompsize)
2988 : _manager(manager)
2989 , _nextRecent(NULL)
2990 , _prevRecent(NULL)
2991 , _buf(NULL) /// buffer for uncompressed data
2992 , _bufsize(0) /// _buf (uncompressed) area size, bytes
2993 , _bufpos(uncompsize) /// _buf (uncompressed) data write position (for appending of new data)
2994 , _index(index) /// ? index of chunk in storage
2995 , _type( manager->_type )
2996 , _saved(true)
2997 {
2998 CR_UNUSED(compsize);
2999 }
3000
ldomTextStorageChunk(lUInt32 preAllocSize,ldomDataStorageManager * manager,lUInt16 index)3001 ldomTextStorageChunk::ldomTextStorageChunk(lUInt32 preAllocSize, ldomDataStorageManager * manager, lUInt16 index)
3002 : _manager(manager)
3003 , _nextRecent(NULL)
3004 , _prevRecent(NULL)
3005 , _buf(NULL) /// buffer for uncompressed data
3006 , _bufsize(preAllocSize) /// _buf (uncompressed) area size, bytes
3007 , _bufpos(preAllocSize) /// _buf (uncompressed) data write position (for appending of new data)
3008 , _index(index) /// ? index of chunk in storage
3009 , _type( manager->_type )
3010 , _saved(false)
3011 {
3012 _buf = (lUInt8*)calloc(preAllocSize, sizeof(*_buf));
3013 _manager->_uncompressedSize += _bufsize;
3014 }
3015
ldomTextStorageChunk(ldomDataStorageManager * manager,lUInt16 index)3016 ldomTextStorageChunk::ldomTextStorageChunk(ldomDataStorageManager * manager, lUInt16 index)
3017 : _manager(manager)
3018 , _nextRecent(NULL)
3019 , _prevRecent(NULL)
3020 , _buf(NULL) /// buffer for uncompressed data
3021 , _bufsize(0) /// _buf (uncompressed) area size, bytes
3022 , _bufpos(0) /// _buf (uncompressed) data write position (for appending of new data)
3023 , _index(index) /// ? index of chunk in storage
3024 , _type( manager->_type )
3025 , _saved(false)
3026 {
3027 }
3028
3029 #if BUILD_LITE!=1
3030 /// saves data to cache file, if unsaved
save()3031 bool ldomTextStorageChunk::save()
3032 {
3033 if ( !_saved )
3034 return swapToCache(false);
3035 return true;
3036 }
3037 #endif
3038
~ldomTextStorageChunk()3039 ldomTextStorageChunk::~ldomTextStorageChunk()
3040 {
3041 setunpacked(NULL, 0);
3042 }
3043
3044
3045 #if BUILD_LITE!=1
3046 /// pack data, and remove unpacked, put packed data to cache file
swapToCache(bool removeFromMemory)3047 bool ldomTextStorageChunk::swapToCache( bool removeFromMemory )
3048 {
3049 if ( !_manager->_cache )
3050 return true;
3051 if ( _buf ) {
3052 if ( !_saved && _manager->_cache) {
3053 #if DEBUG_DOM_STORAGE==1
3054 CRLog::debug("Writing %d bytes of chunk %c%d to cache", _bufpos, _type, _index);
3055 #endif
3056 if ( !_manager->_cache->write( _manager->cacheType(), _index, _buf, _bufpos, COMPRESS_NODE_STORAGE_DATA) ) {
3057 CRLog::error("Error while swapping of chunk %c%d to cache file", _type, _index);
3058 crFatalError(-1, "Error while swapping of chunk to cache file");
3059 return false;
3060 }
3061 _saved = true;
3062 }
3063 }
3064 if ( removeFromMemory ) {
3065 setunpacked(NULL, 0);
3066 }
3067 return true;
3068 }
3069
3070 /// read packed data from cache
restoreFromCache()3071 bool ldomTextStorageChunk::restoreFromCache()
3072 {
3073 if ( _buf )
3074 return true;
3075 if ( !_saved )
3076 return false;
3077 int size;
3078 if ( !_manager->_cache->read( _manager->cacheType(), _index, _buf, size ) )
3079 return false;
3080 _bufsize = size;
3081 _manager->_uncompressedSize += _bufsize;
3082 #if DEBUG_DOM_STORAGE==1
3083 CRLog::debug("Read %d bytes of chunk %c%d from cache", _bufsize, _type, _index);
3084 #endif
3085 return true;
3086 }
3087 #endif
3088
3089 /// get raw data bytes
getRaw(int offset,int size,lUInt8 * buf)3090 void ldomTextStorageChunk::getRaw( int offset, int size, lUInt8 * buf )
3091 {
3092 #ifdef _DEBUG
3093 if ( !_buf || offset+size>(int)_bufpos || offset+size>(int)_bufsize )
3094 crFatalError(123, "ldomTextStorageChunk: Invalid raw data buffer position");
3095 #endif
3096 memcpy( buf, _buf+offset, size );
3097 }
3098
3099 /// set raw data bytes
setRaw(int offset,int size,const lUInt8 * buf)3100 void ldomTextStorageChunk::setRaw( int offset, int size, const lUInt8 * buf )
3101 {
3102 #ifdef _DEBUG
3103 if ( !_buf || offset+size>(int)_bufpos || offset+size>(int)_bufsize )
3104 crFatalError(123, "ldomTextStorageChunk: Invalid raw data buffer position");
3105 #endif
3106 if (memcmp(_buf+offset, buf, size) != 0) {
3107 memcpy(_buf+offset, buf, size);
3108 modified();
3109 }
3110 }
3111
3112
3113 /// returns free space in buffer
space()3114 int ldomTextStorageChunk::space()
3115 {
3116 return _bufsize - _bufpos;
3117 }
3118
3119 #if BUILD_LITE!=1
3120 /// returns free space in buffer
addText(lUInt32 dataIndex,lUInt32 parentIndex,const lString8 & text)3121 int ldomTextStorageChunk::addText( lUInt32 dataIndex, lUInt32 parentIndex, const lString8 & text )
3122 {
3123 int itemsize = (sizeof(TextDataStorageItem)+text.length()-2 + 15) & 0xFFFFFFF0;
3124 if ( !_buf ) {
3125 // create new buffer, if necessary
3126 _bufsize = _manager->_chunkSize > itemsize ? _manager->_chunkSize : itemsize;
3127 _buf = (lUInt8*)calloc(_bufsize, sizeof(*_buf));
3128 _bufpos = 0;
3129 _manager->_uncompressedSize += _bufsize;
3130 }
3131 if ( (int)_bufsize - (int)_bufpos < itemsize )
3132 return -1;
3133 TextDataStorageItem * p = (TextDataStorageItem*)(_buf + _bufpos);
3134 p->sizeDiv16 = (lUInt16)(itemsize >> 4);
3135 p->dataIndex = dataIndex;
3136 p->parentIndex = parentIndex;
3137 p->type = LXML_TEXT_NODE;
3138 p->length = (lUInt16)text.length();
3139 memcpy(p->text, text.c_str(), p->length);
3140 int res = _bufpos >> 4;
3141 _bufpos += itemsize;
3142 return res;
3143 }
3144
3145 /// adds new element item to buffer, returns offset inside chunk of stored data
addElem(lUInt32 dataIndex,lUInt32 parentIndex,int childCount,int attrCount)3146 int ldomTextStorageChunk::addElem(lUInt32 dataIndex, lUInt32 parentIndex, int childCount, int attrCount)
3147 {
3148 int itemsize = (sizeof(ElementDataStorageItem) + attrCount*(sizeof(lUInt16)*2 + sizeof(lUInt32)) + childCount*sizeof(lUInt32) - sizeof(lUInt32) + 15) & 0xFFFFFFF0;
3149 if ( !_buf ) {
3150 // create new buffer, if necessary
3151 _bufsize = _manager->_chunkSize > itemsize ? _manager->_chunkSize : itemsize;
3152 _buf = (lUInt8*)calloc(_bufsize, sizeof(*_buf));
3153 _bufpos = 0;
3154 _manager->_uncompressedSize += _bufsize;
3155 }
3156 if ( _bufsize - _bufpos < (unsigned)itemsize )
3157 return -1;
3158 ElementDataStorageItem *item = (ElementDataStorageItem *)(_buf + _bufpos);
3159 if ( item ) {
3160 item->sizeDiv16 = (lUInt16)(itemsize >> 4);
3161 item->dataIndex = dataIndex;
3162 item->parentIndex = parentIndex;
3163 item->type = LXML_ELEMENT_NODE;
3164 item->parentIndex = parentIndex;
3165 item->attrCount = (lUInt16)attrCount;
3166 item->childCount = childCount;
3167 }
3168 int res = _bufpos >> 4;
3169 _bufpos += itemsize;
3170 return res;
3171 }
3172
3173 /// set node parent by offset
setParent(int offset,lUInt32 parentIndex)3174 bool ldomTextStorageChunk::setParent( int offset, lUInt32 parentIndex )
3175 {
3176 offset <<= 4;
3177 if ( offset>=0 && offset<(int)_bufpos ) {
3178 TextDataStorageItem * item = (TextDataStorageItem *)(_buf+offset);
3179 if ( (int)parentIndex!=item->parentIndex ) {
3180 item->parentIndex = parentIndex;
3181 modified();
3182 return true;
3183 } else
3184 return false;
3185 }
3186 CRLog::error("Offset %d is out of bounds (%d) for storage chunk %c%d, chunkCount=%d", offset, this->_bufpos, this->_type, this->_index, _manager->_chunks.length() );
3187 return false;
3188 }
3189
3190
3191 /// get text node parent by offset
getParent(int offset)3192 lUInt32 ldomTextStorageChunk::getParent( int offset )
3193 {
3194 offset <<= 4;
3195 if ( offset>=0 && offset<(int)_bufpos ) {
3196 TextDataStorageItem * item = (TextDataStorageItem *)(_buf+offset);
3197 return item->parentIndex;
3198 }
3199 CRLog::error("Offset %d is out of bounds (%d) for storage chunk %c%d, chunkCount=%d", offset, this->_bufpos, this->_type, this->_index, _manager->_chunks.length() );
3200 return 0;
3201 }
3202
3203 /// get pointer to element data
getElem(int offset)3204 ElementDataStorageItem * ldomTextStorageChunk::getElem( int offset )
3205 {
3206 offset <<= 4;
3207 if ( offset>=0 && offset<(int)_bufpos ) {
3208 ElementDataStorageItem * item = (ElementDataStorageItem *)(_buf+offset);
3209 return item;
3210 }
3211 CRLog::error("Offset %d is out of bounds (%d) for storage chunk %c%d, chunkCount=%d", offset, this->_bufpos, this->_type, this->_index, _manager->_chunks.length() );
3212 return NULL;
3213 }
3214 #endif
3215
3216
3217 /// call to invalidate chunk if content is modified
modified()3218 void ldomTextStorageChunk::modified()
3219 {
3220 if ( !_buf ) {
3221 CRLog::error("Modified is called for node which is not in memory");
3222 }
3223 _saved = false;
3224 }
3225
3226 #if BUILD_LITE!=1
3227 /// free data item
freeNode(int offset)3228 void ldomTextStorageChunk::freeNode( int offset )
3229 {
3230 offset <<= 4;
3231 if ( _buf && offset>=0 && offset<(int)_bufpos ) {
3232 TextDataStorageItem * item = (TextDataStorageItem *)(_buf+offset);
3233 if ( (item->type==LXML_TEXT_NODE || item->type==LXML_ELEMENT_NODE) && item->dataIndex ) {
3234 item->type = LXML_NO_DATA;
3235 item->dataIndex = 0;
3236 modified();
3237 }
3238 }
3239 }
3240
3241 /// get text item from buffer by offset
getText(int offset)3242 lString8 ldomTextStorageChunk::getText( int offset )
3243 {
3244 offset <<= 4;
3245 if ( _buf && offset>=0 && offset<(int)_bufpos ) {
3246 TextDataStorageItem * item = (TextDataStorageItem *)(_buf+offset);
3247 return item->getText8();
3248 }
3249 return lString8::empty_str;
3250 }
3251 #endif
3252
3253
3254 /// pack data from _buf to _compbuf
ldomPack(const lUInt8 * buf,int bufsize,lUInt8 * & dstbuf,lUInt32 & dstsize)3255 bool ldomPack( const lUInt8 * buf, int bufsize, lUInt8 * &dstbuf, lUInt32 & dstsize )
3256 {
3257 lUInt8 tmp[PACK_BUF_SIZE]; // 64K buffer for compressed data
3258 int ret;
3259 z_stream z;
3260 z.zalloc = Z_NULL;
3261 z.zfree = Z_NULL;
3262 z.opaque = Z_NULL;
3263 ret = deflateInit( &z, DOC_DATA_COMPRESSION_LEVEL );
3264 if ( ret != Z_OK )
3265 return false;
3266 z.avail_in = bufsize;
3267 z.next_in = (unsigned char *)buf;
3268 int compressed_size = 0;
3269 lUInt8 *compressed_buf = NULL;
3270 while (true) {
3271 z.avail_out = PACK_BUF_SIZE;
3272 z.next_out = tmp;
3273 ret = deflate( &z, Z_FINISH );
3274 if (ret == Z_STREAM_ERROR) { // some error occured while packing
3275 deflateEnd(&z);
3276 if (compressed_buf)
3277 free(compressed_buf);
3278 // printf("deflate() error: %d (%d > %d)\n", ret, bufsize, compressed_size);
3279 return false;
3280 }
3281 int have = PACK_BUF_SIZE - z.avail_out;
3282 compressed_buf = cr_realloc(compressed_buf, compressed_size + have);
3283 memcpy(compressed_buf + compressed_size, tmp, have );
3284 compressed_size += have;
3285 if (z.avail_out != 0) // buffer not fully filled = deflate is done
3286 break;
3287 // printf("deflate() additional call needed (%d > %d)\n", bufsize, compressed_size);
3288 }
3289 deflateEnd(&z);
3290 dstsize = compressed_size;
3291 dstbuf = compressed_buf;
3292 // printf("deflate() done: %d > %d\n", bufsize, compressed_size);
3293 return true;
3294 }
3295
3296 /// unpack data from _compbuf to _buf
ldomUnpack(const lUInt8 * compbuf,int compsize,lUInt8 * & dstbuf,lUInt32 & dstsize)3297 bool ldomUnpack( const lUInt8 * compbuf, int compsize, lUInt8 * &dstbuf, lUInt32 & dstsize )
3298 {
3299 lUInt8 tmp[UNPACK_BUF_SIZE]; // 256K buffer for uncompressed data
3300 int ret;
3301 z_stream z = { 0 };
3302 z.zalloc = Z_NULL;
3303 z.zfree = Z_NULL;
3304 z.opaque = Z_NULL;
3305 ret = inflateInit( &z );
3306 if ( ret != Z_OK )
3307 return false;
3308 z.avail_in = compsize;
3309 z.next_in = (unsigned char *)compbuf;
3310 lUInt32 uncompressed_size = 0;
3311 lUInt8 *uncompressed_buf = NULL;
3312 while (true) {
3313 z.avail_out = UNPACK_BUF_SIZE;
3314 z.next_out = tmp;
3315 ret = inflate( &z, Z_SYNC_FLUSH );
3316 if (ret != Z_OK && ret != Z_STREAM_END) { // some error occured while unpacking
3317 inflateEnd(&z);
3318 if (uncompressed_buf)
3319 free(uncompressed_buf);
3320 // printf("inflate() error: %d (%d > %d)\n", ret, compsize, uncompressed_size);
3321 return false;
3322 }
3323 lUInt32 have = UNPACK_BUF_SIZE - z.avail_out;
3324 uncompressed_buf = cr_realloc(uncompressed_buf, uncompressed_size + have);
3325 memcpy(uncompressed_buf + uncompressed_size, tmp, have );
3326 uncompressed_size += have;
3327 if (ret == Z_STREAM_END) {
3328 break;
3329 }
3330 // printf("inflate() additional call needed (%d > %d)\n", compsize, uncompressed_size);
3331 }
3332 inflateEnd(&z);
3333 dstsize = uncompressed_size;
3334 dstbuf = uncompressed_buf;
3335 // printf("inflate() done %d > %d\n", compsize, uncompressed_size);
3336 return true;
3337 }
3338
setunpacked(const lUInt8 * buf,int bufsize)3339 void ldomTextStorageChunk::setunpacked( const lUInt8 * buf, int bufsize )
3340 {
3341 if ( _buf ) {
3342 _manager->_uncompressedSize -= _bufsize;
3343 free(_buf);
3344 _buf = NULL;
3345 _bufsize = 0;
3346 }
3347 if ( buf && bufsize ) {
3348 _bufsize = bufsize;
3349 _bufpos = bufsize;
3350 _buf = (lUInt8 *)malloc( sizeof(lUInt8) * bufsize );
3351 _manager->_uncompressedSize += _bufsize;
3352 memcpy( _buf, buf, bufsize );
3353 }
3354 }
3355
3356 /// unpacks chunk, if packed; checks storage space, compact if necessary
ensureUnpacked()3357 void ldomTextStorageChunk::ensureUnpacked()
3358 {
3359 #if BUILD_LITE!=1
3360 if ( !_buf ) {
3361 if ( _saved ) {
3362 if ( !restoreFromCache() ) {
3363 CRTimerUtil timer;
3364 timer.infinite();
3365 _manager->_cache->flush(false,timer);
3366 CRLog::warn( "restoreFromCache() failed for chunk %c%d,will try after flush", _type, _index);
3367 if ( !restoreFromCache() ) {
3368 CRLog::error( "restoreFromCache() failed for chunk %c%d", _type, _index);
3369 crFatalError( 111, "restoreFromCache() failed for chunk");
3370 }
3371 }
3372 _manager->compact( 0, this );
3373 }
3374 } else {
3375 // compact
3376 }
3377 #endif
3378 }
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388 // moved to .cpp to hide implementation
3389 // fastDOM
3390 class ldomAttributeCollection
3391 {
3392 private:
3393 lUInt16 _len;
3394 lUInt16 _size;
3395 lxmlAttribute * _list;
3396 public:
ldomAttributeCollection()3397 ldomAttributeCollection()
3398 : _len(0), _size(0), _list(NULL)
3399 {
3400 }
~ldomAttributeCollection()3401 ~ldomAttributeCollection()
3402 {
3403 if (_list)
3404 free(_list);
3405 }
operator [](int index)3406 lxmlAttribute * operator [] (int index) { return &_list[index]; }
operator [](int index) const3407 const lxmlAttribute * operator [] (int index) const { return &_list[index]; }
length() const3408 lUInt16 length() const
3409 {
3410 return _len;
3411 }
get(lUInt16 nsId,lUInt16 attrId) const3412 lUInt32 get( lUInt16 nsId, lUInt16 attrId ) const
3413 {
3414 for (lUInt16 i=0; i<_len; i++)
3415 {
3416 if (_list[i].compare( nsId, attrId ))
3417 return _list[i].index;
3418 }
3419 return LXML_ATTR_VALUE_NONE;
3420 }
set(lUInt16 nsId,lUInt16 attrId,lUInt32 valueIndex)3421 void set( lUInt16 nsId, lUInt16 attrId, lUInt32 valueIndex )
3422 {
3423 // find existing
3424 for (lUInt16 i=0; i<_len; i++)
3425 {
3426 if (_list[i].compare( nsId, attrId ))
3427 {
3428 _list[i].index = valueIndex;
3429 return;
3430 }
3431 }
3432 // add
3433 if (_len>=_size)
3434 {
3435 _size += 4;
3436 _list = cr_realloc( _list, _size );
3437 }
3438 _list[ _len++ ].setData(nsId, attrId, valueIndex);
3439 }
add(lUInt16 nsId,lUInt16 attrId,lUInt32 valueIndex)3440 void add( lUInt16 nsId, lUInt16 attrId, lUInt32 valueIndex )
3441 {
3442 // find existing
3443 if (_len>=_size)
3444 {
3445 _size += 4;
3446 _list = cr_realloc( _list, _size );
3447 }
3448 _list[ _len++ ].setData(nsId, attrId, valueIndex);
3449 }
add(const lxmlAttribute * v)3450 void add( const lxmlAttribute * v )
3451 {
3452 // find existing
3453 if (_len>=_size)
3454 {
3455 _size += 4;
3456 _list = cr_realloc( _list, _size );
3457 }
3458 _list[ _len++ ] = *v;
3459 }
3460 };
3461
3462
3463 /*
3464 class simpleLogFile
3465 {
3466 public:
3467 FILE * f;
3468 simpleLogFile(const char * fname) { f = fopen( fname, "wt" ); }
3469 ~simpleLogFile() { if (f) fclose(f); }
3470 simpleLogFile & operator << ( const char * str ) { fprintf( f, "%s", str ); fflush( f ); return *this; }
3471 simpleLogFile & operator << ( int d ) { fprintf( f, "%d(0x%X) ", d, d ); fflush( f ); return *this; }
3472 simpleLogFile & operator << ( const lChar32 * str )
3473 {
3474 if (str)
3475 {
3476 for (; *str; str++ )
3477 {
3478 fputc( *str >= 32 && *str<127 ? *str : '?', f );
3479 }
3480 }
3481 fflush( f );
3482 return *this;
3483 }
3484 };
3485
3486 simpleLogFile logfile("logfile.log");
3487 */
3488
3489
3490
3491 /////////////////////////////////////////////////////////////////
3492 /// lxmlDocument
3493
3494
lxmlDocBase(int)3495 lxmlDocBase::lxmlDocBase(int /*dataBufSize*/)
3496 : tinyNodeCollection(),
3497 _elementNameTable(MAX_ELEMENT_TYPE_ID)
3498 , _attrNameTable(MAX_ATTRIBUTE_TYPE_ID)
3499 , _nsNameTable(MAX_NAMESPACE_TYPE_ID)
3500 , _nextUnknownElementId(UNKNOWN_ELEMENT_TYPE_ID)
3501 , _nextUnknownAttrId(UNKNOWN_ATTRIBUTE_TYPE_ID)
3502 , _nextUnknownNsId(UNKNOWN_NAMESPACE_TYPE_ID)
3503 , _attrValueTable( DOC_STRING_HASH_SIZE )
3504 ,_idNodeMap(8192)
3505 ,_urlImageMap(1024)
3506 ,_idAttrId(0)
3507 ,_nameAttrId(0)
3508 #if BUILD_LITE!=1
3509 //,_keepData(false)
3510 //,_mapped(false)
3511 #endif
3512 #if BUILD_LITE!=1
3513 ,_pagesData(8192)
3514 #endif
3515 {
3516 // create and add one data buffer
3517 _stylesheet.setDocument( this );
3518 }
3519
3520 /// Destructor
~lxmlDocBase()3521 lxmlDocBase::~lxmlDocBase()
3522 {
3523 }
3524
onAttributeSet(lUInt16 attrId,lUInt32 valueId,ldomNode * node)3525 void lxmlDocBase::onAttributeSet( lUInt16 attrId, lUInt32 valueId, ldomNode * node )
3526 {
3527 if ( _idAttrId==0 )
3528 _idAttrId = _attrNameTable.idByName("id");
3529 if ( _nameAttrId==0 )
3530 _nameAttrId = _attrNameTable.idByName("name");
3531 if (attrId == _idAttrId) {
3532 _idNodeMap.set( valueId, node->getDataIndex() );
3533 } else if ( attrId==_nameAttrId ) {
3534 lString32 nodeName = node->getNodeName();
3535 if (nodeName == "a")
3536 _idNodeMap.set( valueId, node->getDataIndex() );
3537 }
3538 }
3539
getNsNameIndex(const lChar32 * name)3540 lUInt16 lxmlDocBase::getNsNameIndex( const lChar32 * name )
3541 {
3542 const LDOMNameIdMapItem * item = _nsNameTable.findItem( name );
3543 if (item)
3544 return item->id;
3545 _nsNameTable.AddItem( _nextUnknownNsId, lString32(name), NULL );
3546 return _nextUnknownNsId++;
3547 }
3548
getNsNameIndex(const lChar8 * name)3549 lUInt16 lxmlDocBase::getNsNameIndex( const lChar8 * name )
3550 {
3551 const LDOMNameIdMapItem * item = _nsNameTable.findItem( name );
3552 if (item)
3553 return item->id;
3554 _nsNameTable.AddItem( _nextUnknownNsId, lString32(name), NULL );
3555 return _nextUnknownNsId++;
3556 }
3557
getAttrNameIndex(const lChar32 * name)3558 lUInt16 lxmlDocBase::getAttrNameIndex( const lChar32 * name )
3559 {
3560 const LDOMNameIdMapItem * item = _attrNameTable.findItem( name );
3561 if (item)
3562 return item->id;
3563 _attrNameTable.AddItem( _nextUnknownAttrId, lString32(name), NULL );
3564 return _nextUnknownAttrId++;
3565 }
3566
getAttrNameIndex(const lChar8 * name)3567 lUInt16 lxmlDocBase::getAttrNameIndex( const lChar8 * name )
3568 {
3569 const LDOMNameIdMapItem * item = _attrNameTable.findItem( name );
3570 if (item)
3571 return item->id;
3572 _attrNameTable.AddItem( _nextUnknownAttrId, lString32(name), NULL );
3573 return _nextUnknownAttrId++;
3574 }
3575
getElementNameIndex(const lChar32 * name)3576 lUInt16 lxmlDocBase::getElementNameIndex( const lChar32 * name )
3577 {
3578 const LDOMNameIdMapItem * item = _elementNameTable.findItem( name );
3579 if (item)
3580 return item->id;
3581 _elementNameTable.AddItem( _nextUnknownElementId, lString32(name), NULL );
3582 return _nextUnknownElementId++;
3583 }
3584
findElementNameIndex(const lChar8 * name)3585 lUInt16 lxmlDocBase::findElementNameIndex( const lChar8 * name )
3586 {
3587 const LDOMNameIdMapItem * item = _elementNameTable.findItem( name );
3588 if (item)
3589 return item->id;
3590 return 0;
3591 }
3592
getElementNameIndex(const lChar8 * name)3593 lUInt16 lxmlDocBase::getElementNameIndex( const lChar8 * name )
3594 {
3595 const LDOMNameIdMapItem * item = _elementNameTable.findItem( name );
3596 if (item)
3597 return item->id;
3598 _elementNameTable.AddItem( _nextUnknownElementId, lString32(name), NULL );
3599 return _nextUnknownElementId++;
3600 }
3601
3602 /// create formatted text object with options set
createFormattedText()3603 LFormattedText * lxmlDocBase::createFormattedText()
3604 {
3605 LFormattedText * p = new LFormattedText();
3606 p->setImageScalingOptions(&_imgScalingOptions);
3607 p->setSpaceWidthScalePercent(_spaceWidthScalePercent);
3608 p->setMinSpaceCondensingPercent(_minSpaceCondensingPercent);
3609 p->setUnusedSpaceThresholdPercent(_unusedSpaceThresholdPercent);
3610 p->setMaxAddedLetterSpacingPercent(_maxAddedLetterSpacingPercent);
3611 p->setHighlightOptions(&_highlightOptions);
3612 return p;
3613 }
3614
3615 /// returns main element (i.e. FictionBook for FB2)
getRootNode()3616 ldomNode * lxmlDocBase::getRootNode()
3617 {
3618 return getTinyNode(17);
3619 }
3620
ldomDocument()3621 ldomDocument::ldomDocument()
3622 : lxmlDocBase(DEF_DOC_DATA_BUFFER_SIZE),
3623 m_toc(this)
3624 , m_pagemap(this)
3625 #if BUILD_LITE!=1
3626 , _last_docflags(0)
3627 , _page_height(0)
3628 , _page_width(0)
3629 , _rendered(false)
3630 , _just_rendered_from_cache(false)
3631 , _toc_from_cache_valid(false)
3632 , _warnings_seen_bitmap(0)
3633 #endif
3634 , lists(100)
3635 {
3636 _docIndex = ldomNode::registerDocument(this);
3637 allocTinyElement(NULL, 0, 0);
3638 // Note: valgrind reports (sometimes, when some document is opened or closed,
3639 // with metadataOnly or not) a memory leak (64 bytes in 1 blocks are definitely
3640 // lost), about this, created in allocTinyElement():
3641 // tinyElement * elem = new tinyElement(...)
3642 // possibly because it's not anchored anywhere.
3643 // Attempt at anchoring into a _nullNode, and calling ->detroy()
3644 // in ~ldomDocument(), did not prevent this report, and caused other ones...
3645
3646 //new ldomElement( this, NULL, 0, 0, 0 );
3647 //assert( _instanceMapCount==2 );
3648 }
3649
3650 /// Copy constructor - copies ID tables contents
lxmlDocBase(lxmlDocBase & doc)3651 lxmlDocBase::lxmlDocBase( lxmlDocBase & doc )
3652 : tinyNodeCollection(doc)
3653 , _elementNameTable(doc._elementNameTable) // Element Name<->Id map
3654 , _attrNameTable(doc._attrNameTable) // Attribute Name<->Id map
3655 , _nsNameTable(doc._nsNameTable) // Namespace Name<->Id map
3656 , _nextUnknownElementId(doc._nextUnknownElementId) // Next Id for unknown element
3657 , _nextUnknownAttrId(doc._nextUnknownAttrId) // Next Id for unknown attribute
3658 , _nextUnknownNsId(doc._nextUnknownNsId) // Next Id for unknown namespace
3659 //lvdomStyleCache _styleCache; // Style cache
3660 , _attrValueTable(doc._attrValueTable)
3661 , _idNodeMap(doc._idNodeMap)
3662 , _urlImageMap(1024)
3663 , _idAttrId(doc._idAttrId) // Id for "id" attribute name
3664 //, _docFlags(doc._docFlags)
3665 #if BUILD_LITE!=1
3666 , _pagesData(8192)
3667 #endif
3668 {
3669 }
3670
3671 /// creates empty document which is ready to be copy target of doc partial contents
ldomDocument(ldomDocument & doc)3672 ldomDocument::ldomDocument( ldomDocument & doc )
3673 : lxmlDocBase(doc)
3674 , m_toc(this)
3675 , m_pagemap(this)
3676 #if BUILD_LITE!=1
3677 , _def_font(doc._def_font) // default font
3678 , _def_style(doc._def_style)
3679 , _last_docflags(doc._last_docflags)
3680 , _page_height(doc._page_height)
3681 , _page_width(doc._page_width)
3682 #endif
3683 , _container(doc._container)
3684 , lists(100)
3685 {
3686 _docIndex = ldomNode::registerDocument(this);
3687 }
3688
writeNode(LVStream * stream,ldomNode * node,bool treeLayout)3689 static void writeNode( LVStream * stream, ldomNode * node, bool treeLayout )
3690 {
3691 int level = 0;
3692 if ( treeLayout ) {
3693 level = node->getNodeLevel();
3694 for (int i=0; i<level; i++ )
3695 *stream << " ";
3696 }
3697 if ( node->isText() )
3698 {
3699 lString8 txt = node->getText8();
3700 *stream << txt;
3701 if ( treeLayout )
3702 *stream << "\n";
3703 }
3704 else if ( node->isElement() )
3705 {
3706 lString8 elemName = UnicodeToUtf8(node->getNodeName());
3707 lString8 elemNsName = UnicodeToUtf8(node->getNodeNsName());
3708 if (!elemNsName.empty())
3709 elemName = elemNsName + ":" + elemName;
3710 if (!elemName.empty())
3711 *stream << "<" << elemName;
3712 int i;
3713 for (i=0; i<(int)node->getAttrCount(); i++)
3714 {
3715 const lxmlAttribute * attr = node->getAttribute(i);
3716 if (attr)
3717 {
3718 lString8 attrName( UnicodeToUtf8(node->getDocument()->getAttrName(attr->id)) );
3719 lString8 nsName( UnicodeToUtf8(node->getDocument()->getNsName(attr->nsid)) );
3720 lString8 attrValue( UnicodeToUtf8(node->getDocument()->getAttrValue(attr->index)) );
3721 *stream << " ";
3722 if ( nsName.length() > 0 )
3723 *stream << nsName << ":";
3724 *stream << attrName << "=\"" << attrValue << "\"";
3725 }
3726 }
3727
3728 #if 0
3729 if (!elemName.empty())
3730 {
3731 ldomNode * elem = node;
3732 lvdomElementFormatRec * fmt = elem->getRenderData();
3733 css_style_ref_t style = elem->getStyle();
3734 if ( fmt ) {
3735 lvRect rect;
3736 elem->getAbsRect( rect );
3737 *stream << U" fmt=\"";
3738 *stream << U"rm:" << lString32::itoa( (int)elem->getRendMethod() ) << U" ";
3739 if ( style.isNull() )
3740 *stream << U"style: NULL ";
3741 else {
3742 *stream << U"disp:" << lString32::itoa( (int)style->display ) << U" ";
3743 }
3744 *stream << U"y:" << lString32::itoa( (int)fmt->getY() ) << U" ";
3745 *stream << U"h:" << lString32::itoa( (int)fmt->getHeight() ) << U" ";
3746 *stream << U"ay:" << lString32::itoa( (int)rect.top ) << U" ";
3747 *stream << U"ah:" << lString32::itoa( (int)rect.height() ) << U" ";
3748 *stream << U"\"";
3749 }
3750 }
3751 #endif
3752
3753 if ( node->getChildCount() == 0 ) {
3754 if (!elemName.empty())
3755 {
3756 if ( elemName[0] == '?' )
3757 *stream << "?>";
3758 else
3759 *stream << "/>";
3760 }
3761 if ( treeLayout )
3762 *stream << "\n";
3763 } else {
3764 if (!elemName.empty())
3765 *stream << ">";
3766 if ( treeLayout )
3767 *stream << "\n";
3768 for (i=0; i<(int)node->getChildCount(); i++)
3769 {
3770 writeNode( stream, node->getChildNode(i), treeLayout );
3771 }
3772 if ( treeLayout ) {
3773 for (int i=0; i<level; i++ )
3774 *stream << " ";
3775 }
3776 if (!elemName.empty())
3777 *stream << "</" << elemName << ">";
3778 if ( treeLayout )
3779 *stream << "\n";
3780 }
3781 }
3782 }
3783
// Option bits for writeNodeEx(), the extended version of writeNode() used
// for displaying selection HTML with tunable output.
// -- text node content
#define WRITENODEEX_TEXT_HYPHENATE              0x0001 ///< add soft-hyphens where hyphenation is allowed
#define WRITENODEEX_TEXT_MARK_NODE_BOUNDARIES   0x0002 ///< mark start and end of text nodes (useful when indented)
#define WRITENODEEX_TEXT_SHOW_UNICODE_CODEPOINT 0x0004 ///< show unicode codepoint after char
#define WRITENODEEX_TEXT_UNESCAPED              0x0008 ///< let &, < and > unescaped in text nodes (makes HTML invalid)
// -- layout of the output
#define WRITENODEEX_INDENT_NEWLINE              0x0010 ///< indent newlines according to node level
#define WRITENODEEX_NEWLINE_BLOCK_NODES         0x0020 ///< start only nodes rendered as block/final on a new line,
                                                       ///  so inline elements and text nodes are stuck together
#define WRITENODEEX_NEWLINE_ALL_NODES           0x0040 ///< start all nodes on a new line
#define WRITENODEEX_UNUSED_1                    0x0080 ///< (unused)
// -- additional information
#define WRITENODEEX_NB_SKIPPED_CHARS            0x0100 ///< show number of skipped chars in text nodes: (...43...)
#define WRITENODEEX_NB_SKIPPED_NODES            0x0200 ///< show number of skipped sibling nodes: [...17...]
#define WRITENODEEX_SHOW_REND_METHOD            0x0400 ///< show rendering method at end of tag (<div ~F> =Final, <b ~i>=Inline...)
#define WRITENODEEX_SHOW_MISC_INFO              0x0800 ///< show additional info (depend on context)
#define WRITENODEEX_ADD_UPPER_DIR_LANG_ATTR     0x1000 ///< add dir= and lang= grabbed from upper nodes
// -- stylesheets
#define WRITENODEEX_GET_CSS_FILES               0x2000 ///< ensure css files that apply to initial node are returned
                                                       ///  in &cssFiles (needed when not starting from root node)
#define WRITENODEEX_INCLUDE_STYLESHEET_ELEMENT  0x4000 ///< includes crengine <stylesheet> element in HTML
                                                       ///  (not done if outside of sub-tree)
#define WRITENODEEX_COMPUTED_STYLES_AS_ATTR     0x8000 ///< set style='' from computed styles (not implemented)

/// Tests one WRITENODEEX_ bit (given without its prefix) against a variable
/// named wflags that must be in scope at the point of use.
#define WNEFLAG(x) ( wflags & WRITENODEEX_##x )
3807
writeNodeEx(LVStream * stream,ldomNode * node,lString32Collection & cssFiles,int wflags=0,ldomXPointerEx startXP=ldomXPointerEx (),ldomXPointerEx endXP=ldomXPointerEx (),int indentBaseLevel=-1)3808 static void writeNodeEx( LVStream * stream, ldomNode * node, lString32Collection & cssFiles, int wflags=0,
3809 ldomXPointerEx startXP=ldomXPointerEx(), ldomXPointerEx endXP=ldomXPointerEx(), int indentBaseLevel=-1)
3810 {
3811 bool isStartNode = false;
3812 bool isEndNode = false;
3813 bool isAfterStart = false;
3814 bool isBeforeEnd = false;
3815 bool containsStart = false;
3816 bool containsEnd = false;
3817
3818 if ( !startXP.isNull() && !endXP.isNull() ) {
3819 ldomXPointerEx currentEXP = ldomXPointerEx(node, 0);
3820 // Use start (offset=0) of text node for comparisons, but keep original XPointers
3821 ldomXPointerEx startEXP = ldomXPointerEx( startXP );
3822 startEXP.setOffset(0);
3823 ldomXPointerEx endEXP = ldomXPointerEx( endXP );
3824 endEXP.setOffset(0);
3825 if (currentEXP == startEXP)
3826 isStartNode = true;
3827 if (currentEXP == endEXP)
3828 isEndNode = true;
3829 if ( currentEXP.compare( startEXP ) >= 0 ) {
3830 isAfterStart = true;
3831 }
3832 if ( currentEXP.compare( endEXP ) <= 0 ) {
3833 isBeforeEnd = true;
3834 }
3835 ldomNode *tmp;
3836 tmp = startXP.getNode();
3837 while (tmp) {
3838 if (tmp == node) {
3839 containsStart = true;
3840 break;
3841 }
3842 tmp = tmp->getParentNode();
3843 }
3844 tmp = endXP.getNode();
3845 while (tmp) {
3846 if (tmp == node) {
3847 containsEnd = true;
3848 break;
3849 }
3850 tmp = tmp->getParentNode();
3851 }
3852 }
3853 else {
3854 containsStart = true;
3855 containsEnd = true;
3856 isAfterStart = true;
3857 isBeforeEnd = true;
3858 // but not isStartNode nor isEndNode, as these use startXP and endXP
3859 }
3860
3861 bool isInitialNode = false;
3862 lString32 initialDirAttribute = lString32::empty_str;
3863 lString32 initialLangAttribute = lString32::empty_str;
3864 if (indentBaseLevel < 0) { // initial call (recursive ones will have it >=0)
3865 indentBaseLevel = node->getNodeLevel();
3866 isInitialNode = true;
3867 if ( WNEFLAG(ADD_UPPER_DIR_LANG_ATTR) && !node->isRoot() ) {
3868 // Grab any dir="rtl" and lang="ar_AA" attributes from some parent node
3869 if ( !node->hasAttribute( attr_dir ) ) {
3870 ldomNode *pnode = node->getParentNode();
3871 for ( ; pnode && !pnode->isNull() && !pnode->isRoot(); pnode = pnode->getParentNode() ) {
3872 if ( pnode->hasAttribute(attr_dir) ) {
3873 initialDirAttribute = pnode->getAttributeValue(attr_dir);
3874 break;
3875 }
3876 }
3877 }
3878 if ( !node->hasAttribute( attr_lang ) ) {
3879 ldomNode *pnode = node->getParentNode();
3880 for ( ; pnode && !pnode->isNull() && !pnode->isRoot(); pnode = pnode->getParentNode() ) {
3881 if ( pnode->hasAttribute(attr_lang) ) {
3882 initialLangAttribute = pnode->getAttributeValue(attr_lang);
3883 break;
3884 }
3885 }
3886 }
3887 }
3888 }
3889 int level = node->getNodeLevel();
3890 if ( node->isText() && isAfterStart && isBeforeEnd ) {
3891 bool doNewLine = WNEFLAG(NEWLINE_ALL_NODES);
3892 bool doIndent = doNewLine && WNEFLAG(INDENT_NEWLINE);
3893 lString32 txt = node->getText();
3894 lString8 prefix = lString8::empty_str;
3895 lString8 suffix = lString8::empty_str;
3896
3897 if ( isEndNode ) {
3898 // show the number of chars not written after selection "(...n...)"
3899 int nodeLength = endXP.getText().length();
3900 int endOffset = endXP.getOffset();
3901 if (endOffset < nodeLength) {
3902 txt = txt.substr(0, endOffset);
3903 if ( WNEFLAG(NB_SKIPPED_CHARS) )
3904 suffix << "(…" << lString8().appendDecimal(nodeLength-endOffset) << "…)";
3905 }
3906 }
3907 if ( WNEFLAG(TEXT_MARK_NODE_BOUNDARIES) ) {
3908 // We use non-ordinary chars to mark start and end of text
3909 // node, which can help noticing spaces at start or end
3910 // when NEWLINE_ALL_NODES and INDENT_NEWLINE are used.
3911 // Some candidates chars are:
3912 // Greyish, discreet, but may be confused with parenthesis:
3913 // prefix << "⟨"; // U+27E8 Mathematical Left Angle Bracket
3914 // suffix << "⟩"; // U+27E9 Mathematical Right Angle Bracket
3915 // Greyish, a bit less discreet, but won't be confused with any other casual char:
3916 // prefix << "⟪"; // U+27EA Mathematical Left Double Angle Bracket
3917 // suffix << "⟫"; // U+27EB Mathematical Right Double Angle Bracket
3918 // A bit too dark, but won't be confused with any other casual char:
3919 // prefix << "⎛"; // U+239B Left Parenthesis Upper Hook
3920 // suffix << "⎠"; // U+23A0 Right Parenthesis Lower Hook (may have too much leading space)
3921 prefix << "⟪"; // U+27EA Mathematical Left Double Angle Bracket
3922 suffix << "⟫"; // U+27EB Mathematical Right Double Angle Bracket
3923 }
3924 if ( isStartNode ) {
3925 // show the number of chars not written before selection "(...n...)"
3926 int offset = startXP.getOffset();
3927 if (offset > 0) {
3928 txt = txt.substr(offset);
3929 if ( WNEFLAG(NB_SKIPPED_CHARS) )
3930 prefix << "(…" << lString8().appendDecimal(offset) << "…)";
3931 }
3932 if ( WNEFLAG(NB_SKIPPED_NODES) ) {
3933 // show the number of sibling nodes not written before selection "[...n..]"
3934 int nbIgnoredPrecedingSiblings = node->getNodeIndex();
3935 if (nbIgnoredPrecedingSiblings) {
3936 if (doIndent)
3937 for ( int i=indentBaseLevel; i<level; i++ )
3938 *stream << " ";
3939 *stream << "[…" << lString8().appendDecimal(nbIgnoredPrecedingSiblings) << "…]";
3940 if (doNewLine)
3941 *stream << "\n";
3942 }
3943 }
3944 }
3945 if (doIndent)
3946 for ( int i=indentBaseLevel; i<level; i++ )
3947 *stream << " ";
3948 if ( ! WNEFLAG(TEXT_UNESCAPED) ) {
3949 // Use a temporary char we're not likely to find in the DOM
3950 // (see https://en.wikipedia.org/wiki/Specials_(Unicode_block) )
3951 // for 2-steps '&' replacement (to avoid infinite loop or the
3952 // need for more complicated code)
3953 while ( txt.replace( cs32("&"), cs32(U"\xFFFF") ) ) ;
3954 while ( txt.replace( cs32(U"\xFFFF"), cs32("&") ) ) ;
3955 while ( txt.replace( cs32("<"), cs32("<") ) ) ;
3956 while ( txt.replace( cs32(">"), cs32(">") ) ) ;
3957 }
3958 #define HYPH_MIN_WORD_LEN_TO_HYPHENATE 4
3959 #define HYPH_MAX_WORD_SIZE 64
3960 // (No hyphenation if we are showing unicode codepoint)
3961 if ( WNEFLAG(TEXT_SHOW_UNICODE_CODEPOINT) ) {
3962 *stream << prefix;
3963 for ( int i=0; i<txt.length(); i++ )
3964 *stream << UnicodeToUtf8(txt.substr(i, 1)) << "⟨U+" << lString8().appendHex(txt[i]) << "⟩";
3965 *stream << suffix;
3966 }
3967 else if ( WNEFLAG(TEXT_HYPHENATE) && HyphMan::isEnabled() && txt.length() >= HYPH_MIN_WORD_LEN_TO_HYPHENATE ) {
3968 // Add soft-hyphens where HyphMan (with the user or language current hyphenation
3969 // settings) says hyphenation is allowed.
3970 // We do that here while we output the text to avoid the need
3971 // for temporary storage of a string with soft-hyphens added.
3972 const lChar32 * text32 = txt.c_str();
3973 int txtlen = txt.length();
3974 lUInt8 * flags = (lUInt8*)calloc(txtlen, sizeof(*flags));
3975 lUInt16 widths[HYPH_MAX_WORD_SIZE] = { 0 }; // array needed by hyphenate()
3976 // Lookup words starting from the end, just because lStr_findWordBounds()
3977 // will ensure the iteration that way.
3978 int wordpos = txtlen;
3979 while ( wordpos > 0 ) {
3980 // lStr_findWordBounds() will find the word contained at wordpos
3981 // (or the previous word if wordpos happens to be a space or some
3982 // punctuation) by looking only for alpha chars in m_text.
3983 int start, end;
3984 lStr_findWordBounds( text32, txtlen, wordpos, start, end );
3985 if ( end <= HYPH_MIN_WORD_LEN_TO_HYPHENATE ) {
3986 // Too short word at start, we're done
3987 break;
3988 }
3989 int len = end - start;
3990 if ( len < HYPH_MIN_WORD_LEN_TO_HYPHENATE ) {
3991 // Too short word found, skip it
3992 wordpos = start - 1;
3993 continue;
3994 }
3995 if ( start >= wordpos ) {
3996 // Shouldn't happen, but let's be sure we don't get stuck
3997 wordpos = wordpos - HYPH_MIN_WORD_LEN_TO_HYPHENATE;
3998 continue;
3999 }
4000 // We have a valid word to look for hyphenation
4001 if ( len > HYPH_MAX_WORD_SIZE ) // hyphenate() stops/truncates at 64 chars
4002 len = HYPH_MAX_WORD_SIZE;
4003 // Have hyphenate() set flags inside 'flags'
4004 // (Fetching the lang_cfg for each text node is not really cheap, but
4005 // it's easier than having to pass it to each writeNodeEx())
4006 TextLangMan::getTextLangCfg(node)->getHyphMethod()->hyphenate(text32+start, len, widths, flags+start, 0, 0xFFFF, 1);
4007 // Continue with previous word
4008 wordpos = start - 1;
4009 }
4010 // Output text, and add a soft-hyphen where there are flags
4011 *stream << prefix;
4012 for ( int i=0; i<txt.length(); i++ ) {
4013 *stream << UnicodeToUtf8(txt.substr(i, 1));
4014 if ( flags[i] & LCHAR_ALLOW_HYPH_WRAP_AFTER )
4015 *stream << "";
4016 }
4017 *stream << suffix;
4018 free(flags);
4019 }
4020 else {
4021 *stream << prefix << UnicodeToUtf8(txt) << suffix;
4022 }
4023 if (doNewLine)
4024 *stream << "\n";
4025 if ( isEndNode && WNEFLAG(NB_SKIPPED_NODES) ) {
4026 // show the number of sibling nodes not written after selection "[...n..]"
4027 ldomNode * parent = node->getParentNode();
4028 int nbIgnoredFollowingSiblings = parent ? (parent->getChildCount() - 1 - node->getNodeIndex()) : 0;
4029 if (nbIgnoredFollowingSiblings) {
4030 if (doIndent)
4031 for ( int i=indentBaseLevel; i<level; i++ )
4032 *stream << " ";
4033 *stream << "[…" << lString8().appendDecimal(nbIgnoredFollowingSiblings) << "…]";
4034 if (doNewLine)
4035 *stream << "\n";
4036 }
4037 }
4038 }
4039 else if ( node->isElement() ) {
4040 lString8 elemName = UnicodeToUtf8(node->getNodeName());
4041 lString8 elemNsName = UnicodeToUtf8(node->getNodeNsName());
4042 // Write elements that are between start and end, but also those that
4043 // are parents of start and end nodes
4044 bool toWrite = (isAfterStart && isBeforeEnd) || containsStart || containsEnd;
4045 bool isStylesheetTag = false;
4046 if ( node->getNodeId() == el_stylesheet ) {
4047 toWrite = false;
4048 if ( WNEFLAG(INCLUDE_STYLESHEET_ELEMENT) ) {
4049 // We may meet a <stylesheet> tag that is not between startXP and endXP and
4050 // does not contain any of them, but its parent (body or DocFragment) does.
4051 // Write it if requested, as it's useful when inspecting HTML.
4052 toWrite = true;
4053 isStylesheetTag = true; // for specific parsing and writting
4054 }
4055 }
4056 if ( ! toWrite )
4057 return;
4058
4059 // In case we're called (when debugging) while styles have been reset,
4060 // avoid crash on stuff like isBoxingInlineBox()/isFloatingBox() that
4061 // do check styles
4062 bool has_styles_set = !node->getStyle().isNull();
4063
4064 bool doNewLineBeforeStartTag = false;
4065 bool doNewLineAfterStartTag = false;
4066 bool doNewLineBeforeEndTag = false; // always stays false, newline done by child elements
4067 bool doNewLineAfterEndTag = false;
4068 bool doIndentBeforeStartTag = false;
4069 bool doIndentBeforeEndTag = false;
4070 // Specific for floats and inline-blocks among inlines inside final, that
4071 // we want to show on their own lines:
4072 bool doNewlineBeforeIndentBeforeStartTag = false;
4073 bool doIndentAfterNewLineAfterEndTag = false;
4074 bool doIndentOneLevelLessAfterNewLineAfterEndTag = false;
4075 if ( WNEFLAG(NEWLINE_ALL_NODES) ) {
4076 doNewLineBeforeStartTag = true;
4077 doNewLineAfterStartTag = true;
4078 // doNewLineBeforeEndTag = false; // done by child elements
4079 doNewLineAfterEndTag = true;
4080 doIndentBeforeStartTag = WNEFLAG(INDENT_NEWLINE);
4081 doIndentBeforeEndTag = WNEFLAG(INDENT_NEWLINE);
4082 }
4083 else if ( WNEFLAG(NEWLINE_BLOCK_NODES) ) {
4084 // We consider block elements according to crengine decision for their
4085 // rendering method, which gives us a visual hint of it.
4086 lvdom_element_render_method rm = node->getRendMethod();
4087 // Text and inline nodes stay stuck together, but not all others
4088 if (rm == erm_invisible) {
4089 // We don't know how invisible nodes would be displayed if
4090 // they were visible. Make the invisible tree like inline
4091 // among finals, so they don't take too much height.
4092 if (node->getParentNode()) {
4093 rm = node->getParentNode()->getRendMethod();
4094 if (rm == erm_invisible || rm == erm_inline || rm == erm_final)
4095 rm = erm_inline;
4096 else
4097 rm = erm_final;
4098 }
4099 }
4100 if ( rm != erm_inline || (has_styles_set && node->isBoxingInlineBox()) ) {
4101 doNewLineBeforeStartTag = true;
4102 doNewLineAfterStartTag = true;
4103 // doNewLineBeforeEndTag = false; // done by child elements
4104 doNewLineAfterEndTag = true;
4105 doIndentBeforeStartTag = WNEFLAG(INDENT_NEWLINE);
4106 doIndentBeforeEndTag = WNEFLAG(INDENT_NEWLINE);
4107 if (rm == erm_final) {
4108 // Nodes with rend method erm_final contain only text and inline nodes.
4109 // We want these erm_final indented, but not their content
4110 doNewLineAfterStartTag = false;
4111 doIndentBeforeEndTag = false;
4112 }
4113 else if (has_styles_set && node->isFloatingBox()) {
4114 lvdom_element_render_method prm = node->getParentNode()->getRendMethod();
4115 if (prm == erm_final || prm == erm_inline) {
4116 doNewlineBeforeIndentBeforeStartTag = true;
4117 doIndentAfterNewLineAfterEndTag = WNEFLAG(INDENT_NEWLINE);
4118 // If we're the last node in parent collection, indent one level less,
4119 // so that next node (the parent) is not at this node level
4120 ldomNode * parent = node->getParentNode();
4121 if ( parent && (node->getNodeIndex() == parent->getChildCount()-1) )
4122 doIndentOneLevelLessAfterNewLineAfterEndTag = true;
4123 else if ( parent && (node->getNodeIndex() == parent->getChildCount()-2)
4124 && parent->getChildNode(parent->getChildCount()-1)->isText() )
4125 doIndentOneLevelLessAfterNewLineAfterEndTag = true;
4126 else if ( containsEnd ) // same if next siblings won't be shown
4127 doIndentOneLevelLessAfterNewLineAfterEndTag = true;
4128 // But if previous sibling node is a floating or boxing inline node
4129 // that have done what we just did, cancel some of what we did
4130 if ( node->getNodeIndex() > 0 ) {
4131 ldomNode * prevsibling = parent->getChildNode(node->getNodeIndex()-1);
4132 if ( prevsibling->isFloatingBox() || prevsibling->isBoxingInlineBox() ) {
4133 doNewlineBeforeIndentBeforeStartTag = false;
4134 doIndentBeforeStartTag = false;
4135 }
4136 }
4137 }
4138 }
4139 else if (has_styles_set && node->isBoxingInlineBox()) {
4140 doNewlineBeforeIndentBeforeStartTag = true;
4141 doIndentAfterNewLineAfterEndTag = WNEFLAG(INDENT_NEWLINE);
4142 // Same as above
4143 ldomNode * parent = node->getParentNode();
4144 if ( parent && (node->getNodeIndex() == parent->getChildCount()-1) )
4145 doIndentOneLevelLessAfterNewLineAfterEndTag = true;
4146 else if ( parent && (node->getNodeIndex() == parent->getChildCount()-2)
4147 && parent->getChildNode(parent->getChildCount()-1)->isText() )
4148 doIndentOneLevelLessAfterNewLineAfterEndTag = true;
4149 else if ( containsEnd )
4150 doIndentOneLevelLessAfterNewLineAfterEndTag = true;
4151 if ( node->getNodeIndex() > 0 ) {
4152 ldomNode * prevsibling = parent->getChildNode(node->getNodeIndex()-1);
4153 if ( prevsibling->isFloatingBox() || prevsibling->isBoxingInlineBox() ) {
4154 doNewlineBeforeIndentBeforeStartTag = false;
4155 doIndentBeforeStartTag = false;
4156 }
4157 }
4158 }
4159 }
4160 }
4161
4162 if ( containsStart && WNEFLAG(NB_SKIPPED_NODES) ) {
4163 // Previous siblings did not contain startXP: show how many they are
4164 int nbIgnoredPrecedingSiblings = node->getNodeIndex();
4165 if (nbIgnoredPrecedingSiblings && WNEFLAG(INCLUDE_STYLESHEET_ELEMENT) &&
4166 node->getParentNode()->getFirstChild()->isElement() &&
4167 node->getParentNode()->getFirstChild()->getNodeId() == el_stylesheet) {
4168 nbIgnoredPrecedingSiblings--; // we have written the <stylesheet> tag
4169 }
4170 if (nbIgnoredPrecedingSiblings) {
4171 if (doIndentBeforeStartTag)
4172 for ( int i=indentBaseLevel; i<level; i++ )
4173 *stream << " ";
4174 *stream << "[…" << lString8().appendDecimal(nbIgnoredPrecedingSiblings) << "…]";
4175 if (doNewLineBeforeStartTag)
4176 *stream << "\n";
4177 }
4178 }
4179 if (doNewlineBeforeIndentBeforeStartTag)
4180 *stream << "\n";
4181 if (doIndentBeforeStartTag)
4182 for ( int i=indentBaseLevel; i<level; i++ )
4183 *stream << " ";
4184 if ( elemName.empty() ) {
4185 // should not happen (except for the root node, that we might have skipped)
4186 elemName = node->isRoot() ? lString8("RootNode") : (elemNsName + "???");
4187 }
4188 if ( !elemNsName.empty() )
4189 elemName = elemNsName + ":" + elemName;
4190 *stream << "<" << elemName;
4191 if ( isInitialNode ) {
4192 // Add any dir="rtl" and lang="ar_AA" attributes grabbed from some parent node
4193 if ( !initialDirAttribute.empty() ) {
4194 *stream << " dir=\"" << UnicodeToUtf8(initialDirAttribute) << "\"";
4195 }
4196 if ( !initialLangAttribute.empty() ) {
4197 *stream << " lang=\"" << UnicodeToUtf8(initialLangAttribute) << "\"";
4198 }
4199 }
4200 for ( int i=0; i<(int)node->getAttrCount(); i++ ) {
4201 const lxmlAttribute * attr = node->getAttribute(i);
4202 if (attr) {
4203 lString8 attrName( UnicodeToUtf8(node->getDocument()->getAttrName(attr->id)) );
4204 lString8 nsName( UnicodeToUtf8(node->getDocument()->getNsName(attr->nsid)) );
4205 lString8 attrValue( UnicodeToUtf8(node->getDocument()->getAttrValue(attr->index)) );
4206 if ( WNEFLAG(SHOW_MISC_INFO) && has_styles_set ) {
4207 if ( node->getNodeId() == el_pseudoElem && (attr->id == attr_Before || attr->id == attr_After) ) {
4208 // Show the rendered content as the otherwise empty Before/After attribute value
4209 if ( WNEFLAG(TEXT_SHOW_UNICODE_CODEPOINT) ) {
4210 lString32 content = get_applied_content_property(node);
4211 attrValue.empty();
4212 for ( int i=0; i<content.length(); i++ ) {
4213 attrValue << UnicodeToUtf8(content.substr(i, 1)) << "⟨U+" << lString8().appendHex(content[i]) << "⟩";
4214 }
4215 }
4216 else {
4217 attrValue = UnicodeToUtf8(get_applied_content_property(node));
4218 }
4219 }
4220 }
4221 *stream << " ";
4222 if ( nsName.length() > 0 )
4223 *stream << nsName << ":";
4224 *stream << attrName;
4225 if ( !attrValue.empty() ) // don't show ="" if empty
4226 *stream << "=\"" << attrValue << "\"";
4227 if ( attrName == "StyleSheet" ) { // gather linked css files
4228 lString32 cssFile = node->getDocument()->getAttrValue(attr->index);
4229 if (!cssFiles.contains(cssFile))
4230 cssFiles.add(cssFile);
4231 }
4232 }
4233 }
4234 if ( WNEFLAG(SHOW_REND_METHOD) ) {
4235 *stream << " ~";
4236 switch ( node->getRendMethod() ) {
4237 case erm_invisible: *stream << "X"; break;
4238 case erm_killed: *stream << "K"; break;
4239 case erm_block: *stream << "B"; break;
4240 case erm_final: *stream << "F"; break;
4241 case erm_inline: *stream << "i"; break;
4242 case erm_table: *stream << "T"; break;
4243 case erm_table_row_group: *stream << "TRG"; break;
4244 case erm_table_header_group: *stream << "THG"; break;
4245 case erm_table_footer_group: *stream << "TFG"; break;
4246 case erm_table_row: *stream << "TR"; break;
4247 case erm_table_column_group: *stream << "TCG"; break;
4248 case erm_table_column: *stream << "TC"; break;
4249 default: *stream << "?"; break;
4250 }
4251 }
4252 if ( node->getChildCount() == 0 ) {
4253 if ( elemName[0] == '?' )
4254 *stream << "?>";
4255 else
4256 *stream << "/>";
4257 }
4258 else {
4259 *stream << ">";
4260 if (doNewLineAfterStartTag)
4261 *stream << "\n";
4262 if ( ! isStylesheetTag ) {
4263 for ( int i=0; i<(int)node->getChildCount(); i++ ) {
4264 writeNodeEx( stream, node->getChildNode(i), cssFiles, wflags, startXP, endXP, indentBaseLevel );
4265 }
4266 }
4267 else {
4268 // We need to parse the stylesheet tag text to extract css files path.
4269 // We write its content without indentation and add a \n for readability.
4270 lString8 txt = node->getText8();
4271 int txtlen = txt.length();
4272 if (txtlen && txt.substr(txtlen-1) != "\n") {
4273 txt << "\n";
4274 }
4275 *stream << txt;
4276 // Parse @import'ed files to gather linked css files (we don't really need to
4277 // do recursive parsing of @import, which are very rare, we just want to get
4278 // the 2nd++ linked css files that were put there by crengine).
4279 const char * s = txt.c_str();
4280 while (true) {
4281 lString8 import_file;
4282 if ( ! LVProcessStyleSheetImport( s, import_file ) ) {
4283 break;
4284 }
4285 lString32 cssFile = LVCombinePaths( node->getAttributeValue(attr_href), Utf8ToUnicode(import_file) );
4286 if ( !cssFile.empty() && !cssFiles.contains(cssFile) ) {
4287 cssFiles.add(cssFile);
4288 }
4289 }
4290 }
4291 if (doNewLineBeforeEndTag)
4292 *stream << "\n";
4293 if (doIndentBeforeEndTag)
4294 for ( int i=indentBaseLevel; i<level; i++ )
4295 *stream << " ";
4296 *stream << "</" << elemName << ">";
4297 if ( WNEFLAG(TEXT_HYPHENATE) ) {
4298 // Additional minor formatting tweaks for when this is going to be fed
4299 // to some other renderer, which is usually when we request HYPHENATE.
4300 if ( has_styles_set && node->getStyle()->display == css_d_run_in ) {
4301 // For FB2 footnotes, add a space between the number and text,
4302 // as none might be present in the source. If there were some,
4303 // the other renderer will probably collapse them.
4304 *stream << " ";
4305 }
4306 }
4307 }
4308 if (doNewLineAfterEndTag)
4309 *stream << "\n";
4310 if (doIndentAfterNewLineAfterEndTag) {
4311 int ilevel = doIndentOneLevelLessAfterNewLineAfterEndTag ? level-1 : level;
4312 for ( int i=indentBaseLevel; i<ilevel; i++ )
4313 *stream << " ";
4314 }
4315 if ( containsEnd && WNEFLAG(NB_SKIPPED_NODES) ) {
4316 // Next siblings will not contain endXP and won't be written: show how many they are
4317 ldomNode * parent = node->getParentNode();
4318 int nbIgnoredFollowingSiblings = parent ? (parent->getChildCount() - 1 - node->getNodeIndex()) : 0;
4319 if (nbIgnoredFollowingSiblings) {
4320 if (doIndentBeforeEndTag)
4321 for ( int i=indentBaseLevel; i<level; i++ )
4322 *stream << " ";
4323 *stream << "[…" << lString8().appendDecimal(nbIgnoredFollowingSiblings) << "…]";
4324 if (doNewLineAfterEndTag)
4325 *stream << "\n";
4326 }
4327 }
4328 if ( isInitialNode && cssFiles.length()==0 && WNEFLAG(GET_CSS_FILES) && !node->isRoot() ) {
4329 // We have gathered CSS files as we walked the DOM, which we usually
4330 // do from the root node if we want CSS files.
4331 // In case we started from an inner node, and we are requested for
4332 // CSS files - but we have none - walk the DOM back to gather them.
4333 ldomNode *pnode = node->getParentNode();
4334 for ( ; pnode && !pnode->isNull() && !pnode->isRoot(); pnode = pnode->getParentNode() ) {
4335 if ( pnode->getNodeId() == el_DocFragment || pnode->getNodeId() == el_body ) {
4336 // The CSS file in StyleSheet="" attribute was the first one seen by
4337 // crengine, so add it first to cssFiles
4338 if (pnode->hasAttribute(attr_StyleSheet) ) {
4339 lString32 cssFile = pnode->getAttributeValue(attr_StyleSheet);
4340 if (!cssFiles.contains(cssFile))
4341 cssFiles.add(cssFile);
4342 }
4343 // And then the CSS files in @import in the <stylesheet> element
4344 if ( pnode->getChildCount() > 0 ) {
4345 ldomNode *styleNode = pnode->getFirstChild();
4346 if ( styleNode && styleNode->getNodeId()==el_stylesheet ) {
4347 // Do as done above
4348 lString8 txt = pnode->getText8();
4349 const char * s = txt.c_str();
4350 while (true) {
4351 lString8 import_file;
4352 if ( ! LVProcessStyleSheetImport( s, import_file ) ) {
4353 break;
4354 }
4355 lString32 cssFile = LVCombinePaths( pnode->getAttributeValue(attr_href), Utf8ToUnicode(import_file) );
4356 if ( !cssFile.empty() && !cssFiles.contains(cssFile) ) {
4357 cssFiles.add(cssFile);
4358 }
4359 }
4360 }
4361 }
4362 }
4363 }
4364 }
4365 }
4366 }
4367
saveToStream(LVStreamRef stream,const char *,bool treeLayout)4368 bool ldomDocument::saveToStream( LVStreamRef stream, const char *, bool treeLayout )
4369 {
4370 //CRLog::trace("ldomDocument::saveToStream()");
4371 if (!stream || !getRootNode()->getChildCount())
4372 return false;
4373
4374 *stream.get() << UnicodeToLocal(cs32(U"\xFEFF"));
4375 writeNode( stream.get(), getRootNode(), treeLayout );
4376 return true;
4377 }
4378
printWarning(const char * msg,int warning_id)4379 void ldomDocument::printWarning(const char * msg, int warning_id) {
4380 // Provide a warning_id from 1 to 32 to have this warning emited only once
4381 // Provide 0 to have it printed it every time
4382 lUInt32 warning_bit = 0;
4383 if ( warning_id > 0 && warning_id <= 32 ) {
4384 warning_bit = 1 << (warning_id-1);
4385 }
4386 if ( !( warning_bit & _warnings_seen_bitmap) ) {
4387 printf("CRE WARNING: %s\n", msg);
4388 _warnings_seen_bitmap |= warning_bit;
4389 }
4390 }
4391
/// Destructor: calls updateMap() (non-lite builds only), then unregisters the
/// fonts registered for this document (by _docIndex) from the font manager,
/// and finally unregisters the document itself via ldomNode::unregisterDocument().
ldomDocument::~ldomDocument()
{
#if BUILD_LITE!=1
    updateMap(); // NOLINT: Call to virtual function during destruction
#endif
    fontMan->UnregisterDocumentFonts(_docIndex);
    ldomNode::unregisterDocument(this);
}
4400
4401 #if BUILD_LITE!=1
4402
4403 class LVImportStylesheetParser
4404 {
4405 public:
LVImportStylesheetParser(ldomDocument * document)4406 LVImportStylesheetParser(ldomDocument *document) :
4407 _document(document), _nestingLevel(0)
4408 {
4409 }
4410
~LVImportStylesheetParser()4411 ~LVImportStylesheetParser()
4412 {
4413 _inProgress.clear();
4414 }
4415
Parse(lString32 cssFile)4416 bool Parse(lString32 cssFile)
4417 {
4418 bool ret = false;
4419 if ( cssFile.empty() )
4420 return ret;
4421
4422 lString32 codeBase = cssFile;
4423 LVExtractLastPathElement(codeBase);
4424 LVContainerRef container = _document->getContainer();
4425 if (!container.isNull()) {
4426 LVStreamRef cssStream = container->OpenStream(cssFile.c_str(), LVOM_READ);
4427 if (!cssStream.isNull()) {
4428 lString32 css;
4429 css << LVReadTextFile(cssStream);
4430 int offset = _inProgress.add(cssFile);
4431 ret = Parse(codeBase, css) || ret;
4432 _inProgress.erase(offset, 1);
4433 }
4434 }
4435 return ret;
4436 }
4437
Parse(lString32 codeBase,lString32 css)4438 bool Parse(lString32 codeBase, lString32 css)
4439 {
4440 bool ret = false;
4441 if ( css.empty() )
4442 return ret;
4443 lString8 css8 = UnicodeToUtf8(css);
4444 const char * s = css8.c_str();
4445
4446 _nestingLevel += 1;
4447 while (_nestingLevel < 11) { //arbitrary limit
4448 lString8 import_file;
4449
4450 if ( LVProcessStyleSheetImport( s, import_file ) ) {
4451 lString32 importFilename = LVCombinePaths( codeBase, Utf8ToUnicode(import_file) );
4452 if ( !importFilename.empty() && !_inProgress.contains(importFilename) ) {
4453 ret = Parse(importFilename) || ret;
4454 }
4455 } else {
4456 break;
4457 }
4458 }
4459 _nestingLevel -= 1;
4460 return (_document->getStyleSheet()->parse(s, false, codeBase) || ret);
4461 }
4462 private:
4463 ldomDocument *_document;
4464 lString32Collection _inProgress;
4465 int _nestingLevel;
4466 };
4467
4468 /// renders (formats) document in memory
setRenderProps(int width,int dy,bool,int,font_ref_t def_font,int def_interline_space,CRPropRef props)4469 bool ldomDocument::setRenderProps( int width, int dy, bool /*showCover*/, int /*y0*/, font_ref_t def_font, int def_interline_space, CRPropRef props )
4470 {
4471 // Note: def_interline_space is no more used here
4472 bool changed = false;
4473 // Don't clear this cache of LFormattedText if
4474 // render props don't change.
4475 // _renderedBlockCache.clear();
4476 changed = _imgScalingOptions.update(props, def_font->getSize()) || changed;
4477 css_style_ref_t s( new css_style_rec_t );
4478 s->display = css_d_block;
4479 s->white_space = css_ws_normal;
4480 s->text_align = css_ta_start;
4481 s->text_align_last = css_ta_auto;
4482 s->text_decoration = css_td_none;
4483 s->text_transform = css_tt_none;
4484 s->hyphenate = css_hyph_auto;
4485 s->color.type = css_val_unspecified;
4486 s->color.value = 0x000000;
4487 s->background_color.type = css_val_unspecified;
4488 s->background_color.value = 0xFFFFFF;
4489 //_def_style->background_color.type = color;
4490 //_def_style->background_color.value = 0xFFFFFF;
4491 s->page_break_before = css_pb_auto;
4492 s->page_break_after = css_pb_auto;
4493 s->page_break_inside = css_pb_auto;
4494 s->list_style_type = css_lst_disc;
4495 s->list_style_position = css_lsp_outside;
4496 s->vertical_align.type = css_val_unspecified;
4497 s->vertical_align.value = css_va_baseline;
4498 s->font_family = def_font->getFontFamily();
4499 s->font_size.type = css_val_screen_px; // we use this type, as we got the real font size from FontManager
4500 s->font_size.value = def_font->getSize();
4501 s->font_name = def_font->getTypeFace();
4502 s->font_weight = css_fw_400;
4503 s->font_style = css_fs_normal;
4504 s->font_features.type = css_val_unspecified;
4505 s->font_features.value = 0;
4506 s->text_indent.type = css_val_px;
4507 s->text_indent.value = 0;
4508 // s->line_height.type = css_val_percent;
4509 // s->line_height.value = def_interline_space << 8;
4510 s->line_height.type = css_val_unspecified;
4511 s->line_height.value = css_generic_normal; // line-height: normal
4512 s->orphans = css_orphans_widows_1; // default to allow orphans and widows
4513 s->widows = css_orphans_widows_1;
4514 s->float_ = css_f_none;
4515 s->clear = css_c_none;
4516 s->direction = css_dir_inherit;
4517 s->cr_hint.type = css_val_unspecified;
4518 s->cr_hint.value = CSS_CR_HINT_NONE;
4519 //lUInt32 defStyleHash = (((_stylesheet.getHash() * 31) + calcHash(_def_style))*31 + calcHash(_def_font));
4520 //defStyleHash = defStyleHash * 31 + getDocFlags();
4521 if ( _last_docflags != getDocFlags() ) {
4522 CRLog::trace("ldomDocument::setRenderProps() - doc flags changed");
4523 _last_docflags = getDocFlags();
4524 changed = true;
4525 }
4526 if ( calcHash(_def_style) != calcHash(s) ) {
4527 CRLog::trace("ldomDocument::setRenderProps() - style is changed");
4528 _def_style = s;
4529 changed = true;
4530 }
4531 if ( calcHash(_def_font) != calcHash(def_font)) {
4532 CRLog::trace("ldomDocument::setRenderProps() - font is changed");
4533 _def_font = def_font;
4534 changed = true;
4535 }
4536 if ( _page_height != dy && dy > 0 ) {
4537 CRLog::trace("ldomDocument::setRenderProps() - page height is changed: %d != %d", _page_height, dy);
4538 _page_height = dy;
4539 changed = true;
4540 }
4541 if ( _page_width != width && width > 0 ) {
4542 CRLog::trace("ldomDocument::setRenderProps() - page width is changed");
4543 _page_width = width;
4544 changed = true;
4545 }
4546 // {
4547 // lUInt32 styleHash = calcStyleHash();
4548 // styleHash = styleHash * 31 + calcGlobalSettingsHash();
4549 // CRLog::debug("Style hash before set root style: %x", styleHash);
4550 // }
4551 // getRootNode()->setFont( _def_font );
4552 // getRootNode()->setStyle( _def_style );
4553 // {
4554 // lUInt32 styleHash = calcStyleHash();
4555 // styleHash = styleHash * 31 + calcGlobalSettingsHash();
4556 // CRLog::debug("Style hash after set root style: %x", styleHash);
4557 // }
4558 return changed;
4559 }
4560
dropStyles()4561 void tinyNodeCollection::dropStyles()
4562 {
4563 _styles.clear(-1);
4564 _fonts.clear(-1);
4565 resetNodeNumberingProps();
4566
4567 int count = ((_elemCount+TNC_PART_LEN-1) >> TNC_PART_SHIFT);
4568 for ( int i=0; i<count; i++ ) {
4569 int offs = i*TNC_PART_LEN;
4570 int sz = TNC_PART_LEN;
4571 if ( offs + sz > _elemCount+1 ) {
4572 sz = _elemCount+1 - offs;
4573 }
4574 ldomNode * buf = _elemList[i];
4575 for ( int j=0; j<sz; j++ ) {
4576 if ( buf[j].isElement() ) {
4577 setNodeStyleIndex( buf[j]._handle._dataIndex, 0 );
4578 setNodeFontIndex( buf[j]._handle._dataIndex, 0 );
4579 }
4580 }
4581 }
4582 _nodeStyleHash = 0;
4583 }
4584
calcFinalBlocks()4585 int tinyNodeCollection::calcFinalBlocks()
4586 {
4587 int cnt = 0;
4588 int count = ((_elemCount+TNC_PART_LEN-1) >> TNC_PART_SHIFT);
4589 for ( int i=0; i<count; i++ ) {
4590 int offs = i*TNC_PART_LEN;
4591 int sz = TNC_PART_LEN;
4592 if ( offs + sz > _elemCount+1 ) {
4593 sz = _elemCount+1 - offs;
4594 }
4595 ldomNode * buf = _elemList[i];
4596 for ( int j=0; j<sz; j++ ) {
4597 if ( buf[j].isElement() ) {
4598 int rm = buf[j].getRendMethod();
4599 if ( rm==erm_final )
4600 cnt++;
4601 }
4602 }
4603 }
4604 return cnt;
4605 }
4606
4607 // This is mostly only useful for FB2 stylesheet, as we no more set
4608 // anything in _docStylesheetFileName
applyDocumentStyleSheet()4609 void ldomDocument::applyDocumentStyleSheet()
4610 {
4611 if ( !getDocFlag(DOC_FLAG_ENABLE_INTERNAL_STYLES) ) {
4612 CRLog::trace("applyDocumentStyleSheet() : DOC_FLAG_ENABLE_INTERNAL_STYLES is disabled");
4613 return;
4614 }
4615 if ( !_docStylesheetFileName.empty() ) {
4616 if ( getContainer().isNull() )
4617 return;
4618 if ( parseStyleSheet(_docStylesheetFileName) ) {
4619 CRLog::debug("applyDocumentStyleSheet() : Using document stylesheet from link/stylesheet from %s",
4620 LCSTR(_docStylesheetFileName));
4621 }
4622 } else {
4623 ldomXPointer ss = createXPointer(cs32("/FictionBook/stylesheet"));
4624 if ( !ss.isNull() ) {
4625 lString32 css = ss.getText('\n');
4626 if ( !css.empty() ) {
4627 CRLog::debug("applyDocumentStyleSheet() : Using internal FB2 document stylesheet:\n%s", LCSTR(css));
4628 _stylesheet.parse(LCSTR(css));
4629 } else {
4630 CRLog::trace("applyDocumentStyleSheet() : stylesheet under /FictionBook/stylesheet is empty");
4631 }
4632 } else {
4633 CRLog::trace("applyDocumentStyleSheet() : No internal FB2 stylesheet found under /FictionBook/stylesheet");
4634 }
4635 }
4636 }
4637
parseStyleSheet(lString32 codeBase,lString32 css)4638 bool ldomDocument::parseStyleSheet(lString32 codeBase, lString32 css)
4639 {
4640 LVImportStylesheetParser parser(this);
4641 return parser.Parse(codeBase, css);
4642 }
4643
parseStyleSheet(lString32 cssFile)4644 bool ldomDocument::parseStyleSheet(lString32 cssFile)
4645 {
4646 LVImportStylesheetParser parser(this);
4647 return parser.Parse(cssFile);
4648 }
4649
/// Render the document, re-initializing node styles and render methods
/// first when the rendering context has changed.
///
/// Steps:
///  1. setRenderProps() is called with the provided properties.
///  2. If checkRenderContext() fails: the rendered block cache is cleared,
///     styles are dropped and re-initialized (with the document stylesheet
///     applied on a pushed copy of _stylesheet), render methods are
///     re-initialized, and _rendered is reset.
///  3. If the document is not (or no more) rendered: pages are rebuilt with
///     renderBlockElement() and serialized into _pagesData. Otherwise, pages
///     are restored from _pagesData.
///
/// @param pages     page list, cleared and filled (or restored from _pagesData)
/// @param callback  optional (may be NULL); gets OnFormatStart(), OnFormatEnd()
///                  and OnDocumentReady()
/// @param width, dy, showCover, y0, def_font, def_interline_space, props
///                  forwarded to setRenderProps()
/// @param usable_left_overflow, usable_right_overflow
///                  forwarded to renderBlockElement()
/// @return true if a full (re-)rendering has been done, false if none was needed
///
/// NOTE(review): def_font is dereferenced unconditionally for the first log
/// line, and pages is used without a null check; presumably callers always
/// provide both - confirm.
bool ldomDocument::render( LVRendPageList * pages, LVDocViewCallback * callback, int width, int dy,
                           bool showCover, int y0, font_ref_t def_font, int def_interline_space,
                           CRPropRef props, int usable_left_overflow, int usable_right_overflow )
{
    CRLog::info("Render is called for width %d, pageHeight=%d, fontFace=%s, docFlags=%d", width, dy, def_font->getTypeFace().c_str(), getDocFlags() );
    CRLog::trace("initializing default style...");
    //persist();
//    {
//        lUInt32 styleHash = calcStyleHash();
//        styleHash = styleHash * 31 + calcGlobalSettingsHash();
//        CRLog::debug("Style hash before setRenderProps: %x", styleHash);
//    } //bool propsChanged =
    // (the "changed" result of setRenderProps() is not used: the decision to
    // re-render is left to checkRenderContext() below)
    setRenderProps( width, dy, showCover, y0, def_font, def_interline_space, props );

    // update styles
//    if ( getRootNode()->getStyle().isNull() || getRootNode()->getFont().isNull()
//        || _docFlags != _hdr.render_docflags
//        || width!=_hdr.render_dx || dy!=_hdr.render_dy || defStyleHash!=_hdr.stylesheet_hash ) {
//        CRLog::trace("init format data...");
//        getRootNode()->recurseElements( initFormatData );
//    } else {
//        CRLog::trace("reusing existing format data...");
//    }

    // Must be fetched before checkRenderContext(), which resets it
    bool was_just_rendered_from_cache = _just_rendered_from_cache; // cleared by checkRenderContext()
    if ( !checkRenderContext() ) {
        if ( _nodeDisplayStyleHashInitial == NODE_DISPLAY_STYLE_HASH_UNINITIALIZED ) { // happen when just loaded
            // For knowing/debugging cases when node styles set up during loading
            // is invalid (should happen now only when EPUB has embedded fonts
            // or some pseudoclass like :last-child has been met).
            printf("CRE: styles re-init needed after load, re-rendering\n");
        }
        CRLog::info("rendering context is changed - full render required...");
        // Clear LFormattedTextRef cache
        _renderedBlockCache.clear();
        CRLog::trace("init format data...");
        //CRLog::trace("validate 1...");
        //validateDocument();
        CRLog::trace("Dropping existing styles...");
        //CRLog::debug( "root style before drop style %d", getNodeStyleIndex(getRootNode()->getDataIndex()));
        dropStyles();
        //CRLog::debug( "root style after drop style %d", getNodeStyleIndex(getRootNode()->getDataIndex()));

        // After having dropped styles, which should have dropped most references
        // to fonts instances, we want to drop these fonts instances.
        // Mostly because some fallback fonts, possibly synthesized (fake bold and
        // italic) may have been instantiated in the late phase of text rendering.
        // We don't want such instances to be used for styles as it could cause some
        // cache check issues (perpetual "style hash mismatch", as these synthesized
        // fonts would not yet be there when loading from cache).
        // We need 2 gc() for a complete cleanup. The performance impact of
        // reinstantiating the fonts is minimal.
        gc(); // drop font instances that were only referenced by dropped styles
        gc(); // drop fallback font instances that were only referenced by dropped fonts

        //ldomNode * root = getRootNode();
        //css_style_ref_t roots = root->getStyle();
        //CRLog::trace("validate 2...");
        //validateDocument();

        // Reset counters (quotes nesting levels...)
        TextLangMan::resetCounters();

        // The document stylesheet is only applied for the duration of the
        // styles initialization: _stylesheet is restored by pop() afterwards
        CRLog::trace("Save stylesheet...");
        _stylesheet.push();
        CRLog::trace("Init node styles...");
        applyDocumentStyleSheet();
        getRootNode()->initNodeStyleRecursive( callback );
        CRLog::trace("Restoring stylesheet...");
        _stylesheet.pop();

        CRLog::trace("init render method...");
        getRootNode()->initNodeRendMethodRecursive();

//        getRootNode()->setFont( _def_font );
//        getRootNode()->setStyle( _def_style );
        updateRenderContext();

        // DEBUG dump of render methods
        //dumpRendMethods( getRootNode(), cs32(" - ") );
//        lUInt32 styleHash = calcStyleHash();
//        styleHash = styleHash * 31 + calcGlobalSettingsHash();
//        CRLog::debug("Style hash: %x", styleHash);

        // Styles have changed: force the rendering below
        _rendered = false;
    }
    if ( !_rendered ) {
        if ( callback ) {
            callback->OnFormatStart();
        }
        _renderedBlockCache.reduceSize(1); // Reduce size to save some checking and trashing time
        setCacheFileStale(true); // new rendering: cache file will be updated
        _toc_from_cache_valid = false;
        // force recalculation of page numbers (even if not computed in this
        // session, they will be when loaded from cache next session)
        m_toc.invalidatePageNumbers();
        m_pagemap.invalidatePageInfo();
        pages->clear();
        // The cover, when shown, gets a page of its own before the content
        if ( showCover )
            pages->add( new LVRendPageInfo( _page_height ) );
        LVRendPageContext context( pages, _page_height );
        int numFinalBlocks = calcFinalBlocks();
        CRLog::info("Final block count: %d", numFinalBlocks);
        context.setCallback(callback, numFinalBlocks);
        //updateStyles();
        CRLog::trace("rendering...");
        renderBlockElement( context, getRootNode(), 0, y0, width, usable_left_overflow, usable_right_overflow );
        _rendered = true;
#if 0 //def _DEBUG
        LVStreamRef ostream = LVOpenFileStream( "test_save_after_init_rend_method.xml", LVOM_WRITE );
        saveToStream( ostream, "utf-16" );
#endif
        gc();
        CRLog::trace("finalizing... fonts.length=%d", _fonts.length());
        context.Finalize();
        updateRenderContext();
        // Keep a serialized copy of the pages, to be restored by the
        // "no render" branch below on next calls
        _pagesData.reset();
        pages->serialize( _pagesData );
        _renderedBlockCache.restoreSize(); // Restore original cache size

        if ( _nodeDisplayStyleHashInitial == NODE_DISPLAY_STYLE_HASH_UNINITIALIZED ) {
            // If _nodeDisplayStyleHashInitial has not been initialized from its
            // former value from the cache file, we use the one computed (just
            // above in updateRenderContext()) after the first full rendering
            // (which has applied styles and created the needed autoBoxing nodes
            // in the DOM). It is coherent with the DOM built up to now.
            _nodeDisplayStyleHashInitial = _nodeDisplayStyleHash;
            CRLog::info("Initializing _nodeDisplayStyleHashInitial after first rendering: %x", _nodeDisplayStyleHashInitial);
            // We also save it directly into DocFileHeader _hdr (normally,
            // updateRenderContext() does this, but doing it here avoids
            // a call and an expensive CalcStyleHash)
            _hdr.node_displaystyle_hash = _nodeDisplayStyleHashInitial;
        }

        if ( callback ) {
            callback->OnFormatEnd();
            callback->OnDocumentReady();
        }

        //saveChanges();

        //persist();
        dumpStatistics();

        return true; // full (re-)rendering done

    } else {
        CRLog::info("rendering context is not changed - no render!");
        // Restore pages from their serialized copy, if there is one
        if ( _pagesData.pos() ) {
            _pagesData.setPos(0);
            pages->deserialize( _pagesData );
        }
        CRLog::info("%d rendered pages found", pages->length() );

        // Only notify when _just_rendered_from_cache was set on entry
        if ( was_just_rendered_from_cache && callback )
            callback->OnDocumentReady();

        return false; // no (re-)rendering needed
    }

}
4811 #endif
4812
setNodeTypes(const elem_def_t * node_scheme)4813 void lxmlDocBase::setNodeTypes( const elem_def_t * node_scheme )
4814 {
4815 if ( !node_scheme )
4816 return;
4817 for ( ; node_scheme && node_scheme->id != 0; ++node_scheme )
4818 {
4819 _elementNameTable.AddItem(
4820 node_scheme->id, // ID
4821 lString32(node_scheme->name), // Name
4822 &node_scheme->props ); // ptr
4823 }
4824 }
4825
4826 // set attribute types from table
setAttributeTypes(const attr_def_t * attr_scheme)4827 void lxmlDocBase::setAttributeTypes( const attr_def_t * attr_scheme )
4828 {
4829 if ( !attr_scheme )
4830 return;
4831 for ( ; attr_scheme && attr_scheme->id != 0; ++attr_scheme )
4832 {
4833 _attrNameTable.AddItem(
4834 attr_scheme->id, // ID
4835 lString32(attr_scheme->name), // Name
4836 NULL);
4837 }
4838 _idAttrId = _attrNameTable.idByName("id");
4839 }
4840
4841 // set namespace types from table
setNameSpaceTypes(const ns_def_t * ns_scheme)4842 void lxmlDocBase::setNameSpaceTypes( const ns_def_t * ns_scheme )
4843 {
4844 if ( !ns_scheme )
4845 return;
4846 for ( ; ns_scheme && ns_scheme->id != 0; ++ns_scheme )
4847 {
4848 _nsNameTable.AddItem(
4849 ns_scheme->id, // ID
4850 lString32(ns_scheme->name), // Name
4851 NULL);
4852 }
4853 }
4854
dumpUnknownEntities(const char * fname)4855 void lxmlDocBase::dumpUnknownEntities( const char * fname )
4856 {
4857 FILE * f = fopen( fname, "wt" );
4858 if ( !f )
4859 return;
4860 fprintf(f, "Unknown elements:\n");
4861 _elementNameTable.dumpUnknownItems(f, UNKNOWN_ELEMENT_TYPE_ID);
4862 fprintf(f, "-------------------------------\n");
4863 fprintf(f, "Unknown attributes:\n");
4864 _attrNameTable.dumpUnknownItems(f, UNKNOWN_ATTRIBUTE_TYPE_ID);
4865 fprintf(f, "-------------------------------\n");
4866 fprintf(f, "Unknown namespaces:\n");
4867 _nsNameTable.dumpUnknownItems(f, UNKNOWN_NAMESPACE_TYPE_ID);
4868 fprintf(f, "-------------------------------\n");
4869 fclose(f);
4870 }
4871
getUnknownEntities()4872 lString32Collection lxmlDocBase::getUnknownEntities()
4873 {
4874 lString32Collection unknown_entities;
4875 unknown_entities.add( _elementNameTable.getUnknownItems(UNKNOWN_ELEMENT_TYPE_ID) );
4876 unknown_entities.add( _attrNameTable.getUnknownItems(UNKNOWN_ATTRIBUTE_TYPE_ID) );
4877 unknown_entities.add( _nsNameTable.getUnknownItems(UNKNOWN_NAMESPACE_TYPE_ID) );
4878 return unknown_entities;
4879 }
4880
4881
4882 #if BUILD_LITE!=1
// Magic markers written by lxmlDocBase::serializeMaps() and checked by
// lxmlDocBase::deserializeMaps() around each serialized section.
static const char * id_map_list_magic = "MAPS";     // start of the whole maps block
static const char * elem_id_map_magic = "ELEM";     // before _elementNameTable
static const char * attr_id_map_magic = "ATTR";     // before _attrNameTable
static const char * attr_value_map_magic = "ATTV";  // before _attrValueTable
static const char * ns_id_map_magic = "NMSP";       // before _nsNameTable
static const char * node_by_id_map_magic = "NIDM";  // before and after the _idNodeMap items
4889
/// One (key, value) pair of _idNodeMap, as copied into a flat array by
/// lxmlDocBase::serializeMaps() to be sorted by key before being written.
typedef struct {
    lUInt32 key;    // key of the _idNodeMap pair
    lUInt32 value;  // value of the _idNodeMap pair (cast to lUInt32)
} id_node_map_item;
4894
compare_id_node_map_items(const void * item1,const void * item2)4895 int compare_id_node_map_items(const void * item1, const void * item2) {
4896 id_node_map_item * v1 = (id_node_map_item*)item1;
4897 id_node_map_item * v2 = (id_node_map_item*)item2;
4898 if (v1->key > v2->key)
4899 return 1;
4900 if (v1->key < v2->key)
4901 return -1;
4902 return 0;
4903 }
4904
4905 /// serialize to byte array (pointer will be incremented by number of bytes written)
serializeMaps(SerialBuf & buf)4906 void lxmlDocBase::serializeMaps( SerialBuf & buf )
4907 {
4908 if ( buf.error() )
4909 return;
4910 int pos = buf.pos();
4911 buf.putMagic( id_map_list_magic );
4912 buf.putMagic( elem_id_map_magic );
4913 _elementNameTable.serialize( buf );
4914 buf << _nextUnknownElementId; // Next Id for unknown element
4915 buf.putMagic( attr_id_map_magic );
4916 _attrNameTable.serialize( buf );
4917 buf << _nextUnknownAttrId; // Next Id for unknown attribute
4918 buf.putMagic( ns_id_map_magic );
4919 _nsNameTable.serialize( buf );
4920 buf << _nextUnknownNsId; // Next Id for unknown namespace
4921 buf.putMagic( attr_value_map_magic );
4922 _attrValueTable.serialize( buf );
4923
4924 int start = buf.pos();
4925 buf.putMagic( node_by_id_map_magic );
4926 lUInt32 cnt = 0;
4927 {
4928 LVHashTable<lUInt32,lInt32>::iterator ii = _idNodeMap.forwardIterator();
4929 for ( LVHashTable<lUInt32,lInt32>::pair * p = ii.next(); p!=NULL; p = ii.next() ) {
4930 cnt++;
4931 }
4932 }
4933 // TODO: investigate why length() doesn't work as count
4934 if ( (int)cnt!=_idNodeMap.length() )
4935 CRLog::error("_idNodeMap.length=%d doesn't match real item count %d", _idNodeMap.length(), cnt);
4936 buf << cnt;
4937 if (cnt > 0)
4938 {
4939 // sort items before serializing!
4940 id_node_map_item * array = new id_node_map_item[cnt];
4941 int i = 0;
4942 LVHashTable<lUInt32,lInt32>::iterator ii = _idNodeMap.forwardIterator();
4943 for ( LVHashTable<lUInt32,lInt32>::pair * p = ii.next(); p!=NULL; p = ii.next() ) {
4944 array[i].key = (lUInt32)p->key;
4945 array[i].value = (lUInt32)p->value;
4946 i++;
4947 }
4948 qsort(array, cnt, sizeof(id_node_map_item), &compare_id_node_map_items);
4949 for (i = 0; i < (int)cnt; i++)
4950 buf << array[i].key << array[i].value;
4951 delete[] array;
4952 }
4953 buf.putMagic( node_by_id_map_magic );
4954 buf.putCRC( buf.pos() - start );
4955
4956 buf.putCRC( buf.pos() - pos );
4957 }
4958
4959 /// deserialize from byte array (pointer will be incremented by number of bytes read)
deserializeMaps(SerialBuf & buf)4960 bool lxmlDocBase::deserializeMaps( SerialBuf & buf )
4961 {
4962 if ( buf.error() )
4963 return false;
4964 int pos = buf.pos();
4965 buf.checkMagic( id_map_list_magic );
4966 buf.checkMagic( elem_id_map_magic );
4967 _elementNameTable.deserialize( buf );
4968 buf >> _nextUnknownElementId; // Next Id for unknown element
4969
4970 if ( buf.error() ) {
4971 CRLog::error("Error while deserialization of Element ID map");
4972 return false;
4973 }
4974
4975 buf.checkMagic( attr_id_map_magic );
4976 _attrNameTable.deserialize( buf );
4977 buf >> _nextUnknownAttrId; // Next Id for unknown attribute
4978
4979 if ( buf.error() ) {
4980 CRLog::error("Error while deserialization of Attr ID map");
4981 return false;
4982 }
4983
4984
4985 buf.checkMagic( ns_id_map_magic );
4986 _nsNameTable.deserialize( buf );
4987 buf >> _nextUnknownNsId; // Next Id for unknown namespace
4988
4989 if ( buf.error() ) {
4990 CRLog::error("Error while deserialization of NS ID map");
4991 return false;
4992 }
4993
4994 buf.checkMagic( attr_value_map_magic );
4995 _attrValueTable.deserialize( buf );
4996
4997 if ( buf.error() ) {
4998 CRLog::error("Error while deserialization of AttrValue map");
4999 return false;
5000 }
5001
5002 int start = buf.pos();
5003 buf.checkMagic( node_by_id_map_magic );
5004 lUInt32 idmsize;
5005 buf >> idmsize;
5006 _idNodeMap.clear();
5007 if ( idmsize < 20000 )
5008 _idNodeMap.resize( idmsize*2 );
5009 for ( unsigned i=0; i<idmsize; i++ ) {
5010 lUInt32 key;
5011 lUInt32 value;
5012 buf >> key;
5013 buf >> value;
5014 _idNodeMap.set( key, value );
5015 if ( buf.error() )
5016 return false;
5017 }
5018 buf.checkMagic( node_by_id_map_magic );
5019
5020 if ( buf.error() ) {
5021 CRLog::error("Error while deserialization of ID->Node map");
5022 return false;
5023 }
5024
5025 buf.checkCRC( buf.pos() - start );
5026
5027 if ( buf.error() ) {
5028 CRLog::error("Error while deserialization of ID->Node map - CRC check failed");
5029 return false;
5030 }
5031
5032 buf.checkCRC( buf.pos() - pos );
5033
5034 return !buf.error();
5035 }
5036 #endif
5037
/// Tell whether a text is made only of whitespace.
/// @param text  characters to check
/// @param len   number of characters to check
/// @return true if each of the first len characters is a space, '\r', '\n'
///         or '\t' (so, also true when len <= 0), false otherwise
bool IsEmptySpace( const lChar32 * text, int len )
{
    for ( int pos = 0; pos < len; ++pos ) {
        switch ( text[pos] ) {
            case ' ':
            case '\r':
            case '\n':
            case '\t':
                continue; // whitespace: check the next character
            default:
                return false;
        }
    }
    return true;
}
5045
5046
5047 /////////////////////////////////////////////////////////////////
5048 /// lxmlElementWriter
5049
// File-scope flag consumed by the ldomElementWriter constructor: while true,
// the next <body> element created gets the document TOC as its _tocItem, and
// the flag is reset to false. It starts as false here.
// NOTE(review): presumably set to true elsewhere in this file when a new
// document gets written - confirm.
static bool IS_FIRST_BODY = false;
5051
ldomElementWriter(ldomDocument * document,lUInt16 nsid,lUInt16 id,ldomElementWriter * parent,bool insert_before_last_child)5052 ldomElementWriter::ldomElementWriter(ldomDocument * document, lUInt16 nsid, lUInt16 id, ldomElementWriter * parent, bool insert_before_last_child)
5053 : _parent(parent), _document(document), _tocItem(NULL), _isBlock(true), _isSection(false),
5054 _stylesheetIsSet(false), _bodyEnterCalled(false), _pseudoElementAfterChildIndex(-1)
5055 {
5056 //logfile << "{c";
5057 _typeDef = _document->getElementTypePtr( id );
5058 _flags = 0;
5059 if ( (_typeDef && _typeDef->white_space >= css_ws_pre_line) || (_parent && _parent->getFlags()&TXTFLG_PRE) )
5060 _flags |= TXTFLG_PRE; // Parse as PRE: pre-line, pre, pre-wrap and break-spaces
5061 // This will be updated in ldomElementWriter::onBodyEnter() after we have
5062 // set styles to this node, so we'll get the real white_space value to use.
5063
5064 _isSection = (id==el_section);
5065
5066 // Default (for elements not specified in fb2def.h) is to allow text
5067 // (except for the root node which must have children)
5068 _allowText = _typeDef ? _typeDef->allow_text : (_parent?true:false);
5069 if (_document->getDOMVersionRequested() < 20180528) { // revert what was changed 20180528
5070 // <hr>, <ul>, <ol>, <dl>, <output>, <section>, <svg> didn't allow text
5071 if ( id==el_hr || id==el_ul || id==el_ol || id==el_dl ||
5072 id==el_output || id==el_section || id==el_svg ) {
5073 _allowText = false;
5074 }
5075 // <code> was white-space: pre
5076 if ( id==el_code ) {
5077 _flags |= TXTFLG_PRE;
5078 }
5079 }
5080
5081 if (_parent) {
5082 lUInt32 index = _parent->getElement()->getChildCount();
5083 if ( insert_before_last_child )
5084 index--;
5085 _element = _parent->getElement()->insertChildElement( index, nsid, id );
5086 }
5087 else
5088 _element = _document->getRootNode(); //->insertChildElement( (lUInt32)-1, nsid, id );
5089 if ( id==el_body ) {
5090 if ( IS_FIRST_BODY ) {
5091 _tocItem = _document->getToc();
5092 //_tocItem->clear();
5093 IS_FIRST_BODY = false;
5094 }
5095 else {
5096 int fmt = _document->getProps()->getIntDef(DOC_PROP_FILE_FORMAT_ID, doc_format_none);
5097 if ( fmt == doc_format_fb2 || fmt == doc_format_fb3 ) {
5098 // Add FB2 2nd++ BODYs' titles (footnotes and endnotes) in the TOC
5099 // (but not their own children that are <section>)
5100 _isSection = true; // this is just to have updateTocItem() called
5101 // Also add the "NonLinear" attribute so these other BODYs are flagged
5102 // as non-linear and can be hidden by frontend code that handles this
5103 // (this is actually suggested by the FB2 specs: "... multiple
5104 // bodies are used for additional information, like footnotes,
5105 // that do not appear in the main book flow. The first body is
5106 // presented to the reader by default, and content in the other
5107 // bodies should be accessible by hyperlinks.")
5108 addAttribute( 0, attr_NonLinear, U"" );
5109 }
5110 }
5111 }
5112 //logfile << "}";
5113 }
5114
/// Get this writer's text flags (_flags), in which the constructor and
/// onBodyEnter() set or clear TXTFLG_PRE.
/// @return the current value of _flags
lUInt32 ldomElementWriter::getFlags()
{
    return _flags;
}
5119
isBlockNode(ldomNode * node)5120 static bool isBlockNode( ldomNode * node )
5121 {
5122 if ( !node->isElement() )
5123 return false;
5124 #if BUILD_LITE!=1
5125 if ( node->getStyle()->display <= css_d_inline || node->getStyle()->display == css_d_none ) {
5126 return false;
5127 }
5128 return true;
5129 #else
5130 return true;
5131 #endif
5132 }
5133
isInlineNode(ldomNode * node)5134 static bool isInlineNode( ldomNode * node )
5135 {
5136 if ( node->isText() )
5137 return true;
5138 //int d = node->getStyle()->display;
5139 //return ( d==css_d_inline || d==css_d_run_in );
5140 int m = node->getRendMethod();
5141 return m == erm_inline;
5142 }
5143
isFloatingNode(ldomNode * node)5144 static bool isFloatingNode( ldomNode * node )
5145 {
5146 if ( node->isText() )
5147 return false;
5148 return node->getStyle()->float_ > css_f_none;
5149 }
5150
isNotBoxWrappingNode(ldomNode * node)5151 static bool isNotBoxWrappingNode( ldomNode * node )
5152 {
5153 if ( BLOCK_RENDERING_N(node, PREPARE_FLOATBOXES) && node->getStyle()->float_ > css_f_none )
5154 return false; // floatBox
5155 // isBoxingInlineBox() already checks for BLOCK_RENDERING_BOX_INLINE_BLOCKS)
5156 return !node->isBoxingInlineBox();
5157 }
5158
/// Predicate negating ldomNode::isBoxingInlineBox().
/// @param node  node to check
/// @return true if the node is not a boxing inlineBox
static bool isNotBoxingInlineBoxNode( ldomNode * node )
{
    return !node->isBoxingInlineBox();
}
5163
getSectionHeader(ldomNode * section)5164 static lString32 getSectionHeader( ldomNode * section )
5165 {
5166 lString32 header;
5167 if ( !section || section->getChildCount() == 0 )
5168 return header;
5169 ldomNode * child = section->getChildElementNode(0, U"title");
5170 if ( !child )
5171 return header;
5172 header = child->getText(U' ', 1024);
5173 return header;
5174 }
5175
getPath()5176 lString32 ldomElementWriter::getPath()
5177 {
5178 if ( !_path.empty() || _element->isRoot() )
5179 return _path;
5180 _path = _parent->getPath() + "/" + _element->getXPathSegment();
5181 return _path;
5182 }
5183
updateTocItem()5184 void ldomElementWriter::updateTocItem()
5185 {
5186 if ( !_isSection )
5187 return;
5188 if ( !_parent )
5189 return;
5190 if ( _parent->_tocItem ) { // <section> in the first <body>
5191 lString32 title = getSectionHeader( _element );
5192 //CRLog::trace("TOC ITEM: %s", LCSTR(title));
5193 _tocItem = _parent->_tocItem->addChild(title, ldomXPointer(_element,0), getPath() );
5194 }
5195 else if ( getElement()->getNodeId() == el_body ) { // 2nd, 3rd... <body>, in FB2 documents
5196 lString32 title = getSectionHeader( _element );
5197 _document->getToc()->addChild(title, ldomXPointer(_element,0), getPath() );
5198 }
5199 _isSection = false;
5200 }
5201
onBodyEnter()5202 void ldomElementWriter::onBodyEnter()
5203 {
5204 _bodyEnterCalled = true;
5205 #if BUILD_LITE!=1
5206 //CRLog::trace("onBodyEnter() for node %04x %s", _element->getDataIndex(), LCSTR(_element->getNodeName()));
5207 if ( _document->isDefStyleSet() && _element ) {
5208 _element->initNodeStyle();
5209 // if ( _element->getStyle().isNull() ) {
5210 // CRLog::error("error while style initialization of element %x %s", _element->getNodeIndex(), LCSTR(_element->getNodeName()) );
5211 // crFatalError();
5212 // }
5213 int nb_children = _element->getChildCount();
5214 if ( nb_children > 0 ) {
5215 // The only possibility for this element being built to have children
5216 // is if the above initNodeStyle() has applied to this node some
5217 // matching selectors that had ::before or ::after, which have then
5218 // created one or two pseudoElem children. But let's be sure of that.
5219 for ( int i=0; i<nb_children; i++ ) {
5220 ldomNode * child = _element->getChildNode(i);
5221 if ( child->getNodeId() == el_pseudoElem ) {
5222 if ( child->hasAttribute(attr_Before) ) {
5223 // The "Before" pseudo element (not part of the XML)
5224 // needs to have its style applied. As it has no
5225 // children, we can also init its rend method.
5226 child->initNodeStyle();
5227 child->initNodeRendMethod();
5228 }
5229 else if ( child->hasAttribute(attr_After) ) {
5230 // For the "After" pseudo element, we need to wait
5231 // for all real children to be added, to move it
5232 // as its right position (last), to init its style
5233 // (because of "content:close-quote", whose nested
5234 // level need to have seen all previous nodes to
5235 // be accurate) and its rendering method.
5236 // We'll do that in onBodyExit() when called for
5237 // this node.
5238 _pseudoElementAfterChildIndex = i;
5239 }
5240 }
5241 }
5242 }
5243 _isBlock = isBlockNode(_element);
5244 // If initNodeStyle() has set "white-space: pre" or alike, update _flags
5245 if ( _element->getStyle()->white_space >= css_ws_pre_line) {
5246 _flags |= TXTFLG_PRE;
5247 }
5248 else {
5249 _flags &= ~TXTFLG_PRE;
5250 }
5251 } else {
5252 }
5253 if ( _isSection ) {
5254 if ( _parent && _parent->_isSection ) {
5255 _parent->updateTocItem();
5256 }
5257
5258 }
5259 #endif
5260 }
5261
ensurePseudoElement(bool is_before)5262 void ldomNode::ensurePseudoElement( bool is_before ) {
5263 #if BUILD_LITE!=1
5264 // This node should have that pseudoElement, but it might already be there,
5265 // so check if there is already one, and if not, create it.
5266 // This happens usually in the initial loading phase, but it might in
5267 // a re-rendering if the pseudo element is introduced by a change in
5268 // styles (we won't be able to create a node if there's a cache file).
5269 int insertChildIndex = -1;
5270 int nb_children = getChildCount();
5271 if ( is_before ) { // ::before
5272 insertChildIndex = 0; // always to be inserted first, if not already there
5273 if ( nb_children > 0 ) {
5274 ldomNode * child = getChildNode(0); // should always be found as the first node
5275 // pseudoElem might have been wrapped by a inlineBox, autoBoxing, floatBox...
5276 while ( child && child->isBoxingNode() && child->getChildCount()>0 )
5277 child = child->getChildNode(0);
5278 if ( child && child->getNodeId() == el_pseudoElem && child->hasAttribute(attr_Before) ) {
5279 // Already there, no need to create it
5280 insertChildIndex = -1;
5281 }
5282 }
5283 }
5284 else { // ::after
5285 // In the XML loading phase, this one might be either first,
5286 // or second if there's already a Before. In the re-rendering
5287 // phase, it would have been moved as the last node. In all these
5288 // cases, it is always the last at the moment we are checking.
5289 insertChildIndex = nb_children; // always to be inserted last, if not already there
5290 if ( nb_children > 0 ) {
5291 ldomNode * child = getChildNode(nb_children-1); // should always be found as the last node
5292 // pseudoElem might have been wrapped by a inlineBox, autoBoxing, floatBox...
5293 while ( child && child->isBoxingNode() && child->getChildCount()>0 )
5294 child = child->getChildNode(0);
5295 if ( child && child->getNodeId() == el_pseudoElem && child->hasAttribute(attr_After) ) {
5296 // Already there, no need to create it
5297 insertChildIndex = -1;
5298 }
5299 }
5300 }
5301 if ( insertChildIndex >= 0 ) {
5302 ldomNode * pseudo = insertChildElement( insertChildIndex, LXML_NS_NONE, el_pseudoElem );
5303 lUInt16 attribute_id = is_before ? attr_Before : attr_After;
5304 pseudo->setAttributeValue(LXML_NS_NONE, attribute_id, U"");
5305 // We are called by lvrend.cpp setNodeStyle(), after the parent
5306 // style and font have been fully set up. We could set this pseudo
5307 // element style with pseudo->initNodeStyle(), as it can inherit
5308 // properly, but we should not:
5309 // - when re-rendering, initNodeStyleRecursive()/updateStyleDataRecursive()
5310 // will iterate thru this node we just added as a child, and do it.
5311 // - when XML loading, we could do it for the "Before" pseudo element,
5312 // but for the "After" one, we need to wait for all real children to be
5313 // added and have their style applied - just because they can change
5314 // open-quote/close-quote nesting levels - to be sure we get the
5315 // proper nesting level quote char for the After node.
5316 // So, for the XML loading phase, we do that in onBodyEnter() and
5317 // onBodyExit() when called on the parent node.
5318 }
5319
5320 #endif
5321 }
5322
5323 #if BUILD_LITE!=1
resetRendMethodToInline(ldomNode * node)5324 static void resetRendMethodToInline( ldomNode * node )
5325 {
5326 // we shouldn't reset to inline (visible) if display: none
5327 // (using node->getRendMethod() != erm_invisible seems too greedy and may
5328 // hide other nodes)
5329 if (node->getStyle()->display != css_d_none)
5330 node->setRendMethod(erm_inline);
5331 else if (node->getDocument()->getDOMVersionRequested() < 20180528) // do that in all cases
5332 node->setRendMethod(erm_inline);
5333 }
5334
/// Set a node's rendering method to erm_invisible, unconditionally.
/// @param node  node to update
static void resetRendMethodToInvisible( ldomNode * node )
{
    node->setRendMethod(erm_invisible);
}
5339 #endif
5340
removeChildren(int startIndex,int endIndex)5341 void ldomNode::removeChildren( int startIndex, int endIndex )
5342 {
5343 for ( int i=endIndex; i>=startIndex; i-- ) {
5344 removeChild(i)->destroy();
5345 }
5346 }
5347
autoboxChildren(int startIndex,int endIndex,bool handleFloating)5348 void ldomNode::autoboxChildren( int startIndex, int endIndex, bool handleFloating )
5349 {
5350 #if BUILD_LITE!=1
5351 if ( !isElement() )
5352 return;
5353 css_style_ref_t style = getStyle();
5354 bool pre = ( style->white_space >= css_ws_pre_line );
5355 // (css_ws_pre_line might need special care?)
5356 int firstNonEmpty = startIndex;
5357 int lastNonEmpty = endIndex;
5358
5359 bool hasInline = pre;
5360 bool hasNonEmptyInline = pre;
5361 bool hasFloating = false;
5362 // (Note: did not check how floats inside <PRE> are supposed to work)
5363 if ( !pre ) {
5364 while ( firstNonEmpty<=endIndex && getChildNode(firstNonEmpty)->isText() ) {
5365 lString32 s = getChildNode(firstNonEmpty)->getText();
5366 if ( !IsEmptySpace(s.c_str(), s.length() ) )
5367 break;
5368 firstNonEmpty++;
5369 }
5370 while ( lastNonEmpty>=endIndex && getChildNode(lastNonEmpty)->isText() ) {
5371 lString32 s = getChildNode(lastNonEmpty)->getText();
5372 if ( !IsEmptySpace(s.c_str(), s.length() ) )
5373 break;
5374 lastNonEmpty--;
5375 }
5376
5377 for ( int i=firstNonEmpty; i<=lastNonEmpty; i++ ) {
5378 ldomNode * node = getChildNode(i);
5379 if ( isInlineNode( node ) ) {
5380 hasInline = true;
5381 if ( !hasNonEmptyInline ) {
5382 if (node->isText()) {
5383 lString32 s = node->getText();
5384 if ( !IsEmptySpace(s.c_str(), s.length() ) ) {
5385 hasNonEmptyInline = true;
5386 }
5387 }
5388 else {
5389 if ( handleFloating && isFloatingNode(node) ) {
5390 // Ignore floatings
5391 }
5392 else {
5393 hasNonEmptyInline = true;
5394 // Note: when not using DO_NOT_CLEAR_OWN_FLOATS, we might
5395 // want to be more agressive in the removal of empty
5396 // elements, including nested empty elements which would
5397 // have no effect on the rendering (eg, some empty <link/>
5398 // or <span id="PageNumber123"/>), to avoid having the float
5399 // in an autoBox element with nothing else, which would
5400 // then be cleared and leave some blank space.
5401 // We initially did:
5402 // // For now, assume any inline node with some content
5403 // // (text or other inlines) is non empty.
5404 // if ( node->getChildCount() > 0 )
5405 // hasNonEmptyInline = true;
5406 // else if (node->getNodeId() == el_br) {
5407 // hasNonEmptyInline = true;
5408 // }
5409 // else {
5410 // const css_elem_def_props_t * ntype = node->getElementTypePtr();
5411 // if (ntype && ntype->is_object) // standalone image
5412 // hasNonEmptyInline = true;
5413 // }
5414 // and we could even use hasNonEmptyInlineContent() to get
5415 // rid of any nested empty elements and be sure to have our
5416 // float standalone and be able to have it rendered as block
5417 // instead of in an erm_final.
5418 //
5419 // But this was for edge cases (but really noticable), and it has
5420 // become less critical now that we have/ DO_NOT_CLEAR_OWN_FLOATS,
5421 // so let's not remove any element from our DOM (those with some
5422 // id= attribute might be the target of a link).
5423 //
5424 // Sample test case in China.EN at the top of the "Politics" section:
5425 // "...</div> <link/> (or any text) <div float>...</div> <div>..."
5426 // gets turned into:
5427 // "...</div>
5428 // <autoBoxing>
5429 // <link/> (or any text)
5430 // <floatBox>
5431 // <div float>...</div>
5432 // </floatBox>
5433 // </autoBoxing>
5434 // <div>..."
5435 // If the floatbox would be let outside of the autobox, it would
5436 // be fine when not DO_NOT_CLEAR_OWN_FLOATS too.
5437 }
5438 }
5439 }
5440 }
5441 if ( handleFloating && isFloatingNode(node) )
5442 hasFloating = true;
5443 if ( hasNonEmptyInline && hasFloating )
5444 break; // We know, no need to look more
5445 }
5446 }
5447
5448 if ( hasFloating && !hasNonEmptyInline) {
5449 // only multiple floats with empty spaces in between:
5450 // remove empty text nodes, and let the floats be blocks, don't autobox
5451 for ( int i=endIndex; i>=startIndex; i-- ) {
5452 if ( !isFloatingNode(getChildNode(i)) )
5453 removeChildren(i, i);
5454 }
5455 }
5456 else if ( hasInline ) { //&& firstNonEmpty<=lastNonEmpty
5457
5458 #ifdef TRACE_AUTOBOX
5459 CRLog::trace("Autobox children %d..%d of node <%s> childCount=%d", firstNonEmpty, lastNonEmpty, LCSTR(getNodeName()), getChildCount());
5460
5461 for ( int i=firstNonEmpty; i<=lastNonEmpty; i++ ) {
5462 ldomNode * node = getChildNode(i);
5463 if ( node->isText() )
5464 CRLog::trace(" text: %d '%s'", node->getDataIndex(), LCSTR(node->getText()));
5465 else
5466 CRLog::trace(" elem: %d <%s> rendMode=%d display=%d", node->getDataIndex(), LCSTR(node->getNodeName()), node->getRendMethod(), node->getStyle()->display);
5467 }
5468 #endif
5469 // remove trailing empty
5470 removeChildren(lastNonEmpty+1, endIndex);
5471
5472 // inner inline
5473 ldomNode * abox = insertChildElement( firstNonEmpty, LXML_NS_NONE, el_autoBoxing );
5474 moveItemsTo( abox, firstNonEmpty+1, lastNonEmpty+1 );
5475 // remove starting empty
5476 removeChildren(startIndex, firstNonEmpty-1);
5477 abox->initNodeStyle();
5478 if ( !BLOCK_RENDERING_N(this, FLOAT_FLOATBOXES) ) {
5479 // If we don't want floatBoxes floating, reset them to be
5480 // rendered inline among inlines
5481 abox->recurseMatchingElements( resetRendMethodToInline, isNotBoxingInlineBoxNode );
5482 }
5483 abox->setRendMethod( erm_final );
5484 }
5485 else if ( hasFloating) {
5486 // only floats, don't autobox them (otherwise the autobox wouldn't be floating)
5487 // remove trailing empty
5488 removeChildren(lastNonEmpty+1, endIndex);
5489 // remove starting empty
5490 removeChildren(startIndex, firstNonEmpty-1);
5491 }
5492 else {
5493 // only empty items: remove them instead of autoboxing
5494 removeChildren(startIndex, endIndex);
5495 }
5496 #endif
5497 }
5498
cleanIfOnlyEmptyTextInline(bool handleFloating)5499 bool ldomNode::cleanIfOnlyEmptyTextInline( bool handleFloating )
5500 {
5501 #if BUILD_LITE!=1
5502 if ( !isElement() )
5503 return false;
5504 css_style_ref_t style = getStyle();
5505 if ( style->white_space >= css_ws_pre )
5506 return false; // Don't mess with PRE (css_ws_pre_line might need special care?)
5507 // We return false as soon as we find something non text, or text non empty
5508 int i = getChildCount()-1;
5509 for ( ; i>=0; i-- ) {
5510 ldomNode * node = getChildNode(i);
5511 if ( node->isText() ) {
5512 lString32 s = node->getText();
5513 if ( !IsEmptySpace(s.c_str(), s.length() ) ) {
5514 return false;
5515 }
5516 }
5517 else if ( handleFloating && isFloatingNode(node) ) {
5518 // Ignore floatings
5519 }
5520 else { // non-text non-float element
5521 return false;
5522 }
5523 }
5524 // Ok, only empty text inlines, with possible floats
5525 i = getChildCount()-1;
5526 for ( ; i>=0; i-- ) {
5527 // With the tests done above, we just need to remove text nodes
5528 if ( getChildNode(i)->isText() ) {
5529 removeChildren(i, i);
5530 }
5531 }
5532 return true;
5533 #else
5534 return false;
5535 #endif
5536 }
5537
5538 /// returns true if element has inline content (non empty text, images, <BR>)
hasNonEmptyInlineContent(bool ignoreFloats)5539 bool ldomNode::hasNonEmptyInlineContent( bool ignoreFloats )
5540 {
5541 if ( getRendMethod() == erm_invisible ) {
5542 return false;
5543 }
5544 if ( ignoreFloats && BLOCK_RENDERING_N(this, FLOAT_FLOATBOXES) && getStyle()->float_ > css_f_none ) {
5545 return false;
5546 }
5547 // With some other bool param, we might want to also check for
5548 // padding top/bottom (and height if check ENSURE_STYLE_HEIGHT)
5549 // if these will introduce some content.
5550 if ( isText() ) {
5551 lString32 s = getText();
5552 return !IsEmptySpace(s.c_str(), s.length() );
5553 }
5554 if (getNodeId() == el_br) {
5555 return true;
5556 }
5557 const css_elem_def_props_t * ntype = getElementTypePtr();
5558 if (ntype && ntype->is_object) { // standalone image
5559 return true;
5560 }
5561 for ( int i=0; i<(int)getChildCount(); i++ ) {
5562 if ( getChildNode(i)->hasNonEmptyInlineContent() ) {
5563 return true;
5564 }
5565 }
5566 return false;
5567 }
5568
5569 #if BUILD_LITE!=1
detectChildTypes(ldomNode * parent,bool & hasBlockItems,bool & hasInline,bool & hasInternalTableItems,bool & hasFloating,bool detectFloating=false)5570 static void detectChildTypes( ldomNode * parent, bool & hasBlockItems, bool & hasInline,
5571 bool & hasInternalTableItems, bool & hasFloating, bool detectFloating=false )
5572 {
5573 hasBlockItems = false;
5574 hasInline = false;
5575 hasFloating = false;
5576 if ( parent->getNodeId() == el_pseudoElem ) {
5577 // pseudoElem (generated from CSS ::before and ::after), will have
5578 // some (possibly empty) plain text content.
5579 hasInline = true;
5580 return; // and it has no children
5581 }
5582 int len = parent->getChildCount();
5583 for ( int i=len-1; i>=0; i-- ) {
5584 ldomNode * node = parent->getChildNode(i);
5585 if ( !node->isElement() ) {
5586 // text
5587 hasInline = true;
5588 }
5589 else if ( detectFloating && node->getStyle()->float_ > css_f_none ) {
5590 hasFloating = true;
5591 }
5592 else {
5593 // element
5594 int d = node->getStyle()->display;
5595 int m = node->getRendMethod();
5596 if ( d==css_d_none || m==erm_invisible )
5597 continue;
5598 if ( m==erm_inline ) { //d==css_d_inline || d==css_d_run_in
5599 hasInline = true;
5600 } else {
5601 hasBlockItems = true;
5602 // (Table internal elements are all block items in the context
5603 // where hasBlockItems is used, so account for them in both)
5604 if ( ( d > css_d_table && d <= css_d_table_caption ) || ( m > erm_table ) ) {
5605 hasInternalTableItems = true;
5606 }
5607 }
5608 }
5609 }
5610 }
5611
5612 // Generic version of autoboxChildren() without any specific inline/block checking,
5613 // accepting any element id (from the enum el_*, like el_div, el_tabularBox) as
5614 // the wrapping element.
boxWrapChildren(int startIndex,int endIndex,lUInt16 elementId)5615 ldomNode * ldomNode::boxWrapChildren( int startIndex, int endIndex, lUInt16 elementId )
5616 {
5617 if ( !isElement() )
5618 return NULL;
5619 int firstNonEmpty = startIndex;
5620 int lastNonEmpty = endIndex;
5621
5622 while ( firstNonEmpty<=endIndex && getChildNode(firstNonEmpty)->isText() ) {
5623 lString32 s = getChildNode(firstNonEmpty)->getText();
5624 if ( !IsEmptySpace(s.c_str(), s.length() ) )
5625 break;
5626 firstNonEmpty++;
5627 }
5628 while ( lastNonEmpty>=endIndex && getChildNode(lastNonEmpty)->isText() ) {
5629 lString32 s = getChildNode(lastNonEmpty)->getText();
5630 if ( !IsEmptySpace(s.c_str(), s.length() ) )
5631 break;
5632 lastNonEmpty--;
5633 }
5634
5635 // printf("boxWrapChildren %d>%d | %d<%d\n", startIndex, firstNonEmpty, lastNonEmpty, endIndex);
5636 if ( firstNonEmpty<=lastNonEmpty ) {
5637 // remove trailing empty
5638 removeChildren(lastNonEmpty+1, endIndex);
5639 // create wrapping container
5640 ldomNode * box = insertChildElement( firstNonEmpty, LXML_NS_NONE, elementId );
5641 moveItemsTo( box, firstNonEmpty+1, lastNonEmpty+1 );
5642 // remove starting empty
5643 removeChildren(startIndex, firstNonEmpty-1);
5644 return box;
5645 }
5646 else {
5647 // Only empty items: remove them instead of box wrapping them
5648 removeChildren(startIndex, endIndex);
5649 return NULL;
5650 }
5651 }
5652
5653 // Uncomment to debug COMPLETE_INCOMPLETE_TABLES tabularBox wrapping
5654 // #define DEBUG_INCOMPLETE_TABLE_COMPLETION
5655
5656 // init table element render methods
5657 // states: 0=table, 1=colgroup, 2=rowgroup, 3=row, 4=cell
5658 // returns table cell count
5659 // When BLOCK_RENDERING_COMPLETE_INCOMPLETE_TABLES, we follow rules
5660 // from the "Generate missing child wrappers" section in:
5661 // https://www.w3.org/TR/CSS22/tables.html#anonymous-boxes
5662 // https://www.w3.org/TR/css-tables-3/#fixup (clearer than previous one)
5663 // and we wrap unproper children in a tabularBox element.
initTableRendMethods(ldomNode * enode,int state)5664 int initTableRendMethods( ldomNode * enode, int state )
5665 {
5666 //main node: table
5667 if ( state==0 && ( enode->getStyle()->display==css_d_table ||
5668 enode->getStyle()->display==css_d_inline_table ||
5669 (enode->getStyle()->display==css_d_inline_block && enode->getNodeId()==el_table) ) ) {
5670 enode->setRendMethod( erm_table );
5671 }
5672 int cellCount = 0; // (returned, but not used anywhere)
5673 int cnt = enode->getChildCount();
5674 int i;
5675 int first_unproper = -1; // keep track of consecutive unproper children that
5676 int last_unproper = -1; // must all be wrapped in a single wrapper
5677 for (i=0; i<cnt; i++) {
5678 ldomNode * child = enode->getChildNode( i );
5679 css_display_t d;
5680 if ( child->isElement() ) {
5681 d = child->getStyle()->display;
5682 }
5683 else { // text node
5684 d = css_d_inline;
5685 // Not sure about what to do with whitespace only text nodes:
5686 // we shouldn't meet any alongside real elements (as whitespace
5687 // around and at start/end of block nodes are discarded), but
5688 // we may in case of style changes (inline > table) after
5689 // a book has been loaded.
5690 // Not sure if we should handle them differently when no unproper
5691 // elements yet (they will be discarded by the table render algo),
5692 // and when among unpropers (they could find their place in the
5693 // wrapped table cell).
5694 // Note that boxWrapChildren() called below will remove
5695 // them at start or end of an unproper elements sequence.
5696 }
5697 bool is_last = (i == cnt-1);
5698 bool is_proper = false;
5699 if ( state==0 ) { // in table
5700 if ( d==css_d_table_row ) {
5701 child->setRendMethod( erm_table_row );
5702 cellCount += initTableRendMethods( child, 3 ); // > row
5703 is_proper = true;
5704 }
5705 else if ( d==css_d_table_row_group ) {
5706 child->setRendMethod( erm_table_row_group );
5707 cellCount += initTableRendMethods( child, 2 ); // > rowgroup
5708 is_proper = true;
5709 }
5710 else if ( d==css_d_table_header_group ) {
5711 child->setRendMethod( erm_table_header_group );
5712 cellCount += initTableRendMethods( child, 2 ); // > rowgroup
5713 is_proper = true;
5714 }
5715 else if ( d==css_d_table_footer_group ) {
5716 child->setRendMethod( erm_table_footer_group );
5717 cellCount += initTableRendMethods( child, 2 ); // > rowgroup
5718 is_proper = true;
5719 }
5720 else if ( d==css_d_table_column_group ) {
5721 child->setRendMethod( erm_table_column_group );
5722 cellCount += initTableRendMethods( child, 1 ); // > colgroup
5723 is_proper = true;
5724 }
5725 else if ( d==css_d_table_column ) {
5726 child->setRendMethod( erm_table_column );
5727 is_proper = true;
5728 }
5729 else if ( d==css_d_table_caption ) {
5730 child->setRendMethod( erm_final );
5731 is_proper = true;
5732 }
5733 else if ( d==css_d_none ) {
5734 child->setRendMethod( erm_invisible );
5735 is_proper = true;
5736 }
5737 else if ( child->getNodeId()==el_tabularBox ) {
5738 // Most probably added by us in a previous rendering
5739 #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
5740 printf("initTableRendMethods(0): (reused)wrapping unproper > row\n");
5741 #endif
5742 child->setRendMethod( erm_table_row );
5743 cellCount += initTableRendMethods( child, 3 ); // > row
5744 is_proper = true;
5745 }
5746 }
5747 else if ( state==2 ) { // in rowgroup
5748 if ( d==css_d_table_row ) {
5749 child->setRendMethod( erm_table_row );
5750 cellCount += initTableRendMethods( child, 3 ); // > row
5751 is_proper = true;
5752 }
5753 else if ( d==css_d_none ) {
5754 child->setRendMethod( erm_invisible );
5755 is_proper = true;
5756 }
5757 else if ( child->getNodeId()==el_tabularBox ) {
5758 // Most probably added by us in a previous rendering
5759 #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
5760 printf("initTableRendMethods(2): (reused)wrapping unproper > row\n");
5761 #endif
5762 child->setRendMethod( erm_table_row );
5763 cellCount += initTableRendMethods( child, 3 ); // > row
5764 is_proper = true;
5765 }
5766 }
5767 else if ( state==3 ) { // in row
5768 if ( d==css_d_table_cell ) {
5769 // This will set the rend method of the cell to either erm_block
5770 // or erm_final, depending on its content.
5771 child->initNodeRendMethodRecursive();
5772 cellCount++;
5773 is_proper = true;
5774 }
5775 else if ( d==css_d_none ) {
5776 child->setRendMethod( erm_invisible );
5777 is_proper = true;
5778 }
5779 else if ( child->getNodeId()==el_tabularBox ) {
5780 // Most probably added by us in a previous rendering
5781 #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
5782 printf("initTableRendMethods(3): (reused)wrapping unproper > cell\n");
5783 #endif
5784 // This will set the rend method of the cell to either erm_block
5785 // or erm_final, depending on its content.
5786 child->initNodeRendMethodRecursive();
5787 cellCount++;
5788 is_proper = true;
5789 }
5790 }
5791 else if ( state==1 ) { // in colgroup
5792 if ( d==css_d_table_column ) {
5793 child->setRendMethod( erm_table_column );
5794 is_proper = true;
5795 }
5796 else {
5797 // No need to tabularBox invalid colgroup children:
5798 // they are not rendered, and should be considered
5799 // as if display: none.
5800 child->setRendMethod( erm_invisible );
5801 is_proper = true;
5802 }
5803 }
5804 else { // shouldn't be reached
5805 crFatalError(151, "initTableRendMethods state unexpected");
5806 // child->setRendMethod( erm_final );
5807 }
5808
5809 // Check and deal with unproper children
5810 if ( !is_proper ) { // Unproper child met
5811 // printf("initTableRendMethods(%d): child %d is unproper\n", state, i);
5812 lUInt32 rend_flags = enode->getDocument()->getRenderBlockRenderingFlags();
5813 if ( BLOCK_RENDERING(rend_flags, COMPLETE_INCOMPLETE_TABLES) ) {
5814 // We can insert a tabularBox element to wrap unproper elements
5815 last_unproper = i;
5816 if (first_unproper < 0)
5817 first_unproper = i;
5818 }
5819 else {
5820 // Asked to not complete incomplete tables, or we can't insert
5821 // tabularBox elements anymore
5822 if ( !BLOCK_RENDERING(rend_flags, ENHANCED) ) {
5823 // Legacy behaviour was to just make invisible internal-table
5824 // elements that were not found in their proper internal-table
5825 // container, but let other non-internal-table elements be
5826 // (which might be rendered and drawn quite correctly when
5827 // they are erm_final/erm_block, but won't be if erm_inline).
5828 if ( d > css_d_table ) {
5829 child->setRendMethod( erm_invisible );
5830 }
5831 }
5832 else {
5833 // When in enhanced mode, we let the ones that could
5834 // be rendered and drawn quite correctly be. But we'll
5835 // have the others drawn as erm_killed, showing a small
5836 // symbol so users know some content is missing.
5837 if ( d > css_d_table || d <= css_d_inline ) {
5838 child->setRendMethod( erm_killed );
5839 }
5840 // Note that there are other situations where some content
5841 // would not be shown when !COMPLETE_INCOMPLETE_TABLES, and
5842 // for which we are not really able to set some node as
5843 // erm_killed (for example, with TABLE > TABLE, the inner
5844 // one will be rendered, but the outer one would have
5845 // a height=0, and so the inner content will overflow
5846 // its container and will not be drawn...)
5847 }
5848 }
5849 }
5850 if ( first_unproper >= 0 && (is_proper || is_last) ) {
5851 // We met unproper children, but we now have a proper child, or we're done:
5852 // wrap all these consecutive unproper nodes inside a single tabularBox
5853 // element with the proper rendering method.
5854 #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
5855 printf("initTableRendMethods(%d): wrapping unproper %d>%d\n",
5856 state, first_unproper, last_unproper);
5857 #endif
5858 int elems_removed = last_unproper - first_unproper + 1;
5859 ldomNode * tbox = enode->boxWrapChildren(first_unproper, last_unproper, el_tabularBox);
5860 if ( tbox && !tbox->isNull() ) {
5861 elems_removed -= 1; // tabularBox added
5862 if ( state==0 || state==2 ) { // in table or rowgroup
5863 // No real need to store the style as an attribute: it would
5864 // be remembered and re-used when styles change, and just
5865 // setting the appropriate rendering method is all that is
5866 // needed for rendering after this.
5867 // tbox->setAttributeValue(LXML_NS_NONE, enode->getDocument()->getAttrNameIndex(U"style"), U"display: table-row");
5868 tbox->initNodeStyle();
5869 tbox->setRendMethod( erm_table_row );
5870 cellCount += initTableRendMethods( tbox, 3 ); // > row
5871 }
5872 else if ( state==3 ) {
5873 tbox->initNodeStyle();
5874 // This will set the rend method of the cell to either erm_block
5875 // or erm_final, depending on its content.
5876 tbox->initNodeRendMethodRecursive();
5877 cellCount++;
5878 }
5879 else if ( state==1 ) { // should not happen, see above
5880 tbox->initNodeStyle();
5881 tbox->setRendMethod( erm_table_column );
5882 }
5883 }
5884 // If tbox is NULL, all unproper have been removed, and no element added
5885 if (is_last)
5886 break;
5887 // Account for what's been removed in our loop index and end
5888 i -= elems_removed;
5889 cnt -= elems_removed;
5890 first_unproper = -1;
5891 last_unproper = -1;
5892 }
5893 }
5894 // if ( state==0 ) {
5895 // dumpRendMethods( enode, cs32(" ") );
5896 // }
5897 return cellCount;
5898 }
5899
hasInvisibleParent(ldomNode * node)5900 bool hasInvisibleParent( ldomNode * node )
5901 {
5902 for ( ; !node->isRoot(); node = node->getParentNode() )
5903 if ( node->getStyle()->display==css_d_none )
5904 return true;
5905 return false;
5906 }
5907
isFloatingBox() const5908 bool ldomNode::isFloatingBox() const
5909 {
5910 // BLOCK_RENDERING_FLOAT_FLOATBOXES is what triggers rendering
5911 // the floats floating. They are wrapped in a floatBox, possibly
5912 // not floating, when BLOCK_RENDERING_WRAP_FLOATS.
5913 if ( BLOCK_RENDERING_N(this, FLOAT_FLOATBOXES) && getNodeId() == el_floatBox
5914 && getStyle()->float_ > css_f_none)
5915 return true;
5916 return false;
5917 }
5918
5919 /// is node an inlineBox that has not been re-inlined by having
5920 /// its child no more inline-block/inline-table
isBoxingInlineBox() const5921 bool ldomNode::isBoxingInlineBox() const
5922 {
5923 // BLOCK_RENDERING_BOX_INLINE_BLOCKS) is what ensures inline-block
5924 // are boxed and rendered as an inline block, but we may have them
5925 // wrapping a node that is no more inline-block (when some style
5926 // tweaks have changed the display: property).
5927 if ( getNodeId() == el_inlineBox && BLOCK_RENDERING_N(this, BOX_INLINE_BLOCKS) ) {
5928 if (getChildCount() == 1) {
5929 css_display_t d = getChildNode(0)->getStyle()->display;
5930 if (d == css_d_inline_block || d == css_d_inline_table) {
5931 return true;
5932 }
5933 // Also if this box parent is <ruby> and if what this inlineBox
5934 // contains (probably a rubyBox) is being rendered as erm_table
5935 if ( getChildNode(0)->getRendMethod() == erm_table && getParentNode()
5936 && getParentNode()->getStyle()->display == css_d_ruby ) {
5937 return true;
5938 }
5939 return isEmbeddedBlockBoxingInlineBox(true); // avoid rechecking what we just checked
5940 }
5941 }
5942 return false;
5943 }
5944
5945 /// is node an inlineBox that wraps a bogus embedded block (not inline-block/inline-table)
5946 /// can be called with inline_box_checks_done=true when isBoxingInlineBox() has already
5947 /// been called to avoid rechecking what is known
isEmbeddedBlockBoxingInlineBox(bool inline_box_checks_done) const5948 bool ldomNode::isEmbeddedBlockBoxingInlineBox(bool inline_box_checks_done) const
5949 {
5950 if ( !inline_box_checks_done ) {
5951 if ( getNodeId() != el_inlineBox || !BLOCK_RENDERING_N(this, BOX_INLINE_BLOCKS) )
5952 return false;
5953 if (getChildCount() != 1)
5954 return false;
5955 css_display_t d = getChildNode(0)->getStyle()->display;
5956 if (d == css_d_inline_block || d == css_d_inline_table) {
5957 return false; // regular boxing inlineBox
5958 }
5959 if ( getChildNode(0)->getRendMethod() == erm_table && getParentNode()
5960 && getParentNode()->getStyle()->display == css_d_ruby ) {
5961 return false; // inlineBox wrapping a rubyBox as a child of <ruby>
5962 }
5963 }
5964 if ( hasAttribute( attr_T ) ) { // T="EmbeddedBlock"
5965 // (no other possible value yet, no need to compare strings)
5966 int cm = getChildNode(0)->getRendMethod();
5967 if ( cm == erm_inline || cm == erm_invisible || cm == erm_killed )
5968 return false; // child has been reset to inline
5969 return true;
5970 }
5971 return false;
5972 }
5973
initNodeRendMethod()5974 void ldomNode::initNodeRendMethod()
5975 {
5976 // This method is called when re-rendering, but also while
5977 // initially loading a document.
5978 // On initial loading:
5979 // A node's style is defined when the node element XML tag
5980 // opening is processed (by lvrend.cpp setNodeStyle() which
5981 // applies inheritance from its parent, which has
5982 // already been parsed).
5983 // This method is called when the node element XML tag is
5984 // closed, so all its children are known, have styles, and
5985 // have had this method called on them.
5986 // On re-rendering:
5987 // Styles are first applied recursively, parents first (because
5988 // of inheritance).
5989 // This method is then called thru recurseElementsDeepFirst, so
5990 // from deepest children up to their parents up to the root node.
5991 // So, this method should decide how this node is going to be
5992 // rendered (inline, block containing other blocks, or final block
5993 // containing only inlines), only from the node's own style, and
5994 // from the styles and rendering methods of its children.
5995 if ( !isElement() )
5996 return;
5997 if ( isRoot() ) {
5998 setRendMethod(erm_block);
5999 return;
6000 }
6001
6002 // DEBUG TEST
6003 // if ( getParentNode()->getChildIndex( getDataIndex() )<0 ) {
6004 // CRLog::error("Invalid parent->child relation for nodes %d->%d", getParentNode()->getDataIndex(), getDataIndex() );
6005 // }
6006 // if ( getNodeName() == "image" ) {
6007 // CRLog::trace("Init log for image");
6008 // }
6009
6010 // Needed if COMPLETE_INCOMPLETE_TABLES, so have it updated along
6011 // the way to avoid an extra loop for checking if we have some.
6012 bool hasInternalTableItems = false;
6013
6014 int d = getStyle()->display;
6015 lUInt32 rend_flags = getDocument()->getRenderBlockRenderingFlags();
6016
6017 if ( hasInvisibleParent(this) ) { // (should be named isInvisibleOrHasInvisibleParent())
6018 // Note: we could avoid that up-to-root-node walk for each node
6019 // by inheriting css_d_none in setNodeStyle(), and just using
6020 // "if ( d==css_d_none )" instead of hasInvisibleParent(this).
6021 // But not certain this would have no side effect, and some
6022 // quick tests show no noticeable change in rendering timing.
6023 //
6024 //recurseElements( resetRendMethodToInvisible );
6025 setRendMethod(erm_invisible);
6026 } else if ( d==css_d_inline ) {
6027 // Used to be: an inline parent resets all its children to inline
6028 // (so, if some block content is erroneously wrapped in a SPAN, all
6029 // the content became inline...), except, depending on what's enabled:
6030 // - nodes with float: which can stay block among inlines
6031 // - the inner content of inlineBoxes (the inlineBox is already inline)
6032 // recurseMatchingElements( resetRendMethodToInline, isNotBoxWrappingNode );
6033 //
6034 // But we don't want to "reset all its children to inline" when a bogus
6035 // spurious block element happens to be inside some inline one, as this
6036 // can be seen happening (<small> multiple <p>...</small>).
6037 // So, when BOX_INLINE_BLOCKS is enabled, we wrap such block elements inside
6038 // a <inlineBox> element, nearly just like if it were "display: inline-block",
6039 // with a few tweaks in its rendering (see below).
6040 // Or, if it contains only block elements, and empty text nodes, we can just
6041 // set this inline element to be erm_block.
6042 //
6043 // Some discussions about that "block inside inline" at:
6044 // https://github.com/w3c/csswg-drafts/issues/1477
6045 // https://stackoverflow.com/questions/1371307/displayblock-inside-displayinline
6046 //
6047 if ( !BLOCK_RENDERING(rend_flags, BOX_INLINE_BLOCKS) ) {
6048 // No support for anything but inline elements, and possibly embedded floats
6049 recurseMatchingElements( resetRendMethodToInline, isNotBoxWrappingNode );
6050 }
6051 else if ( !isNotBoxWrappingNode(this) ) {
6052 // If this node is already a box wrapping node (active floatBox or inlineBox,
6053 // possibly a <inlineBox T="EmbeddedBlock"> created here in a previous
6054 // rendering), just set it to erm_inline.
6055 setRendMethod(erm_inline);
6056 }
6057 else {
6058 // Set this inline element to be erm_inline, and look at its children
6059 setRendMethod(erm_inline);
6060 // Quick scan first, before going into more checks if needed
6061 bool has_block_nodes = false;
6062 bool has_inline_nodes = false;
6063 for ( int i=0; i < getChildCount(); i++ ) {
6064 ldomNode * child = getChildNode( i );
6065 if ( !child->isElement() ) // text node
6066 continue;
6067 int cm = child->getRendMethod();
6068 if ( cm == erm_inline ) {
6069 has_inline_nodes = true; // We won't be able to make it erm_block
6070 continue;
6071 }
6072 if ( cm == erm_invisible || cm == erm_killed )
6073 continue;
6074 if ( !isNotBoxWrappingNode( child ) ) {
6075 // This child is already wrapped by a floatBox or inlineBox
6076 continue;
6077 }
6078 has_block_nodes = true;
6079 if ( has_inline_nodes )
6080 break; // we know enough
6081 }
6082 if ( has_block_nodes ) {
6083 bool has_non_empty_text_nodes = false;
6084 bool do_wrap_blocks = true;
6085 if ( !has_inline_nodes ) {
6086 // No real inline nodes. Inspect each text node to see if they
6087 // are all empty text.
6088 for ( int i=0; i < getChildCount(); i++ ) {
6089 if ( getChildNode(i)->isText() ) {
6090 lString32 s = getChildNode(i)->getText();
6091 if ( !IsEmptySpace(s.c_str(), s.length() ) ) {
6092 has_non_empty_text_nodes = true;
6093 break;
6094 }
6095 }
6096 }
6097 if ( !has_non_empty_text_nodes ) {
6098 // We can be a block wrapper (renderBlockElementEnhanced/Legacy will
6099 // skip empty text nodes, no need to remove them)
6100 setRendMethod(erm_block);
6101 do_wrap_blocks = false;
6102 }
6103 }
6104 if ( do_wrap_blocks ) {
6105 // We have a mix of inline nodes or non-empty text, and block elements:
6106 // wrap each block element in a <inlineBox T="EmbeddedBlock">.
6107 for ( int i=getChildCount()-1; i >=0; i-- ) {
6108 ldomNode * child = getChildNode( i );
6109 if ( !child->isElement() ) // text node
6110 continue;
6111 int cm = child->getRendMethod();
6112 if ( cm == erm_inline || cm == erm_invisible || cm == erm_killed )
6113 continue;
6114 if ( !isNotBoxWrappingNode( child ) )
6115 continue;
6116 // This child is erm_block or erm_final (or some other erm_table like rend method).
6117 // It will be inside a upper erm_final
6118 // Wrap this element into an inlineBox, just as if it was display:inline-block,
6119 // with a few differences that will be handled by lvrend.cpp/lvtextfm.cpp:
6120 // - it should behave like if it has width: 100%, so preceeding
6121 // and following text/inlines element will be on their own line
6122 // - the previous line should not be justified
6123 // - in the matter of page splitting, lines (as they are 100%-width) should
6124 // be forwarded to the parent flow/context
6125 // Remove any preceeding or following empty text nodes (there can't
6126 // be consecutive text nodes) so we don't get spurious empty lines.
6127 if ( i < getChildCount()-1 && getChildNode(i+1)->isText() ) {
6128 lString32 s = getChildNode(i+1)->getText();
6129 if ( IsEmptySpace(s.c_str(), s.length() ) ) {
6130 removeChildren(i+1, i+1);
6131 }
6132 }
6133 if ( i > 0 && getChildNode(i-1)->isText() ) {
6134 lString32 s = getChildNode(i-1)->getText();
6135 if ( IsEmptySpace(s.c_str(), s.length() ) ) {
6136 removeChildren(i-1, i-1);
6137 i--; // update our position
6138 }
6139 }
6140 ldomNode * ibox = insertChildElement( i, LXML_NS_NONE, el_inlineBox );
6141 moveItemsTo( ibox, i+1, i+1 ); // move this child from 'this' into ibox
6142 // Mark this inlineBox so we can handle its pecularities
6143 ibox->setAttributeValue(LXML_NS_NONE, attr_T, U"EmbeddedBlock");
6144 setNodeStyle( ibox, getStyle(), getFont() );
6145 ibox->setRendMethod( erm_inline );
6146 }
6147 }
6148 }
6149 }
6150 } else if ( d==css_d_ruby ) {
6151 // This will be dealt in a big section below. For now, reset everything
6152 // to inline as ruby is only allowed to contain inline content.
6153 // We don't support the newer display: values like ruby-base, ruby-text...,
6154 // but only "display: ruby" which is just set on the <ruby> element
6155 // (which allows us to have it reset back to "display: inline" if we
6156 // don't wan't ruby support).
6157 // recurseElements( resetRendMethodToInline );
6158 // Or may be not: looks like we can support <ruby> inside <ruby>,
6159 // so allow that; and probably anything nested, as we'll handle
6160 // that just like a table cell content.
6161 setRendMethod(erm_inline);
6162 } else if ( d==css_d_run_in ) {
6163 // runin
6164 //CRLog::trace("switch all children elements of <%s> to inline", LCSTR(getNodeName()));
6165 recurseElements( resetRendMethodToInline );
6166 setRendMethod(erm_inline);
6167 } else if ( d==css_d_list_item_legacy ) {
6168 // list item (no more used, obsolete rendering method)
6169 setRendMethod(erm_final);
6170 } else if ( d==css_d_table ) {
6171 // table: this will "Generate missing child wrappers" if needed
6172 initTableRendMethods( this, 0 );
6173 // Not sure if we should do the same for the other css_d_table_* and
6174 // call initTableRendMethods(this, 1/2/3) so that the "Generate missing
6175 // child wrappers" step is done before the "Generate missing parents" step
6176 // we might be doing below - to conform to the order of steps in the specs.
6177 } else if ( d==css_d_inline_table && ( BLOCK_RENDERING(rend_flags, COMPLETE_INCOMPLETE_TABLES) || getNodeId()==el_table ) ) {
6178 // Only if we're able to complete incomplete tables, or if this
6179 // node is itself a <TABLE>. Otherwise, fallback to the following
6180 // catch-all 'else' and render its content as block.
6181 // (Note that we should skip that if the node is an image, as
6182 // initTableRendMethods() would not be able to do anything with
6183 // it as it can't add children to an IMG. Hopefully, the specs
6184 // say replaced elements like IMG should not have table-like
6185 // display: values - which setNodeStyle() ensures.)
6186 // Any element can have "display: inline-table", and if it's not
6187 // a TABLE, initTableRendMethods() will complete/wrap it to make
6188 // it possibly the single cell of a TABLE. This should naturally
6189 // ensure all the differences between inline-block and inline-table.
6190 // https://stackoverflow.com/questions/19352072/what-is-the-difference-between-inline-block-and-inline-table/19352149#19352149
6191 initTableRendMethods( this, 0 );
6192 // Note: if (d==css_d_inline_block && getNodeId()==el_table), we
6193 // should NOT call initTableRendMethods()! It should be rendered
6194 // as a block, and if its children are actually TRs, they will be
6195 // wrapped in a "missing parent" tabularBox wrapper that will
6196 // have initTableRendMethods() called on it.
6197 } else {
6198 // block or final
6199 // remove last empty space text nodes
6200 bool hasBlockItems = false;
6201 bool hasInline = false;
6202 bool hasFloating = false;
6203 // Floating nodes, thus block, are accounted apart from inlines
6204 // and blocks, as their behaviour is quite specific.
6205 // - When !PREPARE_FLOATBOXES, we just don't deal specifically with
6206 // floats, for a rendering more similar to legacy rendering: SPANs
6207 // with float: will be considered as non-floating inline, while
6208 // DIVs with float: will be considered as block elements, possibly
6209 // causing autoBoxing of surrounding content with only inlines.
6210 // - When PREPARE_FLOATBOXES (even if !FLOAT_FLOATBOXES), we do prepare
6211 // floats and floatBoxes to be consistent, ready to be floating, or
6212 // not and flat (with a rendering possibly not similar to legacy),
6213 // without any display hash mismatch (so that toggling does not
6214 // require a full reloading). SPANs and DIVs with float: mixed with
6215 // inlines will be considered as inline when !FLOAT_FLOATBOXES, to
6216 // avoid having autoBoxing elements that would mess with a correct
6217 // floating rendering.
6218 // Note that FLOAT_FLOATBOXES requires having PREPARE_FLOATBOXES.
6219 bool handleFloating = BLOCK_RENDERING(rend_flags, PREPARE_FLOATBOXES);
6220
6221 detectChildTypes( this, hasBlockItems, hasInline, hasInternalTableItems, hasFloating, handleFloating );
6222 const css_elem_def_props_t * ntype = getElementTypePtr();
6223 if (ntype && ntype->is_object) { // image
6224 // No reason to erm_invisible an image !
6225 // And it has to be erm_final to be drawn (or set to erm_inline
6226 // by some upper node).
6227 // (Note that setNodeStyle() made sure an image can't be
6228 // css_d_inline_table/css_d_table*, as per specs.)
6229 setRendMethod( erm_final );
6230 /* used to be:
6231 switch ( d )
6232 {
6233 case css_d_block:
6234 case css_d_list_item_block:
6235 case css_d_inline:
6236 case css_d_inline_block:
6237 case css_d_inline_table:
6238 case css_d_run_in:
6239 setRendMethod( erm_final );
6240 break;
6241 default:
6242 //setRendMethod( erm_invisible );
6243 recurseElements( resetRendMethodToInvisible );
6244 break;
6245 }
6246 */
6247 } else if ( hasBlockItems && !hasInline ) {
6248 // only blocks (or floating blocks) inside
6249 setRendMethod( erm_block );
6250 } else if ( !hasBlockItems && hasInline ) {
6251 // only inline (with possibly floating blocks that will
6252 // be dealt with by renderFinalBlock)
6253 if ( hasFloating ) {
6254 // If all the inline elements are empty space, we may as well
6255 // remove them and have our floats contained in a erm_block
6256 if ( cleanIfOnlyEmptyTextInline(true) ) {
6257 setRendMethod( erm_block );
6258 }
6259 else {
6260 if ( !BLOCK_RENDERING(rend_flags, FLOAT_FLOATBOXES) ) {
6261 // If we don't want floatBoxes floating, reset them to be
6262 // rendered inline among inlines
6263 recurseMatchingElements( resetRendMethodToInline, isNotBoxingInlineBoxNode );
6264 }
6265 setRendMethod( erm_final );
6266 }
6267 }
6268 else {
6269 setRendMethod( erm_final );
6270 }
6271 } else if ( !hasBlockItems && !hasInline ) {
6272 // nothing (or only floating blocks)
6273 // (don't ignore it as it might be some HR with borders/padding,
6274 // even if no content)
6275 setRendMethod( erm_block );
6276 } else if ( hasBlockItems && hasInline ) {
6277 // Mixed content of blocks and inline elements:
6278 // the consecutive inline elements should be considered part
6279 // of an anonymous block element - non-anonymous for crengine,
6280 // as we create a <autoBoxing> element and add it to the DOM),
6281 // taking care of ignoring unvaluable inline elements consisting
6282 // of only spaces.
6283 // Note: when there are blocks, inlines and floats mixed, we could
6284 // choose to let the floats be blocks, or include them with the
6285 // surrounding inlines into an autoBoxing:
6286 // - blocks: they will just be footprints (so, only 2 squares at
6287 // top left and right) over the inline/final content, and when
6288 // there are many, the text may not wrap fully around the floats...
6289 // - with inlines: they will wrap fully, but if the text is short,
6290 // the floats will be cleared, and there will be blank vertical
6291 // filling space...
6292 // The rendering can be really different, and there's no real way
6293 // of knowing which will be the best.
6294 // So, for now, go with including them with inlines into the
6295 // erm_final autoBoxing.
6296 // The above has become less critical after we added DO_NOT_CLEAR_OWN_FLOATS
6297 // and ALLOW_EXACT_FLOATS_FOOTPRINTS, and both options should render
6298 // similarly.
6299 // But still going with including them with inlines is best, as we
6300 // don't need to include them in the footprint (so, the limit of
6301 // 5 outer block float IDs is still available for real outer floats).
6302 if ( getParentNode()->getNodeId()==el_autoBoxing ) {
6303 // already autoboxed
6304 setRendMethod( erm_final );
6305 // This looks wrong: no reason to force child of autoBoxing to be
6306 // erm_final: most often, the autoBoxing has been created to contain
6307 // only inlines and set itself to be erm_final. So, it would have been
6308 // caught by the 'else if ( !hasBlockItems && hasInline )' above and
6309 // set to erm_final. If not, styles have changed, and it may contain
6310 // a mess of styles: it might be better to proceed with the following
6311 // cleanup (and have autoBoxing re-autoboxed... or not at all when
6312 // a cache file is used, and we'll end up being erm_final anyway).
6313 // But let's keep it, in case it handles some edge cases.
6314 } else {
6315 // cleanup or autobox
6316 int i=getChildCount()-1;
6317 for ( ; i>=0; i-- ) {
6318 ldomNode * node = getChildNode(i);
6319
6320 // DEBUG TEST
6321 // if ( getParentNode()->getChildIndex( getDataIndex() )<0 ) {
6322 // CRLog::error("Invalid parent->child relation for nodes %d->%d",
6323 // getParentNode()->getDataIndex(), getDataIndex() );
6324 // }
6325
6326 // We want to keep float:'ing nodes with inline nodes, so they stick with their
6327 // siblings inline nodes in an autoBox: the erm_final autoBox will deal
6328 // with rendering the floating node, and the inline text around it
6329 if ( isInlineNode(node) || (handleFloating && isFloatingNode(node)) ) {
6330 int j = i-1;
6331 for ( ; j>=0; j-- ) {
6332 node = getChildNode(j);
6333 if ( !isInlineNode(node) && !(handleFloating && isFloatingNode(node)) )
6334 break;
6335 }
6336 j++;
6337 // j..i are inline
6338 if ( j>0 || i<(int)getChildCount()-1 )
6339 autoboxChildren( j, i, handleFloating );
6340 i = j;
6341 }
6342 else if ( i>0 && node->getRendMethod() == erm_final ) {
6343 // (We skip the following if the current node is not erm_final, as
6344 // if it is erm_block, we would break the block layout by making
6345 // it all inline in an erm_final autoBoxing.)
6346 // This node is not inline, but might be preceded by a css_d_run_in node:
6347 // https://css-tricks.com/run-in/
6348 // https://developer.mozilla.org/en-US/docs/Web/CSS/display
6349 // "If the adjacent sibling of the element defined as "display: run-in" box
6350 // is a block box, the run-in box becomes the first inline box of the block
6351 // box that follows it. "
6352 // Hopefully only used for footnotes in fb2 where the footnote number
6353 // is in a block element, and the footnote text in another.
6354 // fb2.css sets the first block to be "display: run-in" as an
6355 // attempt to render both on the same line:
6356 // <section id="n1">
6357 // <title style="display: run-in; font-weight: bold;">
6358 // <p>1</p>
6359 // </title>
6360 // <p>Text footnote</p>
6361 // </section>
6362 //
6363 // This node might be that second block: look if preceding node
6364 // is "run-in", and if it is, bring them both in an autoBoxing.
6365 ldomNode * prev = getChildNode(i-1);
6366 ldomNode * inBetweenTextNode = NULL;
6367 if ( prev->isText() && i-1>0 ) { // some possible empty text in between
6368 inBetweenTextNode = prev;
6369 prev = getChildNode(i-2);
6370 }
6371 if ( prev->isElement() && prev->getStyle()->display == css_d_run_in ) {
6372 bool do_autoboxing = true;
6373 int run_in_idx = inBetweenTextNode ? i-2 : i-1;
6374 int block_idx = i;
6375 if ( inBetweenTextNode ) {
6376 lString32 text = inBetweenTextNode->getText();
6377 if ( IsEmptySpace(text.c_str(), text.length() ) ) {
6378 removeChildren(i-1, i-1);
6379 block_idx = i-1;
6380 }
6381 else {
6382 do_autoboxing = false;
6383 }
6384 }
6385 if ( do_autoboxing ) {
6386 CRLog::debug("Autoboxing run-in items");
6387 // Sadly, to avoid having an erm_final inside another erm_final,
6388 // we need to reset the block node to be inline (but that second
6389 // erm_final would have been handled as inline anyway, except
6390 // for possibly updating the strut height/baseline).
6391 node->recurseMatchingElements( resetRendMethodToInline, isNotBoxingInlineBoxNode );
6392 // No need to autobox if there are only 2 children (the run-in and this box)
6393 if ( getChildCount()!=2 ) { // autobox run-in
6394 autoboxChildren( run_in_idx, block_idx, handleFloating );
6395 }
6396 }
6397 i = run_in_idx;
6398 }
6399 }
6400 }
6401 // check types after autobox
6402 detectChildTypes( this, hasBlockItems, hasInline, hasInternalTableItems, hasFloating, handleFloating );
6403 if ( hasInline ) {
6404 // Should not happen when autoboxing has been done above - but
6405 // if we couldn't, fallback to erm_final that will render all
6406 // children as inline
6407 setRendMethod( erm_final );
6408 } else {
6409 // All inlines have been wrapped into block autoBoxing elements
6410 // (themselves erm_final): we can be erm_block
6411 setRendMethod( erm_block );
6412 }
6413 }
6414 }
6415 }
6416
6417 if ( hasInternalTableItems && BLOCK_RENDERING(rend_flags, COMPLETE_INCOMPLETE_TABLES) && getRendMethod() == erm_block ) {
6418 // We have only block items, whether the original ones or the
6419 // autoBoxing nodes we created to wrap inlines, and all empty
6420 // inlines have been removed.
6421 // Some of these block items are css_d_table_cell, css_d_table_row...:
6422 // if this node (their parent) has not the expected css_d_table_row
6423 // or css_d_table display style, we are an improper parent: we want
6424 // to add the missing parent(s) as wrapper(s) between this node and
6425 // these children.
6426 // (If we ended up not being erm_block, and we contain css_d_table_*
6427 // elements, everything is already messed up.)
6428 // Note: we first used the same <autoBoxing> element used to box
6429 // inlines as the table wrapper, which was fine, except in some edge
6430 // cases where some real autoBoxing were wrongly re-used as the tabular
6431 // wrapper (and we ended up having erm_final containing other erm_final
6432 // which were handled just like erm_inline with ugly side effects...)
6433 // So, best to introduce a dedicated element: <tabularBox>.
6434 //
6435 // We follow rules from section "Generate missing parents" in:
6436 // https://www.w3.org/TR/CSS22/tables.html#anonymous-boxes
6437 // https://www.w3.org/TR/css-tables-3/#fixup (clearer than previous one)
6438 // Note: we do that not in the order given by the specs... As we walk
6439 // nodes deep first, we are here first "generating missing parents".
6440 // When walking up, and meeting a real css_d_table element, or
6441 // below when adding a generated erm_table tabularBox, we call
6442 // initTableRendMethods(0), which will "generate missing child wrappers".
6443 // Not really sure both orderings are equivalent, but let's hope it's ok...
6444
6445 // So, let's generate missing parents:
6446
6447 // "An anonymous table-row box must be generated around each sequence
6448 // of consecutive table-cell boxes whose parent is not a table-row."
6449 if ( d != css_d_table_row ) { // We're not a table row
6450 // Look if we have css_d_table_cell that we must wrap in a proper erm_table_row
6451 int last_table_cell = -1;
6452 int first_table_cell = -1;
6453 int last_visible_child = -1;
6454 bool did_wrap = false;
6455 int len = getChildCount();
6456 for ( int i=len-1; i>=0; i-- ) {
6457 ldomNode * child = getChildNode(i);
6458 int cd = child->getStyle()->display;
6459 int cm = child->getRendMethod();
6460 if ( cd == css_d_table_cell ) {
6461 if ( last_table_cell < 0 ) {
6462 last_table_cell = i;
6463 // We've met a css_d_table_cell, see if it is followed by
6464 // tabularBox siblings we might have passed by: they might
6465 // have been added by initTableRendMethods as a missing
6466 // children of a css_d_table_row: make them part of the row.
6467 for (int j=i+1; j<getChildCount(); j++) {
6468 if ( getChildNode(j)->getNodeId()==el_tabularBox )
6469 last_table_cell = j;
6470 else
6471 break;
6472 }
6473 }
6474 if ( i == 0 )
6475 first_table_cell = 0;
6476 if ( last_visible_child < 0 )
6477 last_visible_child = i;
6478 }
6479 else if ( last_table_cell >= 0 && child->getNodeId()==el_tabularBox ) {
6480 // We've seen a css_d_table_cell and we're seeing a tabularBox:
6481 // it might have been added by initTableRendMethods as a missing
6482 // children of a css_d_table_row: make it part of the row
6483 if ( i == 0 )
6484 first_table_cell = 0;
6485 if ( last_visible_child < 0 )
6486 last_visible_child = i;
6487 }
6488 else if ( cd == css_d_none || cm == erm_invisible ) {
6489 // Can be left inside or outside the wrap
6490 if ( i == 0 && last_table_cell >= 0 ) {
6491 // Include it if first and we're wrapping
6492 first_table_cell = 0;
6493 }
6494 }
6495 else {
6496 if ( last_table_cell >= 0)
6497 first_table_cell = i+1;
6498 if ( last_visible_child < 0 )
6499 last_visible_child = i;
6500 }
6501 if ( first_table_cell >= 0 ) {
6502 if ( first_table_cell == 0 && last_table_cell == last_visible_child
6503 && getNodeId()==el_tabularBox && !did_wrap ) {
6504 // All children are table cells, and we're not css_d_table_row,
6505 // but we are a tabularBox!
6506 // We were most probably created here in a previous rendering,
6507 // so just set us to be the anonymous table row.
6508 #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
6509 printf("initNodeRendMethod: (reused)wrapping unproper table cells %d>%d\n",
6510 first_table_cell, last_table_cell);
6511 #endif
6512 setRendMethod( erm_table_row );
6513 }
6514 else {
6515 #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
6516 printf("initNodeRendMethod: wrapping unproper table cells %d>%d\n",
6517 first_table_cell, last_table_cell);
6518 #endif
6519 ldomNode * tbox = boxWrapChildren(first_table_cell, last_table_cell, el_tabularBox);
6520 if ( tbox && !tbox->isNull() ) {
6521 tbox->initNodeStyle();
6522 tbox->setRendMethod( erm_table_row );
6523 }
6524 did_wrap = true;
6525 }
6526 last_table_cell = -1;
6527 first_table_cell = -1;
6528 }
6529 }
6530 }
6531
6532 // "An anonymous table or inline-table box must be generated around each
6533 // sequence of consecutive proper table child boxes which are misparented."
6534 // Not sure if we should skip that for some values of this node's
6535 // style->display among css_d_table*. Let's follow the specs literally.
6536 int last_misparented = -1;
6537 int first_misparented = -1;
6538 int last_visible_child = -1;
6539 bool did_wrap = false;
6540 int len = getChildCount();
6541 for ( int i=len-1; i>=0; i-- ) {
6542 ldomNode * child = getChildNode(i);
6543 int cd = child->getStyle()->display;
6544 int cm = child->getRendMethod();
6545 bool is_misparented = false;
6546 if ( (cd == css_d_table_row || cm == erm_table_row)
6547 && d != css_d_table && d != css_d_table_row_group
6548 && d != css_d_table_header_group && d != css_d_table_footer_group ) {
6549 // A table-row is misparented if its parent is neither a table-row-group
6550 // nor a table-root box (we include by checking cm==erm_table_row any
6551 // anonymous table row created just above).
6552 is_misparented = true;
6553 }
6554 else if ( cd == css_d_table_column && d != css_d_table && d != css_d_table_column_group ) {
6555 // A table-column box is misparented if its parent is neither
6556 // a table-column-group box nor a table-root box.
6557 is_misparented = true;
6558 }
6559 else if ( d != css_d_table && (cd == css_d_table_row_group || cd == css_d_table_header_group
6560 || cd == css_d_table_footer_group || cd == css_d_table_column_group
6561 || cd == css_d_table_caption ) ) {
6562 // A table-row-group, table-column-group, or table-caption box is misparented
6563 // if its parent is not a table-root box.
6564 is_misparented = true;
6565 }
6566 if ( is_misparented ) {
6567 if ( last_misparented < 0 ) {
6568 last_misparented = i;
6569 // As above for table cells: grab passed-by tabularBox siblings
6570 // to include them in the wrap
6571 for (int j=i+1; j<getChildCount(); j++) {
6572 if ( getChildNode(j)->getNodeId()==el_tabularBox )
6573 last_misparented = j;
6574 else
6575 break;
6576 }
6577 }
6578 if (i == 0)
6579 first_misparented = 0;
6580 if ( last_visible_child < 0 )
6581 last_visible_child = i;
6582 }
6583 else if ( last_misparented >= 0 && child->getNodeId()==el_tabularBox ) {
6584 // As above for table cells: include tabularBox siblings in the wrap
6585 if (i == 0)
6586 first_misparented = 0;
6587 if ( last_visible_child < 0 )
6588 last_visible_child = i;
6589 }
6590 else if ( cd == css_d_none || cm == erm_invisible ) {
6591 // Can be left inside or outside the wrap
6592 if ( i == 0 && last_misparented >= 0 ) {
6593 // Include it if first and we're wrapping
6594 first_misparented = 0;
6595 }
6596 }
6597 else {
6598 if ( last_misparented >= 0 )
6599 first_misparented = i+1;
6600 if ( last_visible_child < 0 )
6601 last_visible_child = i;
6602 }
6603 if ( first_misparented >= 0 ) {
6604 if ( first_misparented == 0 && last_misparented == last_visible_child
6605 && getNodeId()==el_tabularBox && !did_wrap ) {
6606 // All children are misparented, and we're not css_d_table,
6607 // but we are a tabularBox!
6608 // We were most probably created here in a previous rendering,
6609 // so just set us to be the anonymous table.
6610 #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
6611 printf("initNodeRendMethod: (reused)wrapping unproper table children %d>%d\n",
6612 first_misparented, last_misparented);
6613 #endif
6614 setRendMethod( erm_table );
6615 initTableRendMethods( this, 0 );
6616 }
6617 else {
6618 #ifdef DEBUG_INCOMPLETE_TABLE_COMPLETION
6619 printf("initNodeRendMethod: wrapping unproper table children %d>%d\n",
6620 first_misparented, last_misparented);
6621 #endif
6622 ldomNode * tbox = boxWrapChildren(first_misparented, last_misparented, el_tabularBox);
6623 if ( tbox && !tbox->isNull() ) {
6624 tbox->initNodeStyle();
6625 tbox->setRendMethod( erm_table );
6626 initTableRendMethods( tbox, 0 );
6627 }
6628 did_wrap = true;
6629 }
6630 last_misparented = -1;
6631 first_misparented = -1;
6632 // Note:
6633 // https://www.w3.org/TR/css-tables-3/#fixup
6634 // "An anonymous table or inline-table box must be generated
6635 // around [...] If the box's parent is an inline, run-in, or
6636 // ruby box (or any box that would perform inlinification of
6637 // its children), then an inline-table box must be generated;
6638 // otherwise it must be a table box."
6639 // We don't handle the "inline parent > inline-table" rule,
6640 // because of one of the first checks at top of this function:
6641 // if this node (the parent) is css_d_inline, we didn't have
6642 // any detectChildTypes() and autoBoxing happening, stayed erm_inline
6643 // and didn't enter this section to do the tabularBox wrapping.
6644 // Changing this (incorrect) rule for css_d_inline opens many
6645 // bigger issues, so let's not support this (rare) case here.
6646 // So:
6647 // <div>Some text <span style="display: table-cell">table-cell</span> and more text.</div>
6648 // will properly have the cell tabularBoxes'ed, which will be
6649 // inserted between 2 autoBoxing (the text nodes), because their
6650 // container is css_d_block DIV.
6651 // While:
6652 // <div><span>Some text <span style="display: table-cell">table-cell</span> and more text.</span></div>
6653 // as the container is a css_d_inline SPAN, nothing will happen
6654 // and everything will be reset to erm_inline. The parent DIV
6655 // will just see that it contains a single erm_inline SPAN,
6656 // and won't do any boxing.
6657 }
6658 }
6659 }
6660
6661 if ( d == css_d_ruby && BLOCK_RENDERING(rend_flags, ENHANCED) ) {
6662 // Ruby input can be quite loose and have various tag strategies (mono/group,
6663 // interleaved/tabular, double sided). Moreover, the specs have evolved between
6664 // 2001 and 2020 (<rbc> tag no more mentioned in 2020; <rtc> being just another
6665 // semantic container for Mozilla, and can be preceded by a bunch of <rt> which
6666 // are pronunciation containers, that don't have to be in an <rtc>...)
6667 // Moreover, various samples on the following pages don't close tags, and expect
6668 // the HTML parser to do that. We do that only when parsing .html files, but
6669 // we don't when parsing .epub files as they are expected to be balanced XHTML.
6670 //
6671 // References:
6672 // https://www.w3.org/International/articles/ruby/markup
6673 // https://www.w3.org/TR/ruby-use-cases/ differences between XHTML, HTML5 & HTML Extensions
6674 // https://www.w3.org/TR/ruby/ Ruby Annotation, 2001
6675 // http://darobin.github.io/html-ruby/ HTML Ruby Markup Extensions, 2015
6676 // https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-ruby-element HTML Living standard
6677 // https://drafts.csswg.org/css-ruby/ CSS Ruby Layout, 2020
6678 // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/rtc
6679 // https://chenhuijing.com/blog/html-ruby/ All about the HTML <ruby> element (in 2016)
6680 // https://github.com/w3c/html/issues/291 How to handle legacy Ruby content that may use <rbc>?
6681 // https://w3c.github.io/i18n-tests/results/ruby-html Browsers support
6682 //
6683 // We can handle quite a few of these variations with the following strategy.
6684 //
6685 // We want a <ruby> (which will stay inline) to only contain inlineBox>rubyBox elements
6686 // that will be set up to be rendered just as an inline-table:
6687 // <ruby, "display: ruby", erm_inline>
6688 // <inlineBox, erm_inline> [1 or more, 1 per ruby segment]
6689 // <rubyBox, erm_table> [1]
6690 // <rbc or rubyBox, erm_table_row> [1]
6691 // <rb or rubyBox, erm_final> base text </rb or /rubyBox> [1 or more]
6692 // </rbc or /rubyBox>
6693 // <rtc or rubyBox, erm_table_row> [1 or more, usually 1 or 2]
6694 // <rt or rubyBox, erm_final> annotation text </rt or /rubyBox> [1 or more]
6695 // </rtc or /rubyBox>
6696 // </rubyBox>
6697 // </inlineBox>
6698 // [some possible empty space text nodes between ruby segments]
6699 // </ruby>
6700 //
6701 // (The re-ordering of the table rows, putting the first "rtc" above the "rbc",
6702 // will be done in renderTable(), as it is just needed there in its own internal
6703 // table data structures. The DOM will stay in its original order: the "rbc"
6704 // staying before followup "rtc", which will give us the correct baseline to use
6705 // for the whole structure: the baseline of the "rbc".)
6706 //
6707 // We need to build all this when we meet a simple:
6708 // <ruby>text1<rt>annot1</rt>text2<rt>annot2</rt> </ruby>
6709 // The only element we'll nearly always find inside a <ruby> is <rt>,
6710 // (but we can find sometimes a single <rtc> with no <rt>).
6711 //
6712 // One thing we might not handle well is white-space, which, depending on where
6713 // it happens, should be dropped or not. We drop some by putting it between table
6714 // elements, we keep some by putting it between the inlineBoxes, but not really
6715 // according to the complex rules in https://drafts.csswg.org/css-ruby/#box-fixup
6716 //
6717 // Some other notes:
6718 // - We can style some ruby elements, including some of the rubyBox we add, with:
6719 // rt, rubyBox[T=rt] { font-size: 50%; font-variant-east-asian: ruby; }
6720 // rubyBox { border: 1px solid green; }
6721 // - Note that on initial loading (HTML parsing, and this boxing here happening,
6722 // the real ruby sub-elements present in the HTML will already be there in the
6723 // DOM and have their style set, possibly inherited from their parent (the <ruby>
6724 // element) *before* this boxing is happening. If we add a rubyBox, and it
6725 // becomes the parent of a rb or rt, these rb or rt won't inherit from the
6726 // rubyBox (that we may style). They also won't get styled by CSS selectors
6727 // like "rubyBox > rt".
6728 // But on subsequent re-renderings, as the DOM is kept, all this will happen.
6729 // So: avoid such rules, and avoid setting inherit'able properties to
6730 // the rubyBox elements; otherwise we may get different look on initial
6731 // loading and on subsequent re-renderings.
6732 // - With some ruby constructs, the behaviour and rendering might be different
6733 // whether we're parsing a HTML file or an EPUB file:
6734 // - the HTML parser is able to auto-close tags, which is needed with most
6735 // of the samples in the above URLs (but may fail on nested ruby with
6736 // unbalanced tags, as auto-closing in one ruby might kill the other).
6737 // - the EPUB XHTML parser expects balanced tags, and may work with nested
6738 // ruby, but will not process ruby with unbalanced tags.
6739
6740 // To make things easier to follow below (with the amount of nested rubyBoxes...),
6741 // we name the variables used to hold each of them:
6742 // ibox1 : the inlineBox wrapping the 1st level rubyBox that will be erm_table (inline-table)
6743 // rbox1 : the 1st level rubyBox that will be erm_table
6744 // rbox2 : the 2nd level rubyBox that will be erm_table_row, like existing <rbc> and <rtc>
6745 // rbox3 : the 3rd level rubyBox that will be a table cell (erm_final or erm_block), like existing <rb> and <rt>
6746
6747 // Check if we have already wrapped: we should contain only <inlineBox>'ed <rubyBox>es
6748 // Note that <ruby style="display: ruby"> is all that is required to trigger this. When
6749 // wanting to disable ruby support, it's enough to just set <ruby> to "display: inline":
6750 // a change in "display:" value will cause a nodeDisplayStyleHash mismatch, and propose
6751 // a full reload with DOM rebuild, which will forget all the rubyBox we added.
6752 int len = getChildCount();
6753 bool needs_wrapping = len > 0;
6754 for ( int i=0; i<len; i++ ) {
6755 ldomNode * child = getChildNode(i);
6756 if ( child->isElement() && child->getNodeId() == el_inlineBox
6757 && child->getChildCount() > 0 && child->getChildNode(0)->getNodeId() == el_rubyBox ) {
6758 // If we find one <inlineBox><rubyBox>, we created that previously and we ensured
6759 // there are only rubyBoxes, empty text nodes, or some trailing inline nodes
6760 // not followed by a <rt>: no need for more checks and work.
6761 needs_wrapping = false;
6762 break;
6763 }
6764 }
6765 if ( needs_wrapping ) {
6766 // 1) Wrap everything up to (and including consecutive ones) <rt> <rtc> <rp>
6767 // into <inlineBox><rubyBox>, and continue doing it after that.
6768 int first_to_wrap = -1;
6769 int last_to_wrap = -1;
6770 for ( int i=0; i<=len; i++ ) {
6771 ldomNode * child;
6772 lInt16 elemId;
6773 bool eoc = i == len; // end of children
6774 if ( !eoc ) {
6775 child = getChildNode(i);
6776 if ( child->isElement() ) {
6777 elemId = child->getNodeId();
6778 }
6779 else {
6780 lString32 s = child->getText();
6781 elemId = IsEmptySpace(s.c_str(), s.length()) ? -2 : -1;
6782 // When meeting an empty space (elemId==-2), we'll delay wrapping
6783 // decision to when we process the next node.
6784 // We'll also not start a wrap with it.
6785 }
6786 }
6787 if ( last_to_wrap >= 0 && (eoc || (elemId != el_rt && elemId != el_rtc && elemId != el_rp && elemId != -2) ) ) {
6788 if ( first_to_wrap < 0 )
6789 first_to_wrap = 0;
6790 ldomNode * rbox1 = boxWrapChildren(first_to_wrap, last_to_wrap, el_rubyBox);
6791 if ( rbox1 && !rbox1->isNull() ) {
6792 // Set an attribute for the kind of container we made (Ruby Segment)
6793 // so we can style it via CSS.
6794 rbox1->setAttributeValue(LXML_NS_NONE, attr_T, U"rseg");
6795 rbox1->initNodeStyle();
6796 // Update loop index and end
6797 int removed = last_to_wrap - first_to_wrap;
6798 i = i - removed;
6799 len = len - removed;
6800 // And wrap this rubyBox in an inlineBox
6801 ldomNode * ibox1 = insertChildElement( first_to_wrap, LXML_NS_NONE, el_inlineBox );
6802 moveItemsTo( ibox1, first_to_wrap+1, first_to_wrap+1 );
6803 ibox1->initNodeStyle();
6804 }
6805 first_to_wrap = -1;
6806 last_to_wrap = -1;
6807 }
6808 if (eoc)
6809 break;
6810 if ( elemId == -1 ) { // isText(), non empty
6811 if ( first_to_wrap < 0 ) {
6812 first_to_wrap = i;
6813 }
6814 }
6815 else if ( elemId == -2 ) { // isText(), empty
6816 // Don't start a wrap on it
6817 }
6818 else {
6819 if ( first_to_wrap < 0 ) {
6820 first_to_wrap = i;
6821 }
6822 if ( elemId == el_rt || elemId == el_rtc || elemId == el_rp ) {
6823 last_to_wrap = i;
6824 // Don't wrap yet: there can be followup other RT/RTC
6825 }
6826 }
6827 }
6828 // 2) Enter each rubyBox we have created (they will be inline-table),
6829 // and wrap its content as needed to make rows (of rubyBox, rbc and rtc)
6830 // and cells (of rubyBox, rb and rt).
6831 len = getChildCount();
6832 for ( int i=0; i<len; i++ ) {
6833 ldomNode * ibox1 = getChildNode(i);
6834 if ( !ibox1->isElement() || ibox1->getNodeId() != el_inlineBox )
6835 continue;
6836 ldomNode * rbox1 = ibox1->getChildCount() > 0 ? ibox1->getChildNode(0) : NULL;
6837 if ( !rbox1 || !rbox1->isElement() || rbox1->getNodeId() != el_rubyBox )
6838 continue;
6839 // (Each rbox1 will be set erm_table)
6840 int len1 = rbox1->getChildCount();
6841 int first_to_wrap = -1;
6842 bool ruby_base_wrap_done = false;
6843 bool ruby_base_present = false;
6844 for ( int i1=0; i1<=len1; i1++ ) {
6845 ldomNode * child;
6846 lInt16 elemId;
6847 bool eoc = i1 == len1; // end of children
6848 if ( !eoc ) {
6849 child = rbox1->getChildNode(i1);
6850 if ( child->isElement() ) {
6851 elemId = child->getNodeId();
6852 }
6853 else {
6854 lString32 s = child->getText();
6855 elemId = IsEmptySpace(s.c_str(), s.length()) ? -2 : -1;
6856 // When meeting an empty space (elemId==-2), we'll delay wrapping
6857 // decision to when we process the next node.
6858 // We'll also not start a wrap with it.
6859 }
6860 }
6861 if ( first_to_wrap >= 0 && (
6862 eoc
6863 || ( !ruby_base_wrap_done && (elemId == el_rtc || elemId == el_rt || elemId == el_rp) )
6864 || ( ruby_base_wrap_done && elemId == el_rtc )
6865 ) ) {
6866 ldomNode * rbox2 = rbox1->boxWrapChildren(first_to_wrap, i1-1, el_rubyBox);
6867 if ( rbox2 && !rbox2->isNull() ) {
6868 // Set an attribute for the kind of container we made (<rbc> or <rtc>-like),
6869 // so we can style it like real <rbc> and <rtc> via CSS.
6870 rbox2->setAttributeValue(LXML_NS_NONE, attr_T, ruby_base_wrap_done ? U"rtc" : U"rbc");
6871 rbox2->initNodeStyle();
6872 // Update loop index and end
6873 int removed = i1-1 - first_to_wrap;
6874 i1 = i1 - removed;
6875 len1 = len1 - removed;
6876 }
6877 first_to_wrap = -1;
6878 if ( !eoc && !ruby_base_wrap_done ) {
6879 ruby_base_present = true; // We did create it
6880 }
6881 if (eoc)
6882 break;
6883 }
6884 if ( elemId == -1 ) { // isText(), non empty
6885 if ( first_to_wrap < 0 ) {
6886 first_to_wrap = i1;
6887 }
6888 }
6889 else if ( elemId == -2 ) { // isText(), empty
6890 // Don't start a wrap on it
6891 }
6892 else {
6893 if ( elemId == el_rbc || elemId == el_rtc ) {
6894 // These are fine containers at this level.
6895 // (If el_rbc, we shouldn't have found anything before
6896 // it; if we did, just ignore it.)
6897 first_to_wrap = -1;
6898 ruby_base_wrap_done = true;
6899 if ( elemId == el_rbc )
6900 ruby_base_present = true;
6901 }
6902 else if ( first_to_wrap < 0 ) {
6903 first_to_wrap = i1;
6904 if ( elemId == el_rt || elemId == el_rp ) {
6905 ruby_base_wrap_done = true;
6906 }
6907 }
6908 }
6909 }
6910 if ( !ruby_base_present ) {
6911 // <ruby><rt>annotation</rt></ruby> : add rubyBox for empty base text
6912 ldomNode * rbox2 = rbox1->insertChildElement( 0, LXML_NS_NONE, el_rubyBox );
6913 rbox2->setAttributeValue(LXML_NS_NONE, attr_T, U"rbc");
6914 rbox2->initNodeStyle();
6915 }
6916 // rbox1 now contains only <rbc>, <rtc> or <rubyBox> (which will be set erm_table_row)
6917 // 3) for each, ensure its content is <rb>, <rt>, and if not, wrap it in
6918 // a <rubyBox> (these will be all like table cells, set erm_final)
6919 len1 = rbox1->getChildCount();
6920 bool ruby_base_seen = false;
6921 for ( int i1=0; i1<len1; i1++ ) {
6922 ldomNode * rbox2 = rbox1->getChildNode(i1);
6923 if ( !rbox2->isElement() )
6924 continue;
6925 lInt16 elemId = rbox2->getNodeId();
6926 lInt16 expected_child_elem_id;
6927 if ( elemId == el_rbc ) {
6928 expected_child_elem_id = el_rb;
6929 }
6930 else if ( elemId == el_rtc ) {
6931 expected_child_elem_id = el_rt;
6932 }
6933 else if ( elemId == el_rubyBox ) {
6934 expected_child_elem_id = ruby_base_seen ? el_rt : el_rb;
6935 }
6936 else { // unexpected
6937 continue;
6938 }
6939 ruby_base_seen = true; // We're passing by a container, the first one being the base
6940 bool has_expected = false;
6941 int len2 = rbox2->getChildCount();
6942 for ( int i2=0; i2<len2; i2++ ) {
6943 ldomNode * child = rbox2->getChildNode(i2);
6944 lInt16 childElemId = child->isElement() ? child->getNodeId() : -1;
6945 if ( childElemId == expected_child_elem_id ) {
6946 // If a single expected is found, assume everything is fine
6947 // (other badly wrapped elements will just be ignored and invisible)
6948 has_expected = true;
6949 break;
6950 }
6951 }
6952 if ( !has_expected ) {
6953 // Wrap everything into a rubyBox
6954 if ( len2 > 0 ) { // some children to wrap
6955 ldomNode * rbox3 = rbox2->boxWrapChildren(0, len2-1, el_rubyBox);
6956 if ( rbox3 && !rbox3->isNull() ) {
6957 rbox3->setAttributeValue(LXML_NS_NONE, attr_T, expected_child_elem_id == el_rb ? U"rb" : U"rt");
6958 if ( elemId == el_rtc ) {
6959 // Firefox makes a <rtc>text</rtc> (without any <rt>) span the whole involved base
6960 rbox3->setAttributeValue(LXML_NS_NONE, attr_rbspan, U"99"); // (our max supported)
6961 }
6962 rbox3->initNodeStyle();
6963 }
6964 }
6965 else { // no child to wrap
6966 // We need to insert an empty element to play the role of a <td> for
6967 // the table rendering code to work correctly.
6968 ldomNode * rbox3 = rbox2->insertChildElement( 0, LXML_NS_NONE, el_rubyBox );
6969 rbox3->setAttributeValue(LXML_NS_NONE, attr_T, expected_child_elem_id == el_rb ? U"rb" : U"rt");
6970 rbox3->initNodeStyle();
6971 // We need to add some text for the cell to ensure its height.
6972 // We add a ZERO WIDTH SPACE, which will not collapse into nothing
6973 rbox3->insertChildText(U"\x200B");
6974 }
6975 }
6976 }
6977 }
6978 }
6979 // All wrapping done, or assumed to have already been done correctly.
6980 // We can set the rendering methods to make all this a table.
6981 // All unexpected elements will be erm_invisible
6982 len = getChildCount();
6983 for ( int i=0; i<len; i++ ) {
6984 ldomNode * ibox1 = getChildNode(i);
6985 if ( !ibox1->isElement() || ibox1->getNodeId() != el_inlineBox )
6986 continue;
6987 ibox1->setRendMethod( erm_inline );
6988 ldomNode * rbox1 = ibox1->getChildCount() > 0 ? ibox1->getChildNode(0) : NULL;
6989 if ( rbox1 && rbox1->isElement() && rbox1->getNodeId() == el_rubyBox ) {
6990 // First level rubyBox: each will be an inline table
6991 rbox1->setRendMethod( erm_table );
6992 int len1 = rbox1->getChildCount();
6993 for ( int i1=0; i1<len1; i1++ ) {
6994 ldomNode * rbox2 = rbox1->getChildNode(i1);
6995 if ( rbox2->isElement() ) {
6996 rbox2->setRendMethod( erm_invisible );
6997 lInt16 rb2elemId = rbox2->getNodeId();
6998 if ( rb2elemId == el_rubyBox || rb2elemId == el_rbc || rb2elemId == el_rtc ) {
6999 // Second level rubyBox: each will be a table row
7000 rbox2->setRendMethod( erm_table_row );
7001 int len2 = rbox2->getChildCount();
7002 for ( int i2=0; i2<len2; i2++ ) {
7003 ldomNode * rbox3 = rbox2->getChildNode(i2);
7004 if ( rbox3->isElement() ) {
7005 rbox3->setRendMethod( erm_invisible );
7006 lInt16 rb3elemId = rbox3->getNodeId();
7007 if ( rb3elemId == el_rubyBox || rb3elemId == el_rb || rb3elemId == el_rt ) {
7008 // Third level rubyBox: each will be a table cell.
7009 // (As all it content has previously been reset to erm_inline)
7010 // /\ This is no more true, but we expect to find inline
7011 // content, with possibly some nested ruby.
7012 // We can have the cell erm_final.
7013 rbox3->setRendMethod( erm_final );
7014 }
7015 // We let <rp> be invisible like other unexpected elements
7016 }
7017 }
7018 }
7019 }
7020 }
7021 }
7022 }
7023 }
7024
7025 bool handled_as_float = false;
7026 if (BLOCK_RENDERING(rend_flags, WRAP_FLOATS)) {
7027 // While loading the document, we want to put any element with float:left/right
7028 // inside an internal floatBox element with no margin in its style: this
7029 // floatBox's RenderRectAccessor will have the position and width/height
7030 // of the outer element (with margins inside), while the RenderRectAccessor
7031 // of the wrapped original element itself will have the w/h of the element,
7032 // including borders but excluding margins (as it is done for all elements
7033 // by crengine).
7034 // That makes out the following rules:
7035 // - a floatBox has a single child: the original floating element.
7036 // - a non-floatBox element with style->float_ must be wrapped in a floatBox
7037 // which will get the same style->float_ (happens in the initial document
7038 // loading)
7039 // - if it already has a floatBox parent, no need to do it again, just ensure
7040 // the style->float_ are the same (happens when re-rendering)
7041 // - if the element has lost its style->float_ (style tweak applied), or
7042 // WRAP_FLOATS disabled, as we can't remove the floatBox (we can't
7043 // modify the DOM once a cache has been made): update the floatBox's
7044 // style->float_ and style->display and rendering method to be the same
7045 // as the element: this will limit the display degradation when such
7046 // change happen (but a full re-loading will still be suggested to the
7047 // user, and should probably be accepted).
7048 // So, to allow toggling FLOAT_FLOATBOXES with less chance of getting
7049 // a _nodeDisplayStyleHash change (and so, a need for document reloading),
7050 // it's best to use WRAP_FLOATS even when flat rendering is requested.
7051 //
7052 // Note that, when called in the XML loading phase, we can't update
7053 // a node style (with getStyle(), copystyle(), setStyle()) as, for some reason
7054 // not pinpointed, it could affect and mess with the upcoming node parsing.
7055 // We can just set the style of an element we add (and only once, setting it
7056 // twice would cause the same mess). But in the re-rendering phase, we can
7057 // update a node style as much as we want.
7058 bool isFloating = getStyle()->float_ > css_f_none;
7059 bool isFloatBox = (getNodeId() == el_floatBox);
7060 if ( isFloating || isFloatBox ) {
7061 handled_as_float = true;
7062 ldomNode * parent = getParentNode();
7063 bool isFloatBoxChild = (parent && (parent->getNodeId() == el_floatBox));
7064 if ( isFloatBox ) {
7065 // Wrapping floatBox already made
7066 if (getChildCount() != 1) {
7067 CRLog::error("floatBox with zero or more than one child");
7068 crFatalError();
7069 }
7070 // Update floatBox style according to child's one
7071 ldomNode * child = getChildNode(0);
7072 css_style_ref_t child_style = child->getStyle();
7073 css_style_ref_t my_style = getStyle();
7074 css_style_ref_t my_new_style( new css_style_rec_t );
7075 copystyle(my_style, my_new_style);
7076 my_new_style->float_ = child_style->float_;
7077 if (child_style->display <= css_d_inline) { // when !PREPARE_FLOATBOXES
7078 my_new_style->display = css_d_inline; // become an inline wrapper
7079 }
7080 else if (child_style->display == css_d_none) {
7081 my_new_style->display = css_d_none; // stay invisible
7082 }
7083 else { // everything else (including tables) must be wrapped by a block
7084 my_new_style->display = css_d_block;
7085 }
7086 setStyle(my_new_style);
7087 // When re-rendering, setNodeStyle() has already been called to set
7088 // our style and font, so no need for calling initNodeFont() here,
7089 // as we didn't change anything related to font in the style (and
7090 // calling it can cause a style hash mismatch for some reason).
7091
7092 // Update floatBox rendering method according to child's one
7093 // It should be erm_block by default (the child can be erm_final
7094 // if it contains text), except if the child has stayed inline
7095 // when !PREPARE_FLOATBOXES
7096 if (child->getRendMethod() == erm_inline)
7097 setRendMethod( erm_inline );
7098 else if (child->getRendMethod() == erm_invisible)
7099 setRendMethod( erm_invisible );
7100 else
7101 setRendMethod( erm_block );
7102 }
7103 else if ( isFloatBoxChild ) {
7104 // Already floatBox'ed, nothing special to do
7105 }
7106 else if ( parent ) { // !isFloatBox && !isFloatBoxChild
7107 // Element with float:, that has not been yet wrapped in a floatBox.
7108 // Replace this element with a floatBox in its parent children collection,
7109 // and move it inside, as the single child of this floatBox.
7110 int pos = getNodeIndex();
7111 ldomNode * fbox = parent->insertChildElement( pos, LXML_NS_NONE, el_floatBox );
7112 parent->moveItemsTo( fbox, pos+1, pos+1 ); // move this element from parent into fbox
7113
7114 // If we have float:, this just-created floatBox should be erm_block,
7115 // unless the child has been kept inline
7116 if ( !BLOCK_RENDERING(rend_flags, PREPARE_FLOATBOXES) && getRendMethod() == erm_inline)
7117 fbox->setRendMethod( erm_inline );
7118 else
7119 fbox->setRendMethod( erm_block );
7120
7121 // We want this floatBox to have no real style (and it surely
7122 // should not have the margins of the child), but it should probably
7123 // have the inherited properties of the node parent, just like the child
7124 // had them. We can't just copy the parent style into this floatBox, as
7125 // we don't want its non-inherited properties like background-color which
7126 // could be drawn over some other content if this float has some negative
7127 // margins.
7128 // So, we can't really do this:
7129 // // Move float and display from me into my new fbox parent
7130 // css_style_ref_t mystyle = getStyle();
7131 // css_style_ref_t parentstyle = parent->getStyle();
7132 // css_style_ref_t fboxstyle( new css_style_rec_t );
7133 // copystyle(parentstyle, fboxstyle);
7134 // fboxstyle->float_ = mystyle->float_;
7135 // fboxstyle->display = mystyle->display;
7136 // fbox->setStyle(fboxstyle);
7137 // fbox->initNodeFont();
7138 //
7139 // Best to use lvrend.cpp setNodeStyle(), which will properly set
7140 // this new node style with inherited properties from its parent,
7141 // and we made it do this specific propagation of float_ and
7142 // display from its single children, only when it has styles
7143 // defined (so, only on initial loading and not on re-renderings).
7144 setNodeStyle( fbox, parent->getStyle(), parent->getFont() );
7145
7146 // We would have liked to reset style->float_ to none in the
7147 // node we moved in the floatBox, for correctness sake.
7148 // css_style_ref_t mynewstyle( new css_style_rec_t );
7149 // copystyle(mystyle, mynewstyle);
7150 // mynewstyle->float_ = css_f_none;
7151 // mynewstyle->display = css_d_block;
7152 // setStyle(mynewstyle);
7153 // initNodeFont();
7154 // Unfortunatly, we can't yet re-set a style while the DOM
7155 // is still being built (as we may be called during the loading
7156 // phase) without many font glitches.
7157 // So, we'll have a floatBox with float: that contains a span
7158 // or div with float: - the rendering code may have to check
7159 // for that: ->isFloatingBox() was added for that.
7160 }
7161 }
7162 }
7163
7164 // (If a node is both inline-block and float: left/right, float wins.)
7165 if (BLOCK_RENDERING(rend_flags, BOX_INLINE_BLOCKS) && !handled_as_float) {
7166 // (Similar to what we do above for floats, but simpler.)
7167 // While loading the document, we want to put any element with
7168 // display: inline-block or inline-table inside an internal inlineBox
7169 // element with no margin in its style: this inlineBox's RenderRectAccessor
7170 // will have the width/height of the outer element (with margins inside),
7171 // while the RenderRectAccessor of the wrapped original element itself
7172 // will have the w/h of the element, including borders but excluding
7173 // margins (as it is done for all elements by crengine).
7174 // That makes out the following rules:
7175 // - a inlineBox has a single child: the original inline-block element.
7176 // - an element with style->display: inline-block/inline-table must be
7177 // wrapped in a inlineBox, which will get the same style->vertical_align
7178 // (happens in the initial document loading)
7179 // - if it already has a inlineBox parent, no need to do it again, just ensure
7180 // the style->vertical_align are the same (happens when re-rendering)
7181 // - if the element has lost its style->display: inline-block (style tweak
7182 // applied), or BOX_INLINE_BLOCKS disabled, as we can't remove the
7183 // inlineBox (we can't modify the DOM once a cache has been made):
7184 // the inlineBox and its children will both be set to erm_inline
7185 // (but as ->display has changed, a full re-loading will be suggested
7186 // to the user, and should probably be accepted).
7187 // - a inlineBox has ALWAYS ->display=css_d_inline and erm_method=erm_inline
7188 // - a inlineBox child keeps its original ->display, and may have
7189 // erm_method = erm_final or erm_block (depending on its content)
7190 bool isInlineBlock = (d == css_d_inline_block || d == css_d_inline_table);
7191 bool isInlineBox = (getNodeId() == el_inlineBox);
7192 if ( isInlineBlock || isInlineBox ) {
7193 ldomNode * parent = getParentNode();
7194 bool isInlineBoxChild = (parent && (parent->getNodeId() == el_inlineBox));
7195 if ( isInlineBox ) {
7196 // Wrapping inlineBox already made
7197 if (getChildCount() != 1) {
7198 CRLog::error("inlineBox with zero or more than one child");
7199 crFatalError();
7200 }
7201 // Update inlineBox style according to child's one
7202 ldomNode * child = getChildNode(0);
7203 css_style_ref_t child_style = child->getStyle();
7204 css_style_ref_t my_style = getStyle();
7205 css_style_ref_t my_new_style( new css_style_rec_t );
7206 copystyle(my_style, my_new_style);
7207 if (child_style->display == css_d_inline_block || child_style->display == css_d_inline_table) {
7208 my_new_style->display = css_d_inline; // become an inline wrapper
7209 // We need it to have the vertical_align from the child
7210 // (it's the only style we need for proper inline layout).
7211 my_new_style->vertical_align = child_style->vertical_align;
7212 setRendMethod( erm_inline );
7213 }
7214 else if ( isEmbeddedBlockBoxingInlineBox(true) ) {
7215 my_new_style->display = css_d_inline; // wrap bogus "block among inlines" in inline
7216 setRendMethod( erm_inline );
7217 }
7218 else if (child_style->display <= css_d_inline) {
7219 my_new_style->display = css_d_inline; // wrap inline in inline
7220 setRendMethod( erm_inline );
7221 }
7222 else if (child_style->display == css_d_none) {
7223 my_new_style->display = css_d_none; // stay invisible
7224 setRendMethod( erm_invisible );
7225 }
7226 else { // everything else must be wrapped by a block
7227 my_new_style->display = css_d_block;
7228 setRendMethod( erm_block );
7229 }
7230 setStyle(my_new_style);
7231 // When re-rendering, setNodeStyle() has already been called to set
7232 // our style and font, so no need for calling initNodeFont() here,
7233 // as we didn't change anything related to font in the style (and
7234 // calling it can cause a style hash mismatch for some reason).
7235 }
7236 else if ( isInlineBoxChild ) {
7237 // Already inlineBox'ed, nothing special to do
7238 }
7239 else if ( parent ) { // !isInlineBox && !isInlineBoxChild
7240 // Element with display: inline-block/inline-table, that has not yet
7241 // been wrapped in a inlineBox.
7242 // Replace this element with a inlineBox in its parent children collection,
7243 // and move it inside, as the single child of this inlineBox.
7244 int pos = getNodeIndex();
7245 ldomNode * ibox = parent->insertChildElement( pos, LXML_NS_NONE, el_inlineBox );
7246 parent->moveItemsTo( ibox, pos+1, pos+1 ); // move this element from parent into ibox
7247 ibox->setRendMethod( erm_inline );
7248
7249 // We want this inlineBox to have no real style (and it surely
7250 // should not have the margins of the child), but it should probably
7251 // have the inherited properties of the node parent, just like the child
7252 // had them. We can't just copy the parent style into this inlineBox, as
7253 // we don't want its non-inherited properties like background-color which
7254 // could be drawn over some other content if this float has some negative
7255 // margins.
7256 // Best to use lvrend.cpp setNodeStyle(), which will properly set
7257 // this new node style with inherited properties from its parent,
7258 // and we made it do this specific propagation of vertical_align
7259 // from its single child, only when it has styles defined (so,
7260 // only on initial loading and not on re-renderings).
7261 setNodeStyle( ibox, parent->getStyle(), parent->getFont() );
7262 }
7263 }
7264 }
7265 }
7266 #endif
7267
onBodyExit()7268 void ldomElementWriter::onBodyExit()
7269 {
7270 if ( _isSection )
7271 updateTocItem();
7272
7273 #if BUILD_LITE!=1
7274 if ( !_document->isDefStyleSet() )
7275 return;
7276 if ( !_bodyEnterCalled ) {
7277 onBodyEnter();
7278 }
7279 if ( _pseudoElementAfterChildIndex >= 0 ) {
7280 if ( _pseudoElementAfterChildIndex != _element->getChildCount()-1 ) {
7281 // Not the last child: move it there
7282 // (moveItemsTo() works just fine when the source node is also the
7283 // target node: remove it, and re-add it, so, adding it at the end)
7284 _element->moveItemsTo( _element, _pseudoElementAfterChildIndex, _pseudoElementAfterChildIndex);
7285 }
7286 // Now that all the real children of this node have had their
7287 // style set, we can init the style of the "After" pseudo
7288 // element, and its rend method as it has no children.
7289 ldomNode * child = _element->getChildNode(_element->getChildCount()-1);
7290 child->initNodeStyle();
7291 child->initNodeRendMethod();
7292 }
7293 // if ( _element->getStyle().isNull() ) {
7294 // lString32 path;
7295 // ldomNode * p = _element->getParentNode();
7296 // while (p) {
7297 // path = p->getNodeName() + U"/" + path;
7298 // p = p->getParentNode();
7299 // }
7300 // //CRLog::error("style not initialized for element 0x%04x %s path %s", _element->getDataIndex(), LCSTR(_element->getNodeName()), LCSTR(path));
7301 // crFatalError();
7302 // }
7303 _element->initNodeRendMethod();
7304
7305 if ( _stylesheetIsSet )
7306 _document->getStyleSheet()->pop();
7307 #endif
7308 }
7309
onText(const lChar32 * text,int len,lUInt32,bool insert_before_last_child)7310 void ldomElementWriter::onText( const lChar32 * text, int len, lUInt32, bool insert_before_last_child )
7311 {
7312 //logfile << "{t";
7313 {
7314 // normal mode: store text copy
7315 // add text node, if not first empty space string of block node
7316 if ( !_isBlock || _element->getChildCount()!=0 || !IsEmptySpace( text, len ) || (_flags&TXTFLG_PRE) ) {
7317 lString8 s8 = UnicodeToUtf8(text, len);
7318 _element->insertChildText(s8, insert_before_last_child);
7319 } else {
7320 //CRLog::trace("ldomElementWriter::onText: Ignoring first empty space of block item");
7321 }
7322 }
7323 //logfile << "}";
7324 }
7325
7326
7327 //#define DISABLE_STYLESHEET_REL
7328 #if BUILD_LITE!=1
7329 /// if stylesheet file name is set, and file is found, set stylesheet to its value
applyNodeStylesheet()7330 bool ldomNode::applyNodeStylesheet()
7331 {
7332 #ifndef DISABLE_STYLESHEET_REL
7333 CRLog::trace("ldomNode::applyNodeStylesheet()");
7334 if ( !getDocument()->getDocFlag(DOC_FLAG_ENABLE_INTERNAL_STYLES) ) // internal styles are disabled
7335 return false;
7336
7337 if ( getNodeId() != el_DocFragment && getNodeId() != el_body )
7338 return false;
7339 if ( getNodeId() == el_DocFragment && getDocument()->getContainer().isNull() )
7340 return false;
7341
7342 // Here, we apply internal stylesheets that have been saved as attribute or
7343 // child element by the HTML parser for EPUB or plain HTML documents.
7344
7345 // For epub documents, for each included .html in the epub, the first css
7346 // file link may have been put as the value of an added attribute to
7347 // the <DocFragment> element:
7348 // <DocFragment StyleSheet="path to css file">
7349 //
7350 // For epub and html documents, the content of one or more <head><style>
7351 // elements, as well as all (only the 2nd++ for epub) linked css files,
7352 // with @import url(), have been put into an added child element:
7353 // <DocFragment><stylesheet>css content</stylesheet><body>...</body></DocFragment>
7354 // <body><stylesheet>css content</stylesheet>...</body>
7355
7356 bool stylesheetChanged = false;
7357
7358 if ( getNodeId() == el_DocFragment && hasAttribute(attr_StyleSheet) ) {
7359 getDocument()->_stylesheet.push();
7360 stylesheetChanged = getDocument()->parseStyleSheet(getAttributeValue(attr_StyleSheet));
7361 if ( !stylesheetChanged )
7362 getDocument()->_stylesheet.pop();
7363 }
7364 if ( getChildCount() > 0 ) {
7365 ldomNode *styleNode = getChildNode(0);
7366
7367 if ( styleNode && styleNode->getNodeId()==el_stylesheet ) {
7368 if ( false == stylesheetChanged) {
7369 getDocument()->_stylesheet.push();
7370 }
7371 if ( getDocument()->parseStyleSheet(styleNode->getAttributeValue(attr_href),
7372 styleNode->getText()) ) {
7373 stylesheetChanged = true;
7374 } else if (false == stylesheetChanged) {
7375 getDocument()->_stylesheet.pop();
7376 }
7377 }
7378 }
7379 return stylesheetChanged;
7380 #endif
7381 return false;
7382 }
7383 #endif
7384
addAttribute(lUInt16 nsid,lUInt16 id,const lChar32 * value)7385 void ldomElementWriter::addAttribute( lUInt16 nsid, lUInt16 id, const lChar32 * value )
7386 {
7387 getElement()->setAttributeValue(nsid, id, value);
7388 #if BUILD_LITE!=1
7389 /* This is now done by ldomDocumentFragmentWriter::OnTagOpen() directly,
7390 * as we need to do it too for <DocFragment><stylesheet> tag, and not
7391 * only for <DocFragment StyleSheet="path_to_css_1st_file"> attribute.
7392 if ( id==attr_StyleSheet ) {
7393 _stylesheetIsSet = _element->applyNodeStylesheet();
7394 }
7395 */
7396 #endif
7397 }
7398
pop(ldomElementWriter * obj,lUInt16 id)7399 ldomElementWriter * ldomDocumentWriter::pop( ldomElementWriter * obj, lUInt16 id )
7400 {
7401 // First check if there's an element with provided id in the stack
7402 //logfile << "{p";
7403 ldomElementWriter * tmp = obj;
7404 for ( ; tmp; tmp = tmp->_parent )
7405 {
7406 //logfile << "-";
7407 if (tmp->getElement()->getNodeId() == id)
7408 break;
7409 }
7410 //logfile << "1";
7411 if (!tmp)
7412 {
7413 // No element in the stack with provided id: nothing to close, stay at current element
7414 //logfile << "-err}";
7415 return obj; // error!!!
7416 }
7417 ldomElementWriter * tmp2 = NULL;
7418 //logfile << "2";
7419 for ( tmp = obj; tmp; tmp = tmp2 )
7420 {
7421 //logfile << "-";
7422 tmp2 = tmp->_parent;
7423 bool stop = (tmp->getElement()->getNodeId() == id);
7424 ElementCloseHandler( tmp->getElement() );
7425 delete tmp;
7426 if ( stop )
7427 return tmp2;
7428 }
7429 /*
7430 logfile << "3 * ";
7431 logfile << (int)tmp << " - " << (int)tmp2 << " | cnt=";
7432 logfile << (int)tmp->getElement()->childCount << " - "
7433 << (int)tmp2->getElement()->childCount;
7434 */
7435 //logfile << "}";
7436 return tmp2;
7437 }
7438
~ldomElementWriter()7439 ldomElementWriter::~ldomElementWriter()
7440 {
7441 //CRLog::trace("~ldomElementWriter for element 0x%04x %s", _element->getDataIndex(), LCSTR(_element->getNodeName()));
7442 //getElement()->persist();
7443 onBodyExit();
7444 }
7445
7446
7447
7448
7449 /////////////////////////////////////////////////////////////////
7450 /// ldomDocumentWriter
7451 // Used to parse expected XHTML (possibly made by crengine or helpers) for
7452 // formats: FB2, RTF, WORD, plain text, PDB(txt)
7453 // Also used for EPUB to build a single document, but driven by ldomDocumentFragmentWriter
7454 // for each individual HTML files in the EPUB.
// For all these document formats, it is fed by HTMLParser, which converts
// tag names and attributes to lowercase.
// ldomDocumentWriter does not do any auto-closing of unbalanced tags and
// expects fully correct and balanced XHTML.
7459
7460 // overrides
OnStart(LVFileFormatParser * parser)7461 void ldomDocumentWriter::OnStart(LVFileFormatParser * parser)
7462 {
7463 //logfile << "ldomDocumentWriter::OnStart()\n";
7464 // add document root node
7465 //CRLog::trace("ldomDocumentWriter::OnStart()");
7466 if ( !_headerOnly )
7467 _stopTagId = 0xFFFE;
7468 else {
7469 _stopTagId = _document->getElementNameIndex(U"description");
7470 //CRLog::trace( "ldomDocumentWriter() : header only, tag id=%d", _stopTagId );
7471 }
7472 LVXMLParserCallback::OnStart( parser );
7473 _currNode = new ldomElementWriter(_document, 0, 0, NULL);
7474 }
7475
OnStop()7476 void ldomDocumentWriter::OnStop()
7477 {
7478 //logfile << "ldomDocumentWriter::OnStop()\n";
7479 while (_currNode)
7480 _currNode = pop( _currNode, _currNode->getElement()->getNodeId() );
7481 }
7482
7483 /// called after > of opening tag (when entering tag body)
7484 // Note to avoid confusion: all tags HAVE a body (their content), so this
7485 // is called on all tags.
7486 // But in this, we do some specifics for tags that ARE a <BODY> tag.
OnTagBody()7487 void ldomDocumentWriter::OnTagBody()
7488 {
7489 // Specific if we meet the <BODY> tag and we have styles to apply and
7490 // store in the DOM
7491 // (This can't happen with EPUBs: the ldomDocumentFragmentWriter that
7492 // drives this ldomDocumentWriter has parsed the HEAD STYLEs and LINKs
7493 // of each individual HTML file, and we see from them only their BODY:
7494 // _headStyleText and _stylesheetLinks are then empty. Styles for EPUB
7495 // are handled in :OnTagOpen() when being a DocFragment and meeting
7496 // the BODY.)
7497 if ( _currNode && _currNode->getElement() && _currNode->getElement()->isNodeName("body") &&
7498 ( !_headStyleText.empty() || _stylesheetLinks.length() > 0 ) ) {
7499 // If we're BODY, and we have meet styles in the previous HEAD
7500 // (links to css files or <STYLE> content), we need to save them
7501 // in an added <body><stylesheet> element so they are in the DOM
7502 // and saved in the cache, and found again when loading from cache
7503 // and applied again when a re-rendering is needed.
7504
7505 // Make out an aggregated single stylesheet text.
7506 // @import's need to be first in the final stylesheet text
7507 lString32 imports;
7508 for (int i = 0; i < _stylesheetLinks.length(); i++) {
7509 lString32 import("@import url(\"");
7510 import << _stylesheetLinks.at(i);
7511 import << "\");\n";
7512 imports << import;
7513 }
7514 lString32 styleText = imports + _headStyleText.c_str();
7515 _stylesheetLinks.clear();
7516 _headStyleText.clear();
7517
7518 // It's only at this point that we push() the previous stylesheet state
7519 // and apply the combined style text we made to the document:
7520 if ( _document->getDocFlag(DOC_FLAG_ENABLE_INTERNAL_STYLES) ) {
7521 _document->getStyleSheet()->push();
7522 _popStyleOnFinish = true; // superclass ~ldomDocumentWriter() will do the ->pop()
7523 _document->parseStyleSheet(lString32(), styleText);
7524 // printf("applied: %s\n", LCSTR(styleText));
7525 // apply any FB2 stylesheet too, so it's removed too when pop()
7526 _document->applyDocumentStyleSheet();
7527 }
7528 // We needed to add that /\ to the _document->_stylesheet before this
7529 // onBodyEnter \/, for any body {} css declaration to be available
7530 // as this onBodyEnter will apply the current _stylesheet to this BODY node.
7531 _currNode->onBodyEnter();
7532 _flags = _currNode->getFlags(); // _flags may have been updated (if white-space: pre)
7533 // And only after this we can add the <stylesheet> as a first child
7534 // element of this BODY node. It will not be displayed thanks to fb2def.h:
7535 // XS_TAG1D( stylesheet, true, css_d_none, css_ws_inherit )
7536 OnTagOpen(U"", U"stylesheet");
7537 OnTagBody();
7538 OnText(styleText.c_str(), styleText.length(), 0);
7539 OnTagClose(U"", U"stylesheet");
7540 CRLog::trace("added BODY>stylesheet child element with HEAD>STYLE&LINKS content");
7541 }
7542 else if ( _currNode ) { // for all other tags (including BODY when no style)
7543 _currNode->onBodyEnter();
7544 _flags = _currNode->getFlags(); // _flags may have been updated (if white-space: pre)
7545 }
7546 }
7547
OnTagOpen(const lChar32 * nsname,const lChar32 * tagname)7548 ldomNode * ldomDocumentWriter::OnTagOpen( const lChar32 * nsname, const lChar32 * tagname )
7549 {
7550 //logfile << "ldomDocumentWriter::OnTagOpen() [" << nsname << ":" << tagname << "]";
7551 //CRLog::trace("OnTagOpen(%s)", UnicodeToUtf8(lString32(tagname)).c_str());
7552 lUInt16 id = _document->getElementNameIndex(tagname);
7553 lUInt16 nsid = (nsname && nsname[0]) ? _document->getNsNameIndex(nsname) : 0;
7554
7555 // Set a flag for OnText to accumulate the content of any <HEAD><STYLE>
7556 if ( id == el_style && _currNode && _currNode->getElement()->getNodeId() == el_head ) {
7557 _inHeadStyle = true;
7558 }
7559
7560 // For EPUB, when ldomDocumentWriter is driven by ldomDocumentFragmentWriter:
7561 // if we see a BODY coming and we are a DocFragment, its time to apply the
7562 // styles set to the DocFragment before switching to BODY (so the styles can
7563 // be applied to BODY)
7564 if (id == el_body && _currNode && _currNode->_element->getNodeId() == el_DocFragment) {
7565 _currNode->_stylesheetIsSet = _currNode->getElement()->applyNodeStylesheet();
7566 // _stylesheetIsSet will be used to pop() the stylesheet when
7567 // leaving/destroying this DocFragment ldomElementWriter
7568 }
7569
7570 //if ( id==_stopTagId ) {
7571 //CRLog::trace("stop tag found, stopping...");
7572 // _parser->Stop();
7573 //}
7574 _currNode = new ldomElementWriter( _document, nsid, id, _currNode );
7575 _flags = _currNode->getFlags();
7576 //logfile << " !o!\n";
7577 //return _currNode->getElement();
7578 return _currNode->getElement();
7579 }
7580
~ldomDocumentWriter()7581 ldomDocumentWriter::~ldomDocumentWriter()
7582 {
7583 while (_currNode)
7584 _currNode = pop( _currNode, _currNode->getElement()->getNodeId() );
7585 #if BUILD_LITE!=1
7586 if ( _document->isDefStyleSet() ) {
7587 if ( _popStyleOnFinish )
7588 // pop any added styles to the original stylesheet so we get
7589 // the original one back and avoid a stylesheet hash mismatch
7590 _document->getStyleSheet()->pop();
7591 // Not sure why we would do that at end of parsing, but ok: it's
7592 // not recursive, so not expensive:
7593 _document->getRootNode()->initNodeStyle();
7594 _document->getRootNode()->initNodeFont();
7595 //if ( !_document->validateDocument() )
7596 // CRLog::error("*** document style validation failed!!!");
7597 _document->updateRenderContext();
7598 _document->dumpStatistics();
7599 if ( _document->_nodeStylesInvalidIfLoading ) {
7600 // Some pseudoclass like :last-child has been met which has set this flag
7601 // (or, with the HTML parser, foster parenting of invalid element in tables)
7602 printf("CRE: document loaded, but styles re-init needed (cause: peculiar CSS pseudoclasses met)\n");
7603 _document->_nodeStylesInvalidIfLoading = false; // show this message only once
7604 _document->forceReinitStyles();
7605 }
7606 if ( _document->hasRenderData() ) {
7607 // We have created some RenderRectAccessors, to cache some CSS check results
7608 // (i.e. :nth-child(), :last-of-type...): we should clean them.
7609 // (We do that here for after the initial loading phase - on re-renderings,
7610 // this is done in updateRendMethod() called by initNodeRendMethodRecursive()
7611 // on all nodes.)
7612 _document->getRootNode()->clearRenderDataRecursive();
7613 }
7614 }
7615
7616 #endif
7617 }
7618
OnTagClose(const lChar32 *,const lChar32 * tagname,bool self_closing_tag)7619 void ldomDocumentWriter::OnTagClose( const lChar32 *, const lChar32 * tagname, bool self_closing_tag )
7620 {
7621 //logfile << "ldomDocumentWriter::OnTagClose() [" << nsname << ":" << tagname << "]";
7622 if (!_currNode || !_currNode->getElement())
7623 {
7624 _errFlag = true;
7625 //logfile << " !c-err!\n";
7626 return;
7627 }
7628
7629 //lUInt16 nsid = (nsname && nsname[0]) ? _document->getNsNameIndex(nsname) : 0;
7630 lUInt16 curNodeId = _currNode->getElement()->getNodeId();
7631 lUInt16 id = _document->getElementNameIndex(tagname);
7632 _errFlag |= (id != curNodeId); // (we seem to not do anything with _errFlag)
7633 // We should expect the tagname we got to be the same as curNode's element name,
7634 // but it looks like we may get an upper closing tag, that pop() below might
7635 // handle. So, here below, we check that both id and curNodeId match the
7636 // element id we check for.
7637
7638 // Parse <link rel="stylesheet">, put the css file link in _stylesheetLinks.
7639 // They will be added to <body><stylesheet> when we meet <BODY>
7640 // (duplicated in ldomDocumentWriterFilter::OnTagClose)
7641 if ( id == el_link && curNodeId == el_link ) { // link node
7642 ldomNode * n = _currNode->getElement();
7643 if ( n->getParentNode() && n->getParentNode()->getNodeId() == el_head &&
7644 lString32(n->getAttributeValue("rel")).lowercase() == U"stylesheet" &&
7645 lString32(n->getAttributeValue("type")).lowercase() == U"text/css" ) {
7646 lString32 href = n->getAttributeValue("href");
7647 lString32 stylesheetFile = LVCombinePaths( _document->getCodeBase(), href );
7648 CRLog::debug("Internal stylesheet file: %s", LCSTR(stylesheetFile));
7649 // We no more apply it immediately: it will be when <BODY> is met
7650 // _document->setDocStylesheetFileName(stylesheetFile);
7651 // _document->applyDocumentStyleSheet();
7652 _stylesheetLinks.add(stylesheetFile);
7653 }
7654 }
7655
7656 _currNode = pop( _currNode, id );
7657 // _currNode is now the parent
7658
7659 if ( _currNode )
7660 _flags = _currNode->getFlags();
7661
7662 if ( id==_stopTagId ) {
7663 //CRLog::trace("stop tag found, stopping...");
7664 _parser->Stop();
7665 }
7666
7667 // For EPUB/HTML, this is now dealt with in :OnTagBody(), just before creating this <stylesheet> tag.
7668 // But for FB2, where we have:
7669 // <FictionBook>
7670 // <stylesheet type="text/css">
7671 // some css
7672 // </stylesheet>
7673 // <p>...
7674 // other content
7675 // </FictionBook>
7676 // we need to apply the <stylesheet> content we have just left, so it applies
7677 // to the coming up content.
7678 // We check the parent we have just pop'ed is a <FictionBook>.
7679 // Caveat: any style set on the <FictionBook> element itself won't be applied now
7680 // in this loading phase (as we have already set its style) - but it will apply
7681 // on re-renderings.
7682 if ( id == el_stylesheet && _currNode && _currNode->getElement()->getNodeId() == el_FictionBook ) {
7683 //CRLog::trace("</stylesheet> found");
7684 #if BUILD_LITE!=1
7685 if ( !_popStyleOnFinish && _document->getDocFlag(DOC_FLAG_ENABLE_INTERNAL_STYLES) ) {
7686 //CRLog::trace("saving current stylesheet before applying of document stylesheet");
7687 _document->getStyleSheet()->push();
7688 _popStyleOnFinish = true;
7689 _document->applyDocumentStyleSheet();
7690 }
7691 #endif
7692 }
7693
7694 //logfile << " !c!\n";
7695 }
7696
OnAttribute(const lChar32 * nsname,const lChar32 * attrname,const lChar32 * attrvalue)7697 void ldomDocumentWriter::OnAttribute( const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue )
7698 {
7699 //logfile << "ldomDocumentWriter::OnAttribute() [" << nsname << ":" << attrname << "]";
7700 lUInt16 attr_ns = (nsname && nsname[0]) ? _document->getNsNameIndex( nsname ) : 0;
7701 lUInt16 attr_id = (attrname && attrname[0]) ? _document->getAttrNameIndex( attrname ) : 0;
7702 _currNode->addAttribute( attr_ns, attr_id, attrvalue );
7703
7704 //logfile << " !a!\n";
7705 }
7706
OnText(const lChar32 * text,int len,lUInt32 flags)7707 void ldomDocumentWriter::OnText( const lChar32 * text, int len, lUInt32 flags )
7708 {
7709 //logfile << "ldomDocumentWriter::OnText() fpos=" << fpos;
7710
7711 // Accumulate <HEAD><STYLE> content
7712 if (_inHeadStyle) {
7713 _headStyleText << lString32(text, len);
7714 _inHeadStyle = false;
7715 return;
7716 }
7717
7718 if (_currNode)
7719 {
7720 if ( (_flags & XML_FLAG_NO_SPACE_TEXT)
7721 && IsEmptySpace(text, len) && !(flags & TXTFLG_PRE))
7722 return;
7723 if (_currNode->_allowText)
7724 _currNode->onText( text, len, flags );
7725 }
7726 //logfile << " !t!\n";
7727 }
7728
OnEncoding(const lChar32 *,const lChar32 *)7729 void ldomDocumentWriter::OnEncoding( const lChar32 *, const lChar32 *)
7730 {
7731 }
7732
ldomDocumentWriter(ldomDocument * document,bool headerOnly)7733 ldomDocumentWriter::ldomDocumentWriter(ldomDocument * document, bool headerOnly)
7734 : _document(document), _currNode(NULL), _errFlag(false), _headerOnly(headerOnly), _popStyleOnFinish(false), _flags(0), _inHeadStyle(false)
7735 {
7736 _headStyleText.clear();
7737 _stylesheetLinks.clear();
7738 _stopTagId = 0xFFFE;
7739 IS_FIRST_BODY = true;
7740
7741 #if BUILD_LITE!=1
7742 if ( _document->isDefStyleSet() ) {
7743 _document->getRootNode()->initNodeStyle();
7744 _document->getRootNode()->setRendMethod(erm_block);
7745 }
7746 #endif
7747
7748 //CRLog::trace("ldomDocumentWriter() headerOnly=%s", _headerOnly?"true":"false");
7749 }
7750
7751
7752
7753
7754
7755
7756
7757
FindNextNode(ldomNode * & node,ldomNode * root)7758 bool FindNextNode( ldomNode * & node, ldomNode * root )
7759 {
7760 if ( node->getChildCount()>0 ) {
7761 // first child
7762 node = node->getChildNode(0);
7763 return true;
7764 }
7765 if (node->isRoot() || node == root )
7766 return false; // root node reached
7767 int index = node->getNodeIndex();
7768 ldomNode * parent = node->getParentNode();
7769 while (parent)
7770 {
7771 if ( index < (int)parent->getChildCount()-1 ) {
7772 // next sibling
7773 node = parent->getChildNode( index + 1 );
7774 return true;
7775 }
7776 if (parent->isRoot() || parent == root )
7777 return false; // root node reached
7778 // up one level
7779 index = parent->getNodeIndex();
7780 parent = parent->getParentNode();
7781 }
7782 //if ( node->getNodeType() == LXML_TEXT_NODE )
7783 return false;
7784 }
7785
// Base64 decode table, indexed by 7-bit ASCII code: gives the 6-bit value of
// a base64 digit ('A'-'Z' => 0..25, 'a'-'z' => 26..51, '0'-'9' => 52..61,
// '+' => 62, '/' => 63), or -1 for any other character.
static const signed char base64_decode_table[128] = {
    -1, -1, -1, -1, -1, -1, -1, -1, //   0..7
    -1, -1, -1, -1, -1, -1, -1, -1, //   8..15
    -1, -1, -1, -1, -1, -1, -1, -1, //  16..23
    -1, -1, -1, -1, -1, -1, -1, -1, //  24..31
    -1, -1, -1, -1, -1, -1, -1, -1, //  32..39
    -1, -1, -1, 62, -1, -1, -1, 63, //  40..47   '+' and '/'
    52, 53, 54, 55, 56, 57, 58, 59, //  48..55   '0'..'7'
    60, 61, -1, -1, -1, -1, -1, -1, //  56..63   '8', '9'
    -1,  0,  1,  2,  3,  4,  5,  6, //  64..71   'A'..'G'
     7,  8,  9, 10, 11, 12, 13, 14, //  72..79   'H'..'O'
    15, 16, 17, 18, 19, 20, 21, 22, //  80..87   'P'..'W'
    23, 24, 25, -1, -1, -1, -1, -1, //  88..95   'X'..'Z'
    -1, 26, 27, 28, 29, 30, 31, 32, //  96..103  'a'..'g'
    33, 34, 35, 36, 37, 38, 39, 40, // 104..111  'h'..'o'
    41, 42, 43, 44, 45, 46, 47, 48, // 112..119  'p'..'w'
    49, 50, 51, -1, -1, -1, -1, -1  // 120..127  'x'..'z'
};
7797
7798 #define BASE64_BUF_SIZE 128
7799 class LVBase64NodeStream : public LVNamedStream
7800 {
7801 private:
7802 ldomNode * m_elem;
7803 ldomNode * m_curr_node;
7804 lString32 m_curr_text;
7805 int m_text_pos;
7806 lvsize_t m_size;
7807 lvpos_t m_pos;
7808
7809 int m_iteration;
7810 lUInt32 m_value;
7811
7812 lUInt8 m_bytes[BASE64_BUF_SIZE];
7813 int m_bytes_count;
7814 int m_bytes_pos;
7815
readNextBytes()7816 int readNextBytes()
7817 {
7818 int bytesRead = 0;
7819 bool flgEof = false;
7820 while ( bytesRead == 0 && !flgEof )
7821 {
7822 while ( m_text_pos >= (int)m_curr_text.length() )
7823 {
7824 if ( !findNextTextNode() )
7825 return bytesRead;
7826 }
7827 int len = m_curr_text.length();
7828 const lChar32 * txt = m_curr_text.c_str();
7829 for ( ; m_text_pos<len && m_bytes_count < BASE64_BUF_SIZE - 3; m_text_pos++ )
7830 {
7831 lChar32 ch = txt[ m_text_pos ];
7832 if ( ch < 128 )
7833 {
7834 if ( ch == '=' )
7835 {
7836 // end of stream
7837 if ( m_iteration == 2 )
7838 {
7839 m_bytes[m_bytes_count++] = (lUInt8)((m_value>>4) & 0xFF);
7840 bytesRead++;
7841 }
7842 else if ( m_iteration == 3 )
7843 {
7844 m_bytes[m_bytes_count++] = (lUInt8)((m_value>>10) & 0xFF);
7845 m_bytes[m_bytes_count++] = (lUInt8)((m_value>>2) & 0xFF);
7846 bytesRead += 2;
7847 }
7848 // stop!!!
7849 //m_text_pos--;
7850 m_iteration = 0;
7851 flgEof = true;
7852 break;
7853 }
7854 else
7855 {
7856 int k = base64_decode_table[ch];
7857 if ( !(k & 0x80) ) {
7858 // next base-64 digit
7859 m_value = (m_value << 6) | (k);
7860 m_iteration++;
7861 if (m_iteration==4)
7862 {
7863 //
7864 m_bytes[m_bytes_count++] = (lUInt8)((m_value>>16) & 0xFF);
7865 m_bytes[m_bytes_count++] = (lUInt8)((m_value>>8) & 0xFF);
7866 m_bytes[m_bytes_count++] = (lUInt8)((m_value>>0) & 0xFF);
7867 m_iteration = 0;
7868 m_value = 0;
7869 bytesRead+=3;
7870 }
7871 } else {
7872 //m_text_pos++;
7873 }
7874 }
7875 }
7876 }
7877 }
7878 return bytesRead;
7879 }
7880
findNextTextNode()7881 bool findNextTextNode()
7882 {
7883 while ( FindNextNode( m_curr_node, m_elem ) ) {
7884 if ( m_curr_node->isText() ) {
7885 m_curr_text = m_curr_node->getText();
7886 m_text_pos = 0;
7887 return true;
7888 }
7889 }
7890 return false;
7891 }
7892
bytesAvailable()7893 int bytesAvailable() { return m_bytes_count - m_bytes_pos; }
7894
rewind()7895 bool rewind()
7896 {
7897 m_curr_node = m_elem;
7898 m_pos = 0;
7899 m_bytes_count = 0;
7900 m_bytes_pos = 0;
7901 m_iteration = 0;
7902 m_value = 0;
7903 return findNextTextNode();
7904 }
7905
skip(lvsize_t count)7906 bool skip( lvsize_t count )
7907 {
7908 while ( count )
7909 {
7910 if ( m_bytes_pos >= m_bytes_count )
7911 {
7912 m_bytes_pos = 0;
7913 m_bytes_count = 0;
7914 int bytesRead = readNextBytes();
7915 if ( bytesRead == 0 )
7916 return false;
7917 }
7918 int diff = (int) (m_bytes_count - m_bytes_pos);
7919 if (diff > (int)count)
7920 diff = (int)count;
7921 m_pos += diff;
7922 count -= diff;
7923 }
7924 return true;
7925 }
7926
7927 public:
~LVBase64NodeStream()7928 virtual ~LVBase64NodeStream() { }
LVBase64NodeStream(ldomNode * element)7929 LVBase64NodeStream( ldomNode * element )
7930 : m_elem(element), m_curr_node(element), m_text_pos(0), m_size(0), m_pos(0)
7931 {
7932 // calculate size
7933 rewind();
7934 m_size = bytesAvailable();
7935 for (;;) {
7936 int bytesRead = readNextBytes();
7937 if ( !bytesRead )
7938 break;
7939 m_bytes_count = 0;
7940 m_bytes_pos = 0;
7941 m_size += bytesRead;
7942 }
7943 // rewind
7944 rewind();
7945 }
Eof()7946 virtual bool Eof()
7947 {
7948 return m_pos >= m_size;
7949 }
GetSize()7950 virtual lvsize_t GetSize()
7951 {
7952 return m_size;
7953 }
7954
GetPos()7955 virtual lvpos_t GetPos()
7956 {
7957 return m_pos;
7958 }
7959
GetPos(lvpos_t * pos)7960 virtual lverror_t GetPos( lvpos_t * pos )
7961 {
7962 if (pos)
7963 *pos = m_pos;
7964 return LVERR_OK;
7965 }
7966
Seek(lvoffset_t offset,lvseek_origin_t origin,lvpos_t * newPos)7967 virtual lverror_t Seek(lvoffset_t offset, lvseek_origin_t origin, lvpos_t* newPos)
7968 {
7969 lvpos_t npos = 0;
7970 lvpos_t currpos = GetPos();
7971 switch (origin) {
7972 case LVSEEK_SET:
7973 npos = offset;
7974 break;
7975 case LVSEEK_CUR:
7976 npos = currpos + offset;
7977 break;
7978 case LVSEEK_END:
7979 npos = m_size + offset;
7980 break;
7981 }
7982 if (npos > m_size)
7983 return LVERR_FAIL;
7984 if ( npos != currpos )
7985 {
7986 if (npos < currpos)
7987 {
7988 if ( !rewind() || !skip(npos) )
7989 return LVERR_FAIL;
7990 }
7991 else
7992 {
7993 skip( npos - currpos );
7994 }
7995 }
7996 if (newPos)
7997 *newPos = npos;
7998 return LVERR_OK;
7999 }
Write(const void *,lvsize_t,lvsize_t *)8000 virtual lverror_t Write(const void*, lvsize_t, lvsize_t*)
8001 {
8002 return LVERR_NOTIMPL;
8003 }
Read(void * buf,lvsize_t size,lvsize_t * pBytesRead)8004 virtual lverror_t Read(void* buf, lvsize_t size, lvsize_t* pBytesRead)
8005 {
8006 lvsize_t bytesRead = 0;
8007 //fprintf( stderr, "Read()\n" );
8008
8009 lUInt8 * out = (lUInt8 *)buf;
8010
8011 while (size>0)
8012 {
8013 int sz = bytesAvailable();
8014 if (!sz) {
8015 m_bytes_pos = m_bytes_count = 0;
8016 sz = readNextBytes();
8017 if (!sz) {
8018 if ( !bytesRead || m_pos!=m_size) //
8019 return LVERR_FAIL;
8020 break;
8021 }
8022 }
8023 if (sz>(int)size)
8024 sz = (int)size;
8025 for (int i=0; i<sz; i++)
8026 *out++ = m_bytes[m_bytes_pos++];
8027 size -= sz;
8028 bytesRead += sz;
8029 m_pos += sz;
8030 }
8031
8032 if (pBytesRead)
8033 *pBytesRead = bytesRead;
8034 //fprintf( stderr, " %d bytes read...\n", (int)bytesRead );
8035 return LVERR_OK;
8036 }
SetSize(lvsize_t)8037 virtual lverror_t SetSize(lvsize_t)
8038 {
8039 return LVERR_NOTIMPL;
8040 }
8041 };
8042
img_scaling_option_t()8043 img_scaling_option_t::img_scaling_option_t()
8044 {
8045 mode = (MAX_IMAGE_SCALE_MUL>1) ? (ARBITRARY_IMAGE_SCALE_ENABLED==1 ? IMG_FREE_SCALING : IMG_INTEGER_SCALING) : IMG_NO_SCALE;
8046 max_scale = (MAX_IMAGE_SCALE_MUL>1) ? MAX_IMAGE_SCALE_MUL : 1;
8047 }
8048
img_scaling_options_t()8049 img_scaling_options_t::img_scaling_options_t()
8050 {
8051 img_scaling_option_t option;
8052 zoom_in_inline = option;
8053 zoom_in_block = option;
8054 zoom_out_inline = option;
8055 zoom_out_block = option;
8056 }
8057
8058 #define FONT_SIZE_BIG 32
8059 #define FONT_SIZE_VERY_BIG 50
updateScalingOption(img_scaling_option_t & v,CRPropRef props,int fontSize,bool zoomin,bool isInline)8060 static bool updateScalingOption( img_scaling_option_t & v, CRPropRef props, int fontSize, bool zoomin, bool isInline )
8061 {
8062 lString8 propName("crengine.image.scaling.");
8063 propName << (zoomin ? "zoomin." : "zoomout.");
8064 propName << (isInline ? "inline." : "block.");
8065 lString8 propNameMode = propName + "mode";
8066 lString8 propNameScale = propName + "scale";
8067 img_scaling_option_t def;
8068 int currMode = props->getIntDef(propNameMode.c_str(), (int)def.mode);
8069 int currScale = props->getIntDef(propNameScale.c_str(), (int)def.max_scale);
8070 if ( currScale==0 ) {
8071 if ( fontSize>=FONT_SIZE_VERY_BIG )
8072 currScale = 3;
8073 else if ( fontSize>=FONT_SIZE_BIG )
8074 currScale = 2;
8075 else
8076 currScale = 1;
8077 }
8078 if ( currScale==1 )
8079 currMode = 0;
8080 bool updated = false;
8081 if ( v.max_scale!=currScale ) {
8082 updated = true;
8083 v.max_scale = currScale;
8084 }
8085 if ( v.mode!=(img_scaling_mode_t)currMode ) {
8086 updated = true;
8087 v.mode = (img_scaling_mode_t)currMode;
8088 }
8089 props->setIntDef(propNameMode.c_str(), currMode);
8090 props->setIntDef(propNameScale.c_str(), currScale);
8091 return updated;
8092 }
8093
8094 /// returns true if any changes occured
update(CRPropRef props,int fontSize)8095 bool img_scaling_options_t::update( CRPropRef props, int fontSize )
8096 {
8097 bool updated = false;
8098 updated = updateScalingOption( zoom_in_inline, props, fontSize, true, true ) || updated;
8099 updated = updateScalingOption( zoom_in_block, props, fontSize, true, false ) || updated;
8100 updated = updateScalingOption( zoom_out_inline, props, fontSize, false, true ) || updated;
8101 updated = updateScalingOption( zoom_out_block, props, fontSize, false, false ) || updated;
8102 return updated;
8103 }
8104
ParseXPathStep(const lChar32 * & path,lString32 & name,int & index)8105 xpath_step_t ParseXPathStep( const lChar32 * &path, lString32 & name, int & index )
8106 {
8107 int pos = 0;
8108 const lChar32 * s = path;
8109 //int len = path.GetLength();
8110 name.clear();
8111 index = -1;
8112 int flgPrefix = 0;
8113 if (s && s[pos]) {
8114 lChar32 ch = s[pos];
8115 // prefix: none, '/' or '.'
8116 if (ch=='/') {
8117 flgPrefix = 1;
8118 ch = s[++pos];
8119 } else if (ch=='.') {
8120 flgPrefix = 2;
8121 ch = s[++pos];
8122 }
8123 int nstart = pos;
8124 if (ch>='0' && ch<='9') {
8125 // node or point index
8126 pos++;
8127 while (s[pos]>='0' && s[pos]<='9')
8128 pos++;
8129 if (s[pos] && s[pos]!='/' && s[pos]!='.')
8130 return xpath_step_error;
8131 lString32 sindex( path+nstart, pos-nstart );
8132 index = sindex.atoi();
8133 if (index<((flgPrefix==2)?0:1))
8134 return xpath_step_error;
8135 path += pos;
8136 return (flgPrefix==2) ? xpath_step_point : xpath_step_nodeindex;
8137 }
8138 while (s[pos] && s[pos]!='[' && s[pos]!='/' && s[pos]!='.')
8139 pos++;
8140 if (pos==nstart)
8141 return xpath_step_error;
8142 name = lString32( path+ nstart, pos-nstart );
8143 if (s[pos]=='[') {
8144 // index
8145 pos++;
8146 int istart = pos;
8147 while (s[pos] && s[pos]!=']' && s[pos]!='/' && s[pos]!='.')
8148 pos++;
8149 if (!s[pos] || pos==istart)
8150 return xpath_step_error;
8151
8152 lString32 sindex( path+istart, pos-istart );
8153 index = sindex.atoi();
8154 pos++;
8155 }
8156 if (!s[pos] || s[pos]=='/' || s[pos]=='.') {
8157 path += pos;
8158 return (name == "text()") ? xpath_step_text : xpath_step_element; // OK!
8159 }
8160 return xpath_step_error; // error
8161 }
8162 return xpath_step_error;
8163 }
8164
8165
8166 /// get pointer for relative path
relative(lString32 relativePath)8167 ldomXPointer ldomXPointer::relative( lString32 relativePath )
8168 {
8169 return getDocument()->createXPointer( getNode(), relativePath );
8170 }
8171 /// create xpointer from pointer string
createXPointer(const lString32 & xPointerStr)8172 ldomXPointer ldomDocument::createXPointer( const lString32 & xPointerStr )
8173 {
8174 if ( xPointerStr[0]=='#' ) {
8175 lString32 id = xPointerStr.substr(1);
8176 lUInt32 idid = getAttrValueIndex(id.c_str());
8177 lInt32 nodeIndex;
8178 if ( _idNodeMap.get(idid, nodeIndex) ) {
8179 ldomNode * node = getTinyNode(nodeIndex);
8180 if ( node && node->isElement() ) {
8181 return ldomXPointer(node, -1);
8182 }
8183 }
8184 return ldomXPointer();
8185 }
8186 return createXPointer( getRootNode(), xPointerStr );
8187 }
8188
8189 #if BUILD_LITE!=1
8190
8191 /// return parent final node, if found
getFinalNode() const8192 ldomNode * ldomXPointer::getFinalNode() const
8193 {
8194 ldomNode * node = getNode();
8195 for (;;) {
8196 if ( !node )
8197 return NULL;
8198 if ( node->getRendMethod()==erm_final )
8199 return node;
8200 node = node->getParentNode();
8201 }
8202 }
8203
8204 /// return true is this node is a final node
isFinalNode() const8205 bool ldomXPointer::isFinalNode() const
8206 {
8207 ldomNode * node = getNode();
8208 if ( !node )
8209 return false;
8210 if ( node->getRendMethod()==erm_final )
8211 return true;
8212 return false;
8213 }
8214
8215 /// create xpointer from doc point
createXPointer(lvPoint pt,int direction,bool strictBounds,ldomNode * fromNode)8216 ldomXPointer ldomDocument::createXPointer( lvPoint pt, int direction, bool strictBounds, ldomNode * fromNode )
8217 {
8218 //
8219 lvPoint orig_pt = lvPoint(pt);
8220 ldomXPointer ptr;
8221 if ( !getRootNode() )
8222 return ptr;
8223 ldomNode * startNode;
8224 if ( fromNode ) {
8225 // Start looking from the fromNode provided - only used when we are
8226 // looking inside a floatBox or an inlineBox below and we have this
8227 // recursive call to createXPointer().
8228 // Even with a provided fromNode, pt must be provided in full absolute
8229 // coordinates. But we need to give to startNode->elementFromPoint()
8230 // a pt with coordinates relative to fromNode.
8231 // And because elementFromPoint() uses the fmt x/y offsets of the
8232 // start node (relative to the containing final block), we would
8233 // need to have pt relative to that containing final block - and so,
8234 // we'd need to lookup the final node from here (or have it provided
8235 // as an additional parameter if it's known by caller).
8236 // But because we're called only for floatBox and inlineBox, which
8237 // have only a single child, we can use the trick of calling
8238 // ->elementFromPoint() on that first child, while still getting
8239 // pt relative to fromNode itself:
8240 startNode = fromNode->getChildNode(0);
8241 lvRect rc;
8242 fromNode->getAbsRect( rc, true );
8243 pt.x -= rc.left;
8244 pt.y -= rc.top;
8245 }
8246 else {
8247 startNode = getRootNode();
8248 }
8249 ldomNode * finalNode = startNode->elementFromPoint( pt, direction );
8250 if ( fromNode )
8251 pt = orig_pt; // restore orig pt
8252 if ( !finalNode ) {
8253 // printf("no finalNode found from %s\n", UnicodeToLocal(ldomXPointer(fromNode, 0).toString()).c_str());
8254 // No node found, return start or end of document if pt overflows it, otherwise NULL
8255 if ( pt.y >= getFullHeight()) {
8256 ldomNode * node = getRootNode()->getLastTextChild();
8257 return ldomXPointer(node,node ? node->getText().length() : 0);
8258 } else if ( pt.y <= 0 ) {
8259 ldomNode * node = getRootNode()->getFirstTextChild();
8260 return ldomXPointer(node, 0);
8261 }
8262 CRLog::trace("not final node");
8263 return ptr;
8264 }
8265 // printf("finalNode %s\n", UnicodeToLocal(ldomXPointer(finalNode, 0).toString()).c_str());
8266
8267 lvdom_element_render_method rm = finalNode->getRendMethod();
8268 if ( rm != erm_final ) {
8269 // Not final, return XPointer to first or last child
8270 lvRect rc;
8271 finalNode->getAbsRect( rc );
8272 if ( pt.y < (rc.bottom + rc.top) / 2 )
8273 return ldomXPointer( finalNode, 0 );
8274 else
8275 return ldomXPointer( finalNode, finalNode->getChildCount() );
8276 }
8277
8278 // Final node found
8279 // Adjust pt in coordinates of the FormattedText
8280 RenderRectAccessor fmt( finalNode );
8281 lvRect rc;
8282 // When in enhanced rendering mode, we can get the FormattedText coordinates
8283 // and its width (inner_width) directly
8284 finalNode->getAbsRect( rc, true ); // inner=true
8285 pt.x -= rc.left;
8286 pt.y -= rc.top;
8287 int inner_width;
8288 if ( RENDER_RECT_HAS_FLAG(fmt, INNER_FIELDS_SET) ) {
8289 inner_width = fmt.getInnerWidth();
8290 }
8291 else {
8292 // In legacy mode, we just got the erm_final coordinates, and we must
8293 // compute and remove left/top border and padding (using rc.width() as
8294 // the base for % is wrong here, and so is rc.height() for padding top)
8295 int em = finalNode->getFont()->getSize();
8296 int padding_left = measureBorder(finalNode,3)+lengthToPx(finalNode->getStyle()->padding[0],rc.width(),em);
8297 int padding_right = measureBorder(finalNode,1)+lengthToPx(finalNode->getStyle()->padding[1],rc.width(),em);
8298 int padding_top = measureBorder(finalNode,0)+lengthToPx(finalNode->getStyle()->padding[2],rc.height(),em);
8299 pt.x -= padding_left;
8300 pt.y -= padding_top;
8301 // As well as the inner width
8302 inner_width = fmt.getWidth() - padding_left - padding_right;
8303 }
8304
8305 // Get the formatted text, so we can look for 'pt' line by line, word by word,
8306 // (and embedded float by embedded float if there are some).
8307 LFormattedTextRef txtform;
8308 {
8309 // This will possibly return it from CVRendBlockCache
8310 finalNode->renderFinalBlock( txtform, &fmt, inner_width );
8311 }
8312
8313 // First, look if pt happens to be in some float
8314 // (this may not work with floats with negative margins)
8315 int fcount = txtform->GetFloatCount();
8316 for (int f=0; f<fcount; f++) {
8317 const embedded_float_t * flt = txtform->GetFloatInfo(f);
8318 // Ignore fake floats (no srctext) made from outer floats footprint
8319 if ( flt->srctext == NULL )
8320 continue;
8321 if (pt.x >= flt->x && pt.x < flt->x + flt->width && pt.y >= flt->y && pt.y < flt->y + flt->height ) {
8322 // pt is inside this float.
8323 ldomNode * node = (ldomNode *) flt->srctext->object; // floatBox node
8324 ldomXPointer inside_ptr = createXPointer( orig_pt, direction, strictBounds, node );
8325 if ( !inside_ptr.isNull() ) {
8326 return inside_ptr;
8327 }
8328 // Otherwise, return xpointer to the floatNode itself
8329 return ldomXPointer(node, 0);
8330 // (Or should we let just go on looking only at the text in the original final node?)
8331 }
8332 // If no containing float, go on looking at the text of the original final node
8333 }
8334
8335 // Look at words in the rendered final node (whether it's the original
8336 // main final node, or the one found in a float)
8337 int lcount = txtform->GetLineCount();
8338 for ( int l = 0; l<lcount; l++ ) {
8339 const formatted_line_t * frmline = txtform->GetLineInfo(l);
8340 if ( pt.y >= (int)(frmline->y + frmline->height) && l<lcount-1 )
8341 continue;
8342 // CRLog::debug(" point (%d, %d) line found [%d]: (%d..%d)",
8343 // pt.x, pt.y, l, frmline->y, frmline->y+frmline->height);
8344 bool line_is_bidi = frmline->flags & LTEXT_LINE_IS_BIDI;
8345 int wc = (int)frmline->word_count;
8346
8347 if ( direction >= PT_DIR_SCAN_FORWARD_LOGICAL_FIRST || direction <= PT_DIR_SCAN_BACKWARD_LOGICAL_FIRST ) {
8348 // Only used by LVDocView::getBookmark(), LVDocView::getPageDocumentRange()
8349 // and ldomDocument::findText(), to not miss any content or text from
8350 // the page.
8351 // The SCAN_ part has been done done: a line has been found, and we want
8352 // to find node/chars from it in the logical (HTML) order, and not in the
8353 // visual order (that PT_DIR_SCAN_FORWARD/PT_DIR_SCAN_BACKWARD do), which
8354 // might not be the same in bidi lines:
8355 bool find_first = direction == PT_DIR_SCAN_FORWARD_LOGICAL_FIRST ||
8356 direction == PT_DIR_SCAN_BACKWARD_LOGICAL_FIRST;
8357 // so, false when PT_DIR_SCAN_FORWARD_LOGICAL_LAST
8358 // or PT_DIR_SCAN_BACKWARD_LOGICAL_LAST
8359
8360 const formatted_word_t * word = NULL;
8361 for ( int w=0; w<wc; w++ ) {
8362 const formatted_word_t * tmpword = &frmline->words[w];
8363 const src_text_fragment_t * src = txtform->GetSrcInfo(tmpword->src_text_index);
8364 ldomNode * node = (ldomNode *)src->object;
8365 if ( !node ) // ignore crengine added text (spacing, list item bullets...)
8366 continue;
8367 if ( !line_is_bidi ) {
8368 word = tmpword;
8369 if ( find_first )
8370 break; // found logical first real word
8371 // otherwise, go to the end, word will be logical last real word
8372 }
8373 else {
8374 if (!word) { // first word seen: first candidate
8375 word = tmpword;
8376 }
8377 else { // compare current word to the current candidate
8378 if ( find_first && tmpword->src_text_index < word->src_text_index ) {
8379 word = tmpword;
8380 }
8381 else if ( !find_first && tmpword->src_text_index > word->src_text_index ) {
8382 word = tmpword;
8383 }
8384 else if (tmpword->src_text_index == word->src_text_index ) {
8385 // (Same src_text_fragment_t, same src->t.offset, skip in when comparing)
8386 if ( find_first && tmpword->t.start < word->t.start ) {
8387 word = tmpword;
8388 }
8389 else if ( !find_first && tmpword->t.start > word->t.start ) {
8390 word = tmpword;
8391 }
8392 }
8393 }
8394 }
8395 }
8396 if ( !word ) // no word: no xpointer (should not happen?)
8397 return ptr;
8398 // Found right word/image
8399 const src_text_fragment_t * src = txtform->GetSrcInfo(word->src_text_index);
8400 ldomNode * node = (ldomNode *)src->object;
8401 if ( word->flags & LTEXT_WORD_IS_INLINE_BOX ) {
8402 // pt is inside this inline-block inlineBox node
8403 ldomXPointer inside_ptr = createXPointer( orig_pt, direction, strictBounds, node );
8404 if ( !inside_ptr.isNull() ) {
8405 return inside_ptr;
8406 }
8407 // Otherwise, return xpointer to the inlineBox itself
8408 return ldomXPointer(node, 0);
8409 }
8410 if ( word->flags & LTEXT_WORD_IS_OBJECT ) {
8411 return ldomXPointer(node, 0);
8412 }
8413 // It is a word
8414 if ( find_first ) // return xpointer to logical start of word
8415 return ldomXPointer( node, src->t.offset + word->t.start );
8416 else // return xpointer to logical end of word
8417 return ldomXPointer( node, src->t.offset + word->t.start + word->t.len );
8418 }
8419
8420 // Found line, searching for word (words are in visual order)
8421 int x = pt.x - frmline->x;
8422 // frmline->x is text indentation (+ possibly leading space if text
8423 // centered or right aligned)
8424 if (strictBounds) {
8425 if (x < 0 || x > frmline->width) { // pt is before or after formatted text: nothing there
8426 return ptr;
8427 }
8428 }
8429
8430 for ( int w=0; w<wc; w++ ) {
8431 const formatted_word_t * word = &frmline->words[w];
8432 if ( ( !line_is_bidi && x < word->x + word->width ) ||
8433 ( line_is_bidi && x >= word->x && x < word->x + word->width ) ||
8434 ( w == wc-1 ) ) {
8435 const src_text_fragment_t * src = txtform->GetSrcInfo(word->src_text_index);
8436 // CRLog::debug(" word found [%d]: x=%d..%d, start=%d, len=%d %08X",
8437 // w, word->x, word->x + word->width, word->t.start, word->t.len, src->object);
8438
8439 ldomNode * node = (ldomNode *)src->object;
8440 if ( !node ) // Ignore crengine added text (spacing, list item bullets...)
8441 continue;
8442
8443 if ( word->flags & LTEXT_WORD_IS_INLINE_BOX ) {
8444 // pt is inside this inline-block inlineBox node
8445 ldomXPointer inside_ptr = createXPointer( orig_pt, direction, strictBounds, node );
8446 if ( !inside_ptr.isNull() ) {
8447 return inside_ptr;
8448 }
8449 // Otherwise, return xpointer to the inlineBox itself
8450 return ldomXPointer(node, 0);
8451 }
8452 if ( word->flags & LTEXT_WORD_IS_OBJECT ) {
8453 // Object (image)
8454 #if 1
8455 // return image object itself
8456 return ldomXPointer(node, 0);
8457 #else
8458 return ldomXPointer( node->getParentNode(),
8459 node->getNodeIndex() + (( x < word->x + word->width/2 ) ? 0 : 1) );
8460 #endif
8461 }
8462
8463 // Found word, searching for letters
8464 LVFont * font = (LVFont *) src->t.font;
8465 lUInt16 width[512];
8466 lUInt8 flg[512];
8467
8468 lString32 str = node->getText();
8469 // We need to transform the node text as it had been when
8470 // rendered (the transform may change chars widths) for the
8471 // XPointer offset to be correct
8472 switch ( node->getParentNode()->getStyle()->text_transform ) {
8473 case css_tt_uppercase:
8474 str.uppercase();
8475 break;
8476 case css_tt_lowercase:
8477 str.lowercase();
8478 break;
8479 case css_tt_capitalize:
8480 str.capitalize();
8481 break;
8482 case css_tt_full_width:
8483 // str.fullWidthChars(); // disabled for now in lvrend.cpp
8484 break;
8485 default:
8486 break;
8487 }
8488
8489 lUInt32 hints = WORD_FLAGS_TO_FNT_FLAGS(word->flags);
8490 font->measureText( str.c_str()+word->t.start, word->t.len, width, flg, word->width+50, '?',
8491 src->lang_cfg, src->letter_spacing + word->added_letter_spacing, false, hints);
8492
8493 bool word_is_rtl = word->flags & LTEXT_WORD_DIRECTION_IS_RTL;
8494 if ( word_is_rtl ) {
8495 for ( int i=word->t.len-1; i>=0; i-- ) {
8496 int xx = ( i>0 ) ? (width[i-1] + width[i])/2 : width[i]/2;
8497 xx = word->width - xx;
8498 if ( x < word->x + xx ) {
8499 return ldomXPointer( node, src->t.offset + word->t.start + i );
8500 }
8501 }
8502 return ldomXPointer( node, src->t.offset + word->t.start );
8503 }
8504 else {
8505 for ( int i=0; i<word->t.len; i++ ) {
8506 int xx = ( i>0 ) ? (width[i-1] + width[i])/2 : width[i]/2;
8507 if ( x < word->x + xx ) {
8508 return ldomXPointer( node, src->t.offset + word->t.start + i );
8509 }
8510 }
8511 return ldomXPointer( node, src->t.offset + word->t.start + word->t.len );
8512 }
8513 }
8514 }
8515 }
8516 return ptr;
8517 }
8518
8519 /// returns coordinates of pointer inside formatted document
toPoint(bool extended) const8520 lvPoint ldomXPointer::toPoint(bool extended) const
8521 {
8522 lvRect rc;
8523 if ( !getRect( rc, extended ) )
8524 return lvPoint(-1, -1);
8525 return rc.topLeft();
8526 }
8527
8528 /// returns caret rectangle for pointer inside formatted document
8529 // (with extended=true, consider paddings and borders)
8530 // Note that extended / ldomXPointer::getRectEx() is only used (by cre.cpp)
8531 // when dealing with hyphenated words, getting each char width, char by char.
8532 // So we return the char width (and no more the word width) of the char
8533 // pointed to by this XPointer (unlike ldomXRange::getRectEx() which deals
8534 // with a range between 2 XPointers).
getRect(lvRect & rect,bool extended,bool adjusted) const8535 bool ldomXPointer::getRect(lvRect & rect, bool extended, bool adjusted) const
8536 {
8537 //CRLog::trace("ldomXPointer::getRect()");
8538 if ( isNull() )
8539 return false;
8540 ldomNode * p = isElement() ? getNode() : getNode()->getParentNode();
8541 ldomNode * p0 = p;
8542 ldomNode * finalNode = NULL;
8543 if ( !p ) {
8544 //CRLog::trace("ldomXPointer::getRect() - p==NULL");
8545 return false;
8546 }
8547 ldomDocument* doc = p->getDocument();
8548 //printf("getRect( p=%08X type=%d )\n", (unsigned)p, (int)p->getNodeType() );
8549 if ( !doc ) {
8550 //CRLog::trace("ldomXPointer::getRect() - p->getDocument()==NULL");
8551 return false;
8552 }
8553 ldomNode * mainNode = doc->getRootNode();
8554 for ( ; p; p = p->getParentNode() ) {
8555 int rm = p->getRendMethod();
8556 if ( rm == erm_final ) {
8557 if ( doc->getDOMVersionRequested() < 20180524 && p->getStyle()->display == css_d_list_item_legacy ) {
8558 // This legacy rendering of list item is now erm_final, but
8559 // can contain other real erm_final nodes.
8560 // So, if we found an erm_final, and if we find this erm_final
8561 // when going up, we should use it (unlike in next case).
8562 // (This is needed to correctly display highlights on books opened
8563 // with some older DOM_VERSION.)
8564 finalNode = p;
8565 }
8566 else {
8567 // With floats, we may get multiple erm_final when walking up
8568 // to root node: keep the first one met (but go on up to the
8569 // root node in case we're in some upper erm_invisible).
8570 if (!finalNode)
8571 finalNode = p; // found final block
8572 }
8573 }
8574 else if ( p->getRendMethod() == erm_invisible ) {
8575 return false; // invisible !!!
8576 }
8577 if ( p==mainNode )
8578 break;
8579 }
8580
8581 if ( finalNode==NULL ) {
8582 lvRect rc;
8583 p0->getAbsRect( rc );
8584 CRLog::debug("node w/o final parent: %d..%d", rc.top, rc.bottom);
8585 }
8586
8587 if ( finalNode!=NULL ) {
8588 lvRect rc;
8589 finalNode->getAbsRect( rc, extended ); // inner=true if extended=true
8590 if (rc.height() == 0 && rc.width() > 0) {
8591 rect = rc;
8592 rect.bottom++;
8593 return true;
8594 }
8595 RenderRectAccessor fmt( finalNode );
8596 //if ( !fmt )
8597 // return false;
8598
8599 // When in enhanced rendering mode, we can get the FormattedText coordinates
8600 // and its width (inner_width) directly
8601 int inner_width;
8602 if ( RENDER_RECT_HAS_FLAG(fmt, INNER_FIELDS_SET) ) {
8603 inner_width = fmt.getInnerWidth();
8604 // if extended=true, we got directly the adjusted rc.top and rc.left
8605 }
8606 else {
8607 // In legacy mode, we just got the erm_final coordinates, and we must
8608 // compute and remove left/top border and padding (using rc.width() as
8609 // the base for % is wrong here)
8610 int em = finalNode->getFont()->getSize();
8611 int padding_left = measureBorder(finalNode,3) + lengthToPx(finalNode->getStyle()->padding[0], rc.width(), em);
8612 int padding_right = measureBorder(finalNode,1) + lengthToPx(finalNode->getStyle()->padding[1], rc.width(), em);
8613 inner_width = fmt.getWidth() - padding_left - padding_right;
8614 if (extended) {
8615 int padding_top = measureBorder(finalNode,0) + lengthToPx(finalNode->getStyle()->padding[2], rc.width(), em);
8616 rc.top += padding_top;
8617 rc.left += padding_left;
8618 // rc.right += padding_left; // wrong, but not used
8619 // rc.bottom += padding_top; // wrong, but not used
8620 }
8621 }
8622
8623 // Get the formatted text, so we can look where in it is this XPointer
8624 LFormattedTextRef txtform;
8625 finalNode->renderFinalBlock( txtform, &fmt, inner_width );
8626
8627 ldomNode *node = getNode();
8628 int offset = getOffset();
8629 //// ldomXPointerEx xp(node, offset);
8630 //// if ( !node->isText() ) {
8631 //// //ldomXPointerEx xp(node, offset);
8632 //// xp.nextVisibleText();
8633 //// node = xp.getNode();
8634 //// offset = xp.getOffset();
8635 //// }
8636 // if ( node->isElement() ) {
8637 // if ( offset>=0 ) {
8638 // //
8639 // if ( offset>= (int)node->getChildCount() ) {
8640 // node = node->getLastTextChild();
8641 // if ( node )
8642 // offset = node->getText().length();
8643 // else
8644 // return false;
8645 // } else {
8646 // for ( int ci=offset; ci<(int)node->getChildCount(); ci++ ) {
8647 // ldomNode * child = node->getChildNode( offset );
8648 // ldomNode * txt = txt = child->getFirstTextChild( true );
8649 // if ( txt ) {
8650 // node = txt;
8651 //// lString32 s = txt->getText();
8652 //// CRLog::debug("text: [%d] '%s'", s.length(), LCSTR(s));
8653 // break;
8654 // }
8655 // }
8656 // if ( !node->isText() )
8657 // return false;
8658 // offset = 0;
8659 // }
8660 // }
8661 // }
8662
8663 // text node
8664 int srcIndex = -1;
8665 int srcLen = -1;
8666 int lastIndex = -1;
8667 int lastLen = -1;
8668 int lastOffset = -1;
8669 ldomXPointerEx xp(node, offset);
8670 for ( int i=0; i<txtform->GetSrcCount(); i++ ) {
8671 const src_text_fragment_t * src = txtform->GetSrcInfo(i);
8672 if ( src->flags & LTEXT_SRC_IS_FLOAT ) // skip floats
8673 continue;
8674 bool isObject = (src->flags<EXT_SRC_IS_OBJECT)!=0;
8675 if ( src->object == node ) {
8676 srcIndex = i;
8677 srcLen = isObject ? 0 : src->t.len;
8678 break;
8679 }
8680 lastIndex = i;
8681 lastLen = isObject ? 0 : src->t.len;
8682 lastOffset = isObject ? 0 : src->t.offset;
8683 ldomXPointerEx xp2((ldomNode*)src->object, lastOffset);
8684 if ( xp2.compare(xp)>0 ) {
8685 srcIndex = i;
8686 srcLen = lastLen;
8687 offset = lastOffset;
8688 break;
8689 }
8690 }
8691 if ( srcIndex == -1 ) {
8692 if ( lastIndex<0 )
8693 return false;
8694 srcIndex = lastIndex;
8695 srcLen = lastLen;
8696 offset = lastOffset;
8697 }
8698
8699 // Some state for non-linear bidi word search
8700 int nearestForwardSrcIndex = -1;
8701 int nearestForwardSrcOffset = -1;
8702 lvRect bestBidiRect = lvRect();
8703 bool hasBestBidiRect = false;
8704
8705 for ( int l = 0; l<txtform->GetLineCount(); l++ ) {
8706 const formatted_line_t * frmline = txtform->GetLineInfo(l);
8707 bool line_is_bidi = frmline->flags & LTEXT_LINE_IS_BIDI;
8708 for ( int w=0; w<(int)frmline->word_count; w++ ) {
8709 const formatted_word_t * word = &frmline->words[w];
8710 bool word_is_rtl = word->flags & LTEXT_WORD_DIRECTION_IS_RTL;
8711 bool lastWord = (l == txtform->GetLineCount() - 1
8712 && w == frmline->word_count - 1);
8713
8714 if ( line_is_bidi ) {
8715 // When line is bidi, src text nodes may be shuffled, so we can't
8716 // just be done when meeting a forward src in logical order.
8717 // We'd better have a dedicated searching code to not mess with
8718 // the visual=logical order generic code below.
8719 // todo: see if additional tweaks according to
8720 // frmline->flags<EXT_LINE_PARA_IS_RTL may help adjusting
8721 // char rects depending on it vs word_is_rtl.
8722 if ( word->src_text_index>=srcIndex || lastWord ) {
8723 // Found word from same or forward src line
8724 if (word->src_text_index > srcIndex &&
8725 ( nearestForwardSrcIndex == -1 ||
8726 word->src_text_index < nearestForwardSrcIndex ||
8727 (word->src_text_index == nearestForwardSrcIndex &&
8728 word->t.start < nearestForwardSrcOffset ) ) ) {
8729 // Found some word from a forward src that is nearest than previously found one:
8730 // get its start as a possible best result.
8731 bestBidiRect.top = rc.top + frmline->y;
8732 bestBidiRect.bottom = bestBidiRect.top + frmline->height;
8733 if ( word_is_rtl ) {
8734 bestBidiRect.right = word->x + word->width + rc.left + frmline->x;
8735 bestBidiRect.left = bestBidiRect.right - 1;
8736 }
8737 else {
8738 bestBidiRect.left = word->x + rc.left + frmline->x;
8739 if (extended) {
8740 if (word->flags & (LTEXT_WORD_IS_OBJECT|LTEXT_WORD_IS_INLINE_BOX) && word->width > 0)
8741 bestBidiRect.right = bestBidiRect.left + word->width; // width of image
8742 else
8743 bestBidiRect.right = bestBidiRect.left + 1;
8744 }
8745 }
8746 hasBestBidiRect = true;
8747 nearestForwardSrcIndex = word->src_text_index;
8748 if (word->flags & (LTEXT_WORD_IS_OBJECT|LTEXT_WORD_IS_INLINE_BOX))
8749 nearestForwardSrcOffset = 0;
8750 else
8751 nearestForwardSrcOffset = word->t.start;
8752 }
8753 else if (word->src_text_index == srcIndex) {
8754 // Found word in that exact source text node
8755 if ( word->flags & (LTEXT_WORD_IS_OBJECT|LTEXT_WORD_IS_INLINE_BOX) ) {
8756 // An image is the single thing in its srcIndex
8757 rect.top = rc.top + frmline->y;
8758 rect.bottom = rect.top + frmline->height;
8759 rect.left = word->x + rc.left + frmline->x;
8760 if (word->width > 0)
8761 rect.right = rect.left + word->width; // width of image
8762 else
8763 rect.right = rect.left + 1;
8764 return true;
8765 }
8766 // Target is in this text node. We may not find it part
8767 // of a word, so look at all words and keep the nearest
8768 // (forward if possible) in case we don't find an exact one
8769 if ( word->t.start > offset ) { // later word in logical order
8770 if (nearestForwardSrcIndex != word->src_text_index ||
8771 word->t.start <= nearestForwardSrcOffset ) {
8772 bestBidiRect.top = rc.top + frmline->y;
8773 bestBidiRect.bottom = bestBidiRect.top + frmline->height;
8774 if ( word_is_rtl ) { // right edge of next logical word, as it is drawn on the left
8775 bestBidiRect.right = word->x + word->width + rc.left + frmline->x;
8776 bestBidiRect.left = bestBidiRect.right - 1;
8777 }
8778 else { // left edge of next logical word, as it is drawn on the right
8779 bestBidiRect.left = word->x + rc.left + frmline->x;
8780 bestBidiRect.right = bestBidiRect.left + 1;
8781 }
8782 hasBestBidiRect = true;
8783 nearestForwardSrcIndex = word->src_text_index;
8784 nearestForwardSrcOffset = word->t.start;
8785 }
8786 }
8787 else if ( word->t.start+word->t.len <= offset ) { // past word in logical order
8788 // Only if/while we haven't yet found one with the right src index and
8789 // a forward offset
8790 if (nearestForwardSrcIndex != word->src_text_index ||
8791 ( nearestForwardSrcOffset < word->t.start &&
8792 word->t.start+word->t.len > nearestForwardSrcOffset ) ) {
8793 bestBidiRect.top = rc.top + frmline->y;
8794 bestBidiRect.bottom = bestBidiRect.top + frmline->height;
8795 if ( word_is_rtl ) { // left edge of previous logical word, as it is drawn on the right
8796 bestBidiRect.left = word->x + rc.left + frmline->x;
8797 bestBidiRect.right = bestBidiRect.left + 1;
8798 }
8799 else { // right edge of previous logical word, as it is drawn on the left
8800 bestBidiRect.right = word->x + word->width + rc.left + frmline->x;
8801 bestBidiRect.left = bestBidiRect.right - 1;
8802 }
8803 hasBestBidiRect = true;
8804 nearestForwardSrcIndex = word->src_text_index;
8805 nearestForwardSrcOffset = word->t.start+word->t.len;
8806 }
8807 }
8808 else { // exact word found
8809 // Measure word
8810 LVFont *font = (LVFont *) txtform->GetSrcInfo(srcIndex)->t.font;
8811 lUInt16 w[512];
8812 lUInt8 flg[512];
8813 lString32 str = node->getText();
8814 if (offset == word->t.start && str.empty()) {
8815 rect.left = word->x + rc.left + frmline->x;
8816 rect.top = rc.top + frmline->y;
8817 rect.right = rect.left + 1;
8818 rect.bottom = rect.top + frmline->height;
8819 return true;
8820 }
8821 // We need to transform the node text as it had been when
8822 // rendered (the transform may change chars widths) for the
8823 // rect to be correct
8824 switch ( node->getParentNode()->getStyle()->text_transform ) {
8825 case css_tt_uppercase:
8826 str.uppercase();
8827 break;
8828 case css_tt_lowercase:
8829 str.lowercase();
8830 break;
8831 case css_tt_capitalize:
8832 str.capitalize();
8833 break;
8834 case css_tt_full_width:
8835 // str.fullWidthChars(); // disabled for now in lvrend.cpp
8836 break;
8837 default:
8838 break;
8839 }
8840 lUInt32 hints = WORD_FLAGS_TO_FNT_FLAGS(word->flags);
8841 font->measureText(
8842 str.c_str()+word->t.start,
8843 word->t.len,
8844 w,
8845 flg,
8846 word->width+50,
8847 '?',
8848 txtform->GetSrcInfo(srcIndex)->lang_cfg,
8849 txtform->GetSrcInfo(srcIndex)->letter_spacing + word->added_letter_spacing,
8850 false,
8851 hints);
8852 rect.top = rc.top + frmline->y;
8853 rect.bottom = rect.top + frmline->height;
8854 // chx is the width of previous chars in the word
8855 int chx = (offset > word->t.start) ? w[ offset - word->t.start - 1 ] : 0;
8856 if ( word_is_rtl ) {
8857 rect.right = word->x + word->width - chx + rc.left + frmline->x;
8858 rect.left = rect.right - 1;
8859 }
8860 else {
8861 rect.left = word->x + chx + rc.left + frmline->x;
8862 rect.right = rect.left + 1;
8863 }
8864 if (extended) { // get width of char at offset
8865 if (offset == word->t.start && word->t.len == 1) {
8866 // With CJK chars, the measured width seems
8867 // less correct than the one measured while
8868 // making words. So use the calculated word
8869 // width for one-char-long words instead
8870 if ( word_is_rtl )
8871 rect.left = rect.right - word->width;
8872 else
8873 rect.right = rect.left + word->width;
8874 }
8875 else {
8876 int chw = w[ offset - word->t.start ] - chx;
8877 bool hyphen_added = false;
8878 if ( offset == word->t.start + word->t.len - 1
8879 && (word->flags & LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER) ) {
8880 // if offset is the end of word, and this word has
8881 // been hyphenated, includes the hyphen width
8882 chw += font->getHyphenWidth();
8883 // We then should not account for the right side
8884 // bearing below
8885 hyphen_added = true;
8886 }
8887 if ( word_is_rtl ) {
8888 rect.left = rect.right - chw;
8889 if ( !hyphen_added ) {
8890 // Also remove our added letter spacing for justification
8891 // from the left, to have cleaner highlights.
8892 rect.left += word->added_letter_spacing;
8893 }
8894 }
8895 else {
8896 rect.right = rect.left + chw;
8897 if ( !hyphen_added ) {
8898 // Also remove our added letter spacing for justification
8899 // from the right, to have cleaner highlights.
8900 rect.right -= word->added_letter_spacing;
8901 }
8902 }
8903 if (adjusted) {
8904 // Extend left or right if this glyph overflows its
8905 // origin/advance box (can happen with an italic font,
8906 // or with a regular font on the right of the letter 'f'
8907 // or on the left of the letter 'J').
8908 // Only when negative (overflow) and not when positive
8909 // (which are more frequent), mostly to keep some good
8910 // looking rectangles on the sides when highlighting
8911 // multiple lines.
8912 rect.left += font->getLeftSideBearing(str[offset], true);
8913 if ( !hyphen_added )
8914 rect.right -= font->getRightSideBearing(str[offset], true);
8915 // Should work wheter rtl or ltr
8916 }
8917 }
8918 // Ensure we always return a non-zero width, even for zero-width
8919 // chars or collapsed spaces (to avoid isEmpty() returning true
8920 // which could be considered as a failure)
8921 if ( rect.right <= rect.left ) {
8922 if ( word_is_rtl )
8923 rect.left = rect.right - 1;
8924 else
8925 rect.right = rect.left + 1;
8926 }
8927 }
8928 return true;
8929 }
8930 }
8931 if ( lastWord ) {
8932 // If no exact word found, return best candidate
8933 if (hasBestBidiRect) {
8934 rect = bestBidiRect;
8935 return true;
8936 }
8937 // Otherwise, return end of last word (?)
8938 rect.top = rc.top + frmline->y;
8939 rect.bottom = rect.top + frmline->height;
8940 rect.left = word->x + rc.left + frmline->x + word->width;
8941 rect.right = rect.left + 1;
8942 return true;
8943 }
8944 }
8945 continue;
8946 } // end if line_is_bidi
8947
8948 // ================================
8949 // Generic code when visual order = logical order
8950 if ( word->src_text_index>=srcIndex || lastWord ) {
8951 // found word from same src line
8952 if ( word->flags & (LTEXT_WORD_IS_OBJECT|LTEXT_WORD_IS_INLINE_BOX)
8953 || word->src_text_index > srcIndex
8954 || (!extended && offset <= word->t.start)
8955 || (extended && offset < word->t.start)
8956 // if extended, and offset = word->t.start, we want to
8957 // measure the first char, which is done in the next else
8958 ) {
8959 // before this word
8960 rect.left = word->x + rc.left + frmline->x;
8961 //rect.top = word->y + rc.top + frmline->y + frmline->baseline;
8962 rect.top = rc.top + frmline->y;
8963 if (extended) {
8964 if (word->flags & (LTEXT_WORD_IS_OBJECT|LTEXT_WORD_IS_INLINE_BOX) && word->width > 0)
8965 rect.right = rect.left + word->width; // width of image
8966 else
8967 rect.right = rect.left + 1; // not the right word: no char width
8968 }
8969 else {
8970 rect.right = rect.left + 1;
8971 }
8972 rect.bottom = rect.top + frmline->height;
8973 return true;
8974 } else if ( (word->src_text_index == srcIndex) &&
8975 ( (offset < word->t.start+word->t.len) ||
8976 (offset==srcLen && offset == word->t.start+word->t.len) ) ) {
8977 // pointer inside this word
8978 LVFont *font = (LVFont *) txtform->GetSrcInfo(srcIndex)->t.font;
8979 lUInt16 w[512];
8980 lUInt8 flg[512];
8981 lString32 str = node->getText();
8982 // With "|| (extended && offset < word->t.start)" added to the first if
8983 // above, we may now be here with: offset = word->t.start = 0
8984 // and a node->getText() returning THE lString32::empty_str:
8985 // font->measureText() would segfault on it because its just a dummy
8986 // pointer. Not really sure why that happens.
8987 // It happens when node is the <a> in:
8988 // <div><span> <a id="someId"/>Anciens </span> <b>...
8989 // and offset=0, word->t.start=0, word->t.len=8 .
8990 // We can just do as in the first 'if'.
8991 if (offset == word->t.start && str.empty()) {
8992 rect.left = word->x + rc.left + frmline->x;
8993 rect.top = rc.top + frmline->y;
8994 rect.right = rect.left + 1;
8995 rect.bottom = rect.top + frmline->height;
8996 return true;
8997 }
8998 // We need to transform the node text as it had been when
8999 // rendered (the transform may change chars widths) for the
9000 // rect to be correct
9001 switch ( node->getParentNode()->getStyle()->text_transform ) {
9002 case css_tt_uppercase:
9003 str.uppercase();
9004 break;
9005 case css_tt_lowercase:
9006 str.lowercase();
9007 break;
9008 case css_tt_capitalize:
9009 str.capitalize();
9010 break;
9011 case css_tt_full_width:
9012 // str.fullWidthChars(); // disabled for now in lvrend.cpp
9013 break;
9014 default:
9015 break;
9016 }
9017 lUInt32 hints = WORD_FLAGS_TO_FNT_FLAGS(word->flags);
9018 font->measureText(
9019 str.c_str()+word->t.start,
9020 word->t.len,
9021 w,
9022 flg,
9023 word->width+50,
9024 '?',
9025 txtform->GetSrcInfo(srcIndex)->lang_cfg,
9026 txtform->GetSrcInfo(srcIndex)->letter_spacing + word->added_letter_spacing,
9027 false,
9028 hints );
9029 // chx is the width of previous chars in the word
9030 int chx = (offset > word->t.start) ? w[ offset - word->t.start - 1 ] : 0;
9031 rect.left = word->x + chx + rc.left + frmline->x;
9032 //rect.top = word->y + rc.top + frmline->y + frmline->baseline;
9033 rect.top = rc.top + frmline->y;
9034 if (extended) { // get width of char at offset
9035 if (offset == word->t.start && word->t.len == 1) {
9036 // With CJK chars, the measured width seems
9037 // less correct than the one measured while
9038 // making words. So use the calculated word
9039 // width for one-char-long words instead
9040 rect.right = rect.left + word->width;
9041 }
9042 else {
9043 int chw = w[ offset - word->t.start ] - chx;
9044 bool hyphen_added = false;
9045 if ( offset == word->t.start + word->t.len - 1
9046 && (word->flags & LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER) ) {
9047 // if offset is the end of word, and this word has
9048 // been hyphenated, includes the hyphen width
9049 chw += font->getHyphenWidth();
9050 // We then should not account for the right side
9051 // bearing below
9052 hyphen_added = true;
9053 }
9054 rect.right = rect.left + chw;
9055 if ( !hyphen_added ) {
9056 // Also remove our added letter spacing for justification
9057 // from the right, to have cleaner highlights.
9058 rect.right -= word->added_letter_spacing;
9059 }
9060 if (adjusted) {
9061 // Extend left or right if this glyph overflows its
9062 // origin/advance box (can happen with an italic font,
9063 // or with a regular font on the right of the letter 'f'
9064 // or on the left of the letter 'J').
9065 // Only when negative (overflow) and not when positive
9066 // (which are more frequent), mostly to keep some good
9067 // looking rectangles on the sides when highlighting
9068 // multiple lines.
9069 rect.left += font->getLeftSideBearing(str[offset], true);
9070 if ( !hyphen_added )
9071 rect.right -= font->getRightSideBearing(str[offset], true);
9072 }
9073 }
9074 // Ensure we always return a non-zero width, even for zero-width
9075 // chars or collapsed spaces (to avoid isEmpty() returning true
9076 // which could be considered as a failure)
9077 if ( rect.right <= rect.left )
9078 rect.right = rect.left + 1;
9079 }
9080 else
9081 rect.right = rect.left + 1;
9082 rect.bottom = rect.top + frmline->height;
9083 return true;
9084 } else if (lastWord) {
9085 // after last word
9086 rect.left = word->x + rc.left + frmline->x + word->width;
9087 //rect.top = word->y + rc.top + frmline->y + frmline->baseline;
9088 rect.top = rc.top + frmline->y;
9089 if (extended)
9090 rect.right = rect.left + 1; // not the right word: no char width
9091 else
9092 rect.right = rect.left + 1;
9093 rect.bottom = rect.top + frmline->height;
9094 return true;
9095 }
9096 }
9097 }
9098 }
9099 // return false;
9100 // Not found, which is possible with a final node with only empty
9101 // elements. This final node has a rect, so use it.
9102 rect = rc;
9103 return true;
9104 } else {
9105 // no base final node, using blocks
9106 //lvRect rc;
9107 ldomNode * node = getNode();
9108 int offset = getOffset();
9109 if ( offset<0 || node->getChildCount()==0 ) {
9110 node->getAbsRect( rect );
9111 return true;
9112 //return rc.topLeft();
9113 }
9114 if ( offset < (int)node->getChildCount() ) {
9115 node->getChildNode(offset)->getAbsRect( rect );
9116 return true;
9117 //return rc.topLeft();
9118 }
9119 node->getChildNode(node->getChildCount()-1)->getAbsRect( rect );
9120 return true;
9121 //return rc.bottomRight();
9122 }
9123 }
9124 #endif
9125
isBoxingNode(ldomNode * node)9126 static bool isBoxingNode(ldomNode * node)
9127 {
9128 // In the context this is used (xpointers), handle pseudoElems (that don't
9129 // box anything) just as boxing nodes: ignoring them in XPointers.
9130 return node->isBoxingNode(true);
9131 }
9132
isTextNode(ldomNode * node)9133 static bool isTextNode(ldomNode * node)
9134 {
9135 return (node && node->isText());
9136 }
9137
9138 struct ldomNodeIdPredicate
9139 {
9140 lUInt16 m_id;
ldomNodeIdPredicateldomNodeIdPredicate9141 ldomNodeIdPredicate(lUInt16 id) : m_id(id) {}
operator ()ldomNodeIdPredicate9142 bool operator() (ldomNode * node) {
9143 return (node && node->getNodeId() == m_id);
9144 }
9145 };
9146
notNull(ldomNode * node)9147 static bool notNull(ldomNode * node)
9148 {
9149 return (NULL != node);
9150 }
9151
9152 template<typename T>
getNodeByIndex(ldomNode * parent,int index,T predicat,int & count)9153 static ldomNode * getNodeByIndex(ldomNode *parent, int index, T predicat, int& count)
9154 {
9155 ldomNode *foundNode = NULL;
9156
9157 for( int i=0; i < (int)parent->getChildCount(); i++) {
9158 ldomNode * p = parent->getChildNode(i);
9159 if( isBoxingNode(p) ) {
9160 foundNode = getNodeByIndex(p, index, predicat, count);
9161 if( foundNode )
9162 return foundNode;
9163 } else if(predicat(p)) {
9164 count++;
9165 if(index == -1 || count == index) {
9166 if( !foundNode )
9167 foundNode = p;
9168 return foundNode;
9169 }
9170 }
9171 }
9172 return NULL;
9173 }
9174
9175 /// create XPointer from relative pointer non-normalized string made by toStringV1()
createXPointerV1(ldomNode * baseNode,const lString32 & xPointerStr)9176 ldomXPointer ldomDocument::createXPointerV1( ldomNode * baseNode, const lString32 & xPointerStr )
9177 {
9178 //CRLog::trace( "ldomDocument::createXPointer(%s)", UnicodeToUtf8(xPointerStr).c_str() );
9179 if ( xPointerStr.empty() || !baseNode )
9180 return ldomXPointer();
9181 const lChar32 * str = xPointerStr.c_str();
9182 int index = -1;
9183 ldomNode * currNode = baseNode;
9184 lString32 name;
9185 lString8 ptr8 = UnicodeToUtf8(xPointerStr);
9186 //const char * ptr = ptr8.c_str();
9187 xpath_step_t step_type;
9188
9189 while ( *str ) {
9190 //CRLog::trace( " %s", UnicodeToUtf8(lString32(str)).c_str() );
9191 step_type = ParseXPathStep( str, name, index );
9192 //CRLog::trace( " name=%s index=%d", UnicodeToUtf8(lString32(name)).c_str(), index );
9193 switch (step_type ) {
9194 case xpath_step_error:
9195 // error
9196 //CRLog::trace(" xpath_step_error");
9197 return ldomXPointer();
9198 case xpath_step_element:
9199 // element of type 'name' with 'index' /elemname[N]/
9200 {
9201 lUInt16 id = getElementNameIndex( name.c_str() );
9202 ldomNode * foundItem = currNode->findChildElement(LXML_NS_ANY, id, index > 0 ? index - 1 : -1);
9203 if (foundItem == NULL && currNode->getChildCount() == 1) {
9204 // make saved pointers work properly even after moving of some part of path one element deeper
9205 foundItem = currNode->getChildNode(0)->findChildElement(LXML_NS_ANY, id, index > 0 ? index - 1 : -1);
9206 }
9207 // int foundCount = 0;
9208 // for (unsigned i=0; i<currNode->getChildCount(); i++) {
9209 // ldomNode * p = currNode->getChildNode(i);
9210 // //CRLog::trace( " node[%d] = %d %s", i, p->getNodeId(), LCSTR(p->getNodeName()) );
9211 // if ( p && p->isElement() && p->getNodeId()==id ) {
9212 // foundCount++;
9213 // if ( foundCount==index || index==-1 ) {
9214 // foundItem = p;
9215 // break; // DON'T CHECK WHETHER OTHER ELEMENTS EXIST
9216 // }
9217 // }
9218 // }
9219 // if ( foundItem==NULL || (index==-1 && foundCount>1) ) {
9220 // //CRLog::trace(" Element %d is not found. foundCount=%d", id, foundCount);
9221 // return ldomXPointer(); // node not found
9222 // }
9223 if (foundItem == NULL) {
9224 //CRLog::trace(" Element %d is not found. foundCount=%d", id, foundCount);
9225 return ldomXPointer(); // node not found
9226 }
9227 // found element node
9228 currNode = foundItem;
9229 }
9230 break;
9231 case xpath_step_text:
9232 // text node with 'index' /text()[N]/
9233 {
9234 ldomNode * foundItem = NULL;
9235 int foundCount = 0;
9236 for (int i=0; i<currNode->getChildCount(); i++) {
9237 ldomNode * p = currNode->getChildNode(i);
9238 if ( p->isText() ) {
9239 foundCount++;
9240 if ( foundCount==index || index==-1 ) {
9241 foundItem = p;
9242 }
9243 }
9244 }
9245 if ( foundItem==NULL || (index==-1 && foundCount>1) )
9246 return ldomXPointer(); // node not found
9247 // found text node
9248 currNode = foundItem;
9249 }
9250 break;
9251 case xpath_step_nodeindex:
9252 // node index /N/
9253 if ( index<=0 || index>(int)currNode->getChildCount() )
9254 return ldomXPointer(); // node not found: invalid index
9255 currNode = currNode->getChildNode( index-1 );
9256 break;
9257 case xpath_step_point:
9258 // point index .N
9259 if (*str)
9260 return ldomXPointer(); // not at end of string
9261 if ( currNode->isElement() ) {
9262 // element point
9263 if ( index<0 || index>(int)currNode->getChildCount() )
9264 return ldomXPointer();
9265 return ldomXPointer(currNode, index);
9266 } else {
9267 // text point
9268 if ( index<0 || index>(int)currNode->getText().length() )
9269 return ldomXPointer();
9270 return ldomXPointer(currNode, index);
9271 }
9272 break;
9273 }
9274 }
9275 return ldomXPointer( currNode, -1 ); // XPath: index==-1
9276 }
9277
9278 /// create XPointer from relative pointer normalized string made by toStringV2()
createXPointerV2(ldomNode * baseNode,const lString32 & xPointerStr)9279 ldomXPointer ldomDocument::createXPointerV2( ldomNode * baseNode, const lString32 & xPointerStr )
9280 {
9281 //CRLog::trace( "ldomDocument::createXPointer(%s)", UnicodeToUtf8(xPointerStr).c_str() );
9282 if ( xPointerStr.empty() || !baseNode )
9283 return ldomXPointer();
9284 const lChar32 * str = xPointerStr.c_str();
9285 int index = -1;
9286 int count;
9287 ldomNode * currNode = baseNode;
9288 ldomNode * foundNode;
9289 lString32 name;
9290 xpath_step_t step_type;
9291
9292 while ( *str ) {
9293 //CRLog::trace( " %s", UnicodeToUtf8(lString32(str)).c_str() );
9294 step_type = ParseXPathStep( str, name, index );
9295 //CRLog::trace( " name=%s index=%d", UnicodeToUtf8(lString32(name)).c_str(), index );
9296 switch (step_type ) {
9297 case xpath_step_error:
9298 // error
9299 //CRLog::trace(" xpath_step_error");
9300 return ldomXPointer();
9301 case xpath_step_element:
9302 // element of type 'name' with 'index' /elemname[N]/
9303 {
9304 ldomNodeIdPredicate predicat(getElementNameIndex( name.c_str() ));
9305 count = 0;
9306 foundNode = getNodeByIndex(currNode, index, predicat, count);
9307 if (foundNode == NULL) {
9308 //CRLog::trace(" Element %d is not found. foundCount=%d", id, foundCount);
9309 return ldomXPointer(); // node not found
9310 }
9311 // found element node
9312 currNode = foundNode;
9313 lString32 nm = currNode->getNodeName();
9314 CRLog::trace("%d -> %s", index, LCSTR(nm));
9315 }
9316 break;
9317 case xpath_step_text:
9318 //
9319 count = 0;
9320 foundNode = getNodeByIndex(currNode, index, isTextNode, count);
9321
9322 if ( foundNode==NULL )
9323 return ldomXPointer(); // node not found
9324 // found text node
9325 currNode = foundNode;
9326 break;
9327 case xpath_step_nodeindex:
9328 // node index /N/
9329 count = 0;
9330 foundNode = getNodeByIndex(currNode, index, notNull, count);
9331 if ( foundNode == NULL )
9332 return ldomXPointer(); // node not found: invalid index
9333 currNode = foundNode;
9334 break;
9335 case xpath_step_point:
9336 // point index .N
9337 if (*str)
9338 return ldomXPointer(); // not at end of string
9339 if ( currNode->isElement() ) {
9340 // element point
9341 if ( index<0 || index>(int)currNode->getChildCount() )
9342 return ldomXPointer();
9343 return ldomXPointer(currNode, index);
9344 } else {
9345 // text point
9346 if ( index<0 || index>(int)currNode->getText().length() )
9347 return ldomXPointer();
9348 return ldomXPointer(currNode, index);
9349 }
9350 break;
9351 }
9352 }
9353 return ldomXPointer( currNode, -1 ); // XPath: index==-1
9354 }
9355
9356 /// returns XPath segment for this element relative to parent element (e.g. "p[10]")
getXPathSegment()9357 lString32 ldomNode::getXPathSegment()
9358 {
9359 if ( isNull() || isRoot() )
9360 return lString32::empty_str;
9361 ldomNode * parent = getParentNode();
9362 int cnt = parent->getChildCount();
9363 int index = 0;
9364 if ( isElement() ) {
9365 int id = getNodeId();
9366 for ( int i=0; i<cnt; i++ ) {
9367 ldomNode * node = parent->getChildNode(i);
9368 if ( node == this ) {
9369 return getNodeName() + "[" + fmt::decimal(index+1) + "]";
9370 }
9371 if ( node->isElement() && node->getNodeId()==id )
9372 index++;
9373 }
9374 } else {
9375 for ( int i=0; i<cnt; i++ ) {
9376 ldomNode * node = parent->getChildNode(i);
9377 if ( node == this ) {
9378 return "text()[" + lString32::itoa(index+1) + "]";
9379 }
9380 if ( node->isText() )
9381 index++;
9382 }
9383 }
9384 return lString32::empty_str;
9385 }
9386
// Using names, old, with boxing elements (non-normalized)
/// Builds the XPath-like string for this pointer by walking from the node up
/// to the document root node, prepending one "/name[index]" (or
/// "/text()[index]") segment per level.
/// When the offset is >= 0, ".offset" is the trailing part of the result.
/// Returns an empty string for a null pointer.
lString32 ldomXPointer::toStringV1()
{
    lString32 path;
    if ( isNull() )
        return path;
    ldomNode * node = getNode();
    int offset = getOffset();
    if ( offset >= 0 ) {
        // The point offset comes last in the string, so it is added first:
        // every segment below is prepended in front of it
        path << "." << fmt::decimal(offset);
    }
    ldomNode * p = node;
    ldomNode * mainNode = node->getDocument()->getRootNode();
    while (p && p!=mainNode) {
        ldomNode * parent = p->getParentNode();
        if ( p->isElement() ) {
            // element
            lString32 name = p->getNodeName();
            lUInt16 id = p->getNodeId();
            if ( !parent )
                return "/" + name + path;
            // Count the parent's children having the same element id, and
            // remember the 1-based rank of p among them
            int index = -1;
            int count = 0;
            for ( int i=0; i<parent->getChildCount(); i++ ) {
                ldomNode * node = parent->getChildElementNode( i, id );
                if ( node ) {
                    count++;
                    if ( node==p )
                        index = count;
                }
            }
            // The "[index]" suffix is only written when the parent has more
            // than one child with that element id
            if ( count>1 )
                path = cs32("/") + name + "[" + fmt::decimal(index) + "]" + path;
            else
                path = cs32("/") + name + path;
        } else {
            // text
            if ( !parent )
                return cs32("/text()") + path;
            // Same as above, but counting the parent's text node children
            int index = -1;
            int count = 0;
            for ( int i=0; i<parent->getChildCount(); i++ ) {
                ldomNode * node = parent->getChildNode( i );
                if ( node->isText() ) {
                    count++;
                    if ( node==p )
                        index = count;
                }
            }
            if ( count>1 )
                path = cs32("/text()") + "[" + fmt::decimal(index) + "]" + path;
            else
                path = "/text()" + path;
        }
        p = parent;
    }
    return path;
}
9445
9446 template<typename T>
getElementIndex(ldomNode * parent,ldomNode * targetNode,T predicat,int & count)9447 static int getElementIndex(ldomNode* parent, ldomNode *targetNode, T predicat, int& count)
9448 {
9449 for ( int i=0; i<parent->getChildCount(); i++ ) {
9450 ldomNode * node = parent->getChildNode( i );
9451 if( isBoxingNode(node) && targetNode != node ) {
9452 int index = getElementIndex(node, targetNode, predicat, count);
9453 if(index > 0)
9454 return index;
9455 } else if (predicat(node))
9456 count++;
9457 if ( node==targetNode )
9458 return count;
9459 }
9460 return -1;
9461 }
9462
// Using names, new, without boxing elements, so: normalized
/// Builds the XPath-like string for this pointer while ignoring boxing
/// nodes: they get no segment of their own, and indexes are computed
/// as if their children were direct children of the nearest non-boxing
/// ancestor (see getElementIndex()).
/// Returns an empty string for a null pointer.
lString32 ldomXPointer::toStringV2()
{
    lString32 path;
    if ( isNull() )
        return path;
    ldomNode * node = getNode();
    int offset = getOffset();
    ldomNode * p = node;
    if ( !node->isBoxingNode(true) ) { // (nor pseudoElem)
        if ( offset >= 0 ) {
            path << "." << fmt::decimal(offset);
        }
    }
    else {
        // The pointer targets a boxing node (or pseudo element), which must
        // not appear in the path: point instead to its child at 'offset',
        // or to its parent when offset is not below the child count.
        // No ".offset" suffix is written in this case.
        // NOTE(review): a negative offset also satisfies this test and is
        // passed to getChildNode() - confirm that this can't happen here.
        if ( offset < p->getChildCount() )
            p = p->getChildNode(offset);
        else
            p = p->getParentNode();
    }
    ldomNode * mainNode = node->getDocument()->getRootNode();
    while (p && p!=mainNode) {
        ldomNode * parent = p->getParentNode();
        // Skip boxing ancestors: the segment is relative to the nearest
        // non-boxing parent
        while( isBoxingNode(parent) )
            parent = parent->getParentNode();
        if ( p->isElement() ) {
            // element
            lString32 name = p->getNodeName();
            if ( !parent )
                return "/" + name + path;
            int count = 0;
            ldomNodeIdPredicate predicat(p->getNodeId());
            // On return, count is the number of matching nodes met up to
            // and including p (getElementIndex() stops at the target)
            int index = getElementIndex(parent, p, predicat, count);
            if ( count == 1 ) {
                // We're first, but see if we have following siblings with the
                // same element name, so we can have "div[1]" instead of "div"
                // when parent has more than one of it (as toStringV1 does).
                ldomNode * n = p;
                while ( ( n = n->getUnboxedNextSibling(true) ) ) {
                    if ( predicat(n) ) { // We have such a followup sibling
                        count = 2; // there's at least 2 of them
                        break;
                    }
                }
            }
            if ( count>1 )
                path = cs32("/") + name + "[" + fmt::decimal(index) + "]" + path;
            else
                path = cs32("/") + name + path;
        } else {
            // text
            if ( !parent )
                return cs32("/text()") + path;
            int count = 0;
            int index = getElementIndex(parent, p, isTextNode, count);
            if ( count == 1 ) {
                // We're first, but see if we have following text siblings,
                // so we can have "text()[1]" instead of "text()" when
                // parent has more than one text node (as toStringV1 does).
                ldomNode * n = p;
                while ( ( n = n->getUnboxedNextSibling(false) ) ) {
                    if ( isTextNode(n) ) { // We have such a followup sibling
                        count = 2; // there's at least 2 of them
                        break;
                    }
                }
            }
            if ( count>1 )
                path = cs32("/text()") + "[" + fmt::decimal(index) + "]" + path;
            else
                path = "/text()" + path;
        }
        p = parent;
    }
    return path;
}
9539
9540 // Without element names, normalized (not used)
toStringV2AsIndexes()9541 lString32 ldomXPointer::toStringV2AsIndexes()
9542 {
9543 lString32 path;
9544 if ( isNull() )
9545 return path;
9546 int offset = getOffset();
9547 if ( offset >= 0 ) {
9548 path << "." << fmt::decimal(offset);
9549 }
9550 ldomNode * p = getNode();
9551 ldomNode * rootNode = p->getDocument()->getRootNode();
9552 while( p && p!=rootNode ) {
9553 ldomNode * parent = p->getParentNode();
9554 if ( !parent )
9555 return "/" + (p->isElement() ? p->getNodeName() : cs32("/text()")) + path;
9556
9557 while( isBoxingNode(parent) )
9558 parent = parent->getParentNode();
9559
9560 int count = 0;
9561 int index = getElementIndex(parent, p, notNull, count);
9562
9563 if( index>0 ) {
9564 path = cs32("/") + fmt::decimal(index) + path;
9565 } else {
9566 CRLog::error("!!! child node not found in a parent");
9567 }
9568 p = parent;
9569 }
9570 return path;
9571 }
9572
9573 #if BUILD_LITE!=1
getFullHeight()9574 int ldomDocument::getFullHeight()
9575 {
9576 RenderRectAccessor rd( this->getRootNode() );
9577 return rd.getHeight() + rd.getY();
9578 }
9579 #endif
9580
9581
9582
9583
extractDocAuthors(ldomDocument * doc,lString32 delimiter,bool shortMiddleName)9584 lString32 extractDocAuthors( ldomDocument * doc, lString32 delimiter, bool shortMiddleName )
9585 {
9586 if ( delimiter.empty() )
9587 delimiter = ", ";
9588 lString32 authors;
9589 for ( int i=0; i<16; i++) {
9590 lString32 path = cs32("/FictionBook/description/title-info/author[") + fmt::decimal(i+1) + "]";
9591 ldomXPointer pauthor = doc->createXPointer(path);
9592 if ( !pauthor ) {
9593 //CRLog::trace( "xpath not found: %s", UnicodeToUtf8(path).c_str() );
9594 break;
9595 }
9596 lString32 firstName = pauthor.relative( U"/first-name" ).getText().trim();
9597 lString32 lastName = pauthor.relative( U"/last-name" ).getText().trim();
9598 lString32 middleName = pauthor.relative( U"/middle-name" ).getText().trim();
9599 lString32 author = firstName;
9600 if ( !author.empty() )
9601 author += " ";
9602 if ( !middleName.empty() )
9603 author += shortMiddleName ? lString32(middleName, 0, 1) + "." : middleName;
9604 if ( !lastName.empty() && !author.empty() )
9605 author += " ";
9606 author += lastName;
9607 if ( !authors.empty() )
9608 authors += delimiter;
9609 authors += author;
9610 }
9611 return authors;
9612 }
9613
extractDocTitle(ldomDocument * doc)9614 lString32 extractDocTitle( ldomDocument * doc )
9615 {
9616 return doc->createXPointer(U"/FictionBook/description/title-info/book-title").getText().trim();
9617 }
9618
extractDocLanguage(ldomDocument * doc)9619 lString32 extractDocLanguage( ldomDocument * doc )
9620 {
9621 return doc->createXPointer(U"/FictionBook/description/title-info/lang").getText().trim();
9622 }
9623
extractDocSeries(ldomDocument * doc,int * pSeriesNumber)9624 lString32 extractDocSeries( ldomDocument * doc, int * pSeriesNumber )
9625 {
9626 lString32 res;
9627 ldomNode * series = doc->createXPointer(U"/FictionBook/description/title-info/sequence").getNode();
9628 if ( series ) {
9629 lString32 sname = lString32(series->getAttributeValue(attr_name)).trim();
9630 lString32 snumber = series->getAttributeValue(attr_number);
9631 if ( !sname.empty() ) {
9632 if ( pSeriesNumber ) {
9633 *pSeriesNumber = snumber.atoi();
9634 res = sname;
9635 } else {
9636 res << "(" << sname;
9637 if ( !snumber.empty() )
9638 res << " #" << snumber << ")";
9639 }
9640 }
9641 }
9642 return res;
9643 }
9644
extractDocKeywords(ldomDocument * doc)9645 lString32 extractDocKeywords( ldomDocument * doc )
9646 {
9647 lString32 res;
9648 #if 0
9649 // Year
9650 res << doc->createXPointer(U"/FictionBook/description/title-info/date").getText().trim();
9651 #endif
9652 // Genres
9653 // We use "\n" as a separator here, so if you change it here, you must also change it in
9654 // Engine.scanBookPropertiesInternal(), DocView.updateBookInfoInternal().
9655 for ( int i=0; i<16; i++) {
9656 lString32 path = cs32("/FictionBook/description/title-info/genre[") + fmt::decimal(i+1) + "]";
9657 ldomXPointer genre = doc->createXPointer(path);
9658 if ( !genre ) {
9659 break;
9660 }
9661 lString32 text = genre.getText().trim();
9662 if (!text.empty()) {
9663 if (!res.empty())
9664 res << "\n";
9665 res << text;
9666 }
9667 }
9668 return res;
9669 }
9670
extractDocDescription(ldomDocument * doc)9671 lString32 extractDocDescription( ldomDocument * doc )
9672 {
9673 // We put all other FB2 meta info in this description
9674 lString32 res;
9675
9676 // Annotation (description)
9677 res << doc->createXPointer(U"/FictionBook/description/title-info/annotation").getText().trim();
9678
9679 // Translators
9680 lString32 translators;
9681 int nbTranslators = 0;
9682 for ( int i=0; i<16; i++) {
9683 lString32 path = cs32("/FictionBook/description/title-info/translator[") + fmt::decimal(i+1) + "]";
9684 ldomXPointer ptranslator = doc->createXPointer(path);
9685 if ( !ptranslator ) {
9686 break;
9687 }
9688 lString32 firstName = ptranslator.relative( U"/first-name" ).getText().trim();
9689 lString32 lastName = ptranslator.relative( U"/last-name" ).getText().trim();
9690 lString32 middleName = ptranslator.relative( U"/middle-name" ).getText().trim();
9691 lString32 translator = firstName;
9692 if ( !translator.empty() )
9693 translator += " ";
9694 if ( !middleName.empty() )
9695 translator += middleName;
9696 if ( !lastName.empty() && !translator.empty() )
9697 translator += " ";
9698 translator += lastName;
9699 if ( !translators.empty() )
9700 translators << "\n";
9701 translators << translator;
9702 nbTranslators++;
9703 }
9704 if ( !translators.empty() ) {
9705 if ( !res.empty() )
9706 res << "\n\n";
9707 if ( nbTranslators > 1 )
9708 res << "Translators:\n" << translators;
9709 else
9710 res << "Translator: " << translators;
9711 }
9712
9713 // Publication info & publisher
9714 ldomXPointer publishInfo = doc->createXPointer(U"/FictionBook/description/publish-info");
9715 if ( !publishInfo.isNull() ) {
9716 lString32 publisher = publishInfo.relative( U"/publisher" ).getText().trim();
9717 lString32 pubcity = publishInfo.relative( U"/city" ).getText().trim();
9718 lString32 pubyear = publishInfo.relative( U"/year" ).getText().trim();
9719 lString32 isbn = publishInfo.relative( U"/isbn" ).getText().trim();
9720 lString32 bookName = publishInfo.relative( U"/book-name" ).getText().trim();
9721 lString32 publication;
9722 if ( !publisher.empty() || !pubcity.empty() ) {
9723 if ( !publisher.empty() ) {
9724 publication << publisher;
9725 }
9726 if ( !pubcity.empty() ) {
9727 if ( !!publisher.empty() ) {
9728 publication << ", ";
9729 }
9730 publication << pubcity;
9731 }
9732 }
9733 if ( !pubyear.empty() || !isbn.empty() ) {
9734 if ( !publication.empty() )
9735 publication << "\n";
9736 if ( !pubyear.empty() ) {
9737 publication << pubyear;
9738 }
9739 if ( !isbn.empty() ) {
9740 if ( !pubyear.empty() ) {
9741 publication << ", ";
9742 }
9743 publication << isbn;
9744 }
9745 }
9746 if ( !bookName.empty() ) {
9747 if ( !publication.empty() )
9748 publication << "\n";
9749 publication << bookName;
9750 }
9751 if ( !publication.empty() ) {
9752 if ( !res.empty() )
9753 res << "\n\n";
9754 res << "Publication:\n" << publication;
9755 }
9756 }
9757
9758 // Document info
9759 ldomXPointer pDocInfo = doc->createXPointer(U"/FictionBook/description/document-info");
9760 if ( !pDocInfo.isNull() ) {
9761 lString32 docInfo;
9762 lString32 docAuthors;
9763 int nbAuthors = 0;
9764 for ( int i=0; i<16; i++) {
9765 lString32 path = cs32("/FictionBook/description/document-info/author[") + fmt::decimal(i+1) + "]";
9766 ldomXPointer pdocAuthor = doc->createXPointer(path);
9767 if ( !pdocAuthor ) {
9768 break;
9769 }
9770 lString32 firstName = pdocAuthor.relative( U"/first-name" ).getText().trim();
9771 lString32 lastName = pdocAuthor.relative( U"/last-name" ).getText().trim();
9772 lString32 middleName = pdocAuthor.relative( U"/middle-name" ).getText().trim();
9773 lString32 docAuthor = firstName;
9774 if ( !docAuthor.empty() )
9775 docAuthor += " ";
9776 if ( !middleName.empty() )
9777 docAuthor += middleName;
9778 if ( !lastName.empty() && !docAuthor.empty() )
9779 docAuthor += " ";
9780 docAuthor += lastName;
9781 if ( !docAuthors.empty() )
9782 docAuthors << "\n";
9783 docAuthors << docAuthor;
9784 nbAuthors++;
9785 }
9786 if ( !docAuthors.empty() ) {
9787 if ( nbAuthors > 1 )
9788 docInfo << "Authors:\n" << docAuthors;
9789 else
9790 docInfo << "Author: " << docAuthors;
9791 }
9792 lString32 docPublisher = pDocInfo.relative( U"/publisher" ).getText().trim();
9793 lString32 docId = pDocInfo.relative( U"/id" ).getText().trim();
9794 lString32 docVersion = pDocInfo.relative( U"/version" ).getText().trim();
9795 lString32 docDate = pDocInfo.relative( U"/date" ).getText().trim();
9796 lString32 docHistory = pDocInfo.relative( U"/history" ).getText().trim();
9797 lString32 docSrcUrl = pDocInfo.relative( U"/src-url" ).getText().trim();
9798 lString32 docSrcOcr = pDocInfo.relative( U"/src-ocr" ).getText().trim();
9799 lString32 docProgramUsed = pDocInfo.relative( U"/program-used" ).getText().trim();
9800 if ( !docPublisher.empty() ) {
9801 if ( !docInfo.empty() )
9802 docInfo << "\n";
9803 docInfo << "Publisher: " << docPublisher;
9804 }
9805 if ( !docId.empty() ) {
9806 if ( !docInfo.empty() )
9807 docInfo << "\n";
9808 docInfo << "Id: " << docId;
9809 }
9810 if ( !docVersion.empty() ) {
9811 if ( !docInfo.empty() )
9812 docInfo << "\n";
9813 docInfo << "Version: " << docVersion;
9814 }
9815 if ( !docDate.empty() ) {
9816 if ( !docInfo.empty() )
9817 docInfo << "\n";
9818 docInfo << "Date: " << docDate;
9819 }
9820 if ( !docHistory.empty() ) {
9821 if ( !docInfo.empty() )
9822 docInfo << "\n";
9823 docInfo << "History: " << docHistory;
9824 }
9825 if ( !docSrcUrl.empty() ) {
9826 if ( !docInfo.empty() )
9827 docInfo << "\n";
9828 docInfo << "URL: " << docSrcUrl;
9829 }
9830 if ( !docSrcOcr.empty() ) {
9831 if ( !docInfo.empty() )
9832 docInfo << "\n";
9833 docInfo << "OCR: " << docSrcOcr;
9834 }
9835 if ( !docProgramUsed.empty() ) {
9836 if ( !docInfo.empty() )
9837 docInfo << "\n";
9838 docInfo << "Application: " << docProgramUsed;
9839 }
9840 if ( !docInfo.empty() ) {
9841 if ( !res.empty() )
9842 res << "\n\n";
9843 res << "Document:\n" << docInfo;
9844 }
9845 }
9846
9847 return res;
9848 }
9849
initIndex()9850 void ldomXPointerEx::initIndex()
9851 {
9852 int m[MAX_DOM_LEVEL];
9853 ldomNode * p = getNode();
9854 _level = 0;
9855 while ( p ) {
9856 m[_level] = p->getNodeIndex();
9857 _level++;
9858 if ( _level == MAX_DOM_LEVEL ) {
9859 getDocument()->printWarning("ldomXPointerEx level overflow (too many nested nodes)", 1);
9860 break;
9861 }
9862 p = p->getParentNode();
9863 }
9864 for ( int i=0; i<_level; i++ ) {
9865 _indexes[ i ] = m[ _level - i - 1 ];
9866 }
9867 }
9868
9869 /// move to sibling #
sibling(int index)9870 bool ldomXPointerEx::sibling( int index )
9871 {
9872 if ( _level <= 1 )
9873 return false;
9874 ldomNode * p = getNode()->getParentNode();
9875 if ( !p || index < 0 || index >= (int)p->getChildCount() )
9876 return false;
9877 setNode( p->getChildNode( index ) );
9878 setOffset(0);
9879 _indexes[ _level-1 ] = index;
9880 return true;
9881 }
9882
9883 /// move to next sibling
nextSibling()9884 bool ldomXPointerEx::nextSibling()
9885 {
9886 if ( _level <= 1 )
9887 return false;
9888 return sibling( _indexes[_level-1] + 1 );
9889 }
9890
9891 /// move to previous sibling
prevSibling()9892 bool ldomXPointerEx::prevSibling()
9893 {
9894 if ( _level <= 1 )
9895 return false;
9896 return sibling( _indexes[_level-1] - 1 );
9897 }
9898
9899 /// move to next sibling element
nextSiblingElement()9900 bool ldomXPointerEx::nextSiblingElement()
9901 {
9902 if ( _level <= 1 )
9903 return false;
9904 ldomNode * node = getNode();
9905 ldomNode * p = node->getParentNode();
9906 for ( int i=_indexes[_level-1] + 1; i<(int)p->getChildCount(); i++ ) {
9907 if ( p->getChildNode( i )->isElement() )
9908 return sibling( i );
9909 }
9910 return false;
9911 }
9912
9913 /// move to previous sibling element
prevSiblingElement()9914 bool ldomXPointerEx::prevSiblingElement()
9915 {
9916 if ( _level <= 1 )
9917 return false;
9918 ldomNode * node = getNode();
9919 ldomNode * p = node->getParentNode();
9920 for ( int i=_indexes[_level-1] - 1; i>=0; i-- ) {
9921 if ( p->getChildNode( i )->isElement() )
9922 return sibling( i );
9923 }
9924 return false;
9925 }
9926
9927 /// move to next sibling or parent's next sibling
nextOuterElement()9928 bool ldomXPointerEx::nextOuterElement()
9929 {
9930 if ( !ensureElement() )
9931 return false;
9932 for (;;) {
9933 if ( nextSiblingElement() )
9934 return true;
9935 if ( !parent() )
9936 return false;
9937 }
9938 }
9939
9940 /// move to (end of) last and deepest child node descendant of current node
lastInnerNode(bool toTextEnd)9941 bool ldomXPointerEx::lastInnerNode(bool toTextEnd)
9942 {
9943 if ( !getNode() )
9944 return false;
9945 while ( lastChild() ) {}
9946 if ( isText() && toTextEnd ) {
9947 setOffset(getNode()->getText().length());
9948 }
9949 return true;
9950 }
9951
9952 /// move to (end of) last and deepest child text node descendant of current node
lastInnerTextNode(bool toTextEnd)9953 bool ldomXPointerEx::lastInnerTextNode(bool toTextEnd)
9954 {
9955 if ( !getNode() )
9956 return false;
9957 if ( isText() ) {
9958 if (toTextEnd)
9959 setOffset(getNode()->getText().length());
9960 return true;
9961 }
9962 if ( lastChild() ) {
9963 do {
9964 if (lastInnerTextNode(toTextEnd))
9965 return true;
9966 } while ( prevSibling() );
9967 parent();
9968 }
9969 return false;
9970
9971 }
9972
9973 /// move to parent
parent()9974 bool ldomXPointerEx::parent()
9975 {
9976 if ( _level<=1 )
9977 return false;
9978 setNode( getNode()->getParentNode() );
9979 setOffset(0);
9980 _level--;
9981 return true;
9982 }
9983
9984 /// move to child #
child(int index)9985 bool ldomXPointerEx::child( int index )
9986 {
9987 if ( _level >= MAX_DOM_LEVEL )
9988 return false;
9989 int count = getNode()->getChildCount();
9990 if ( index<0 || index>=count )
9991 return false;
9992 _indexes[ _level++ ] = index;
9993 setNode( getNode()->getChildNode( index ) );
9994 setOffset(0);
9995 return true;
9996 }
9997
9998 /// compare two pointers, returns -1, 0, +1
compare(const ldomXPointerEx & v) const9999 int ldomXPointerEx::compare( const ldomXPointerEx& v ) const
10000 {
10001 int i;
10002 for ( i=0; i<_level && i<v._level; i++ ) {
10003 if ( _indexes[i] < v._indexes[i] )
10004 return -1;
10005 if ( _indexes[i] > v._indexes[i] )
10006 return 1;
10007 }
10008 if ( _level < v._level ) {
10009 return -1;
10010 // if ( getOffset() < v._indexes[i] )
10011 // return -1;
10012 // if ( getOffset() > v._indexes[i] )
10013 // return 1;
10014 // return -1;
10015 }
10016 if ( _level > v._level ) {
10017 if ( _indexes[i] < v.getOffset() )
10018 return -1;
10019 if ( _indexes[i] > v.getOffset() )
10020 return 1;
10021 return 1;
10022 }
10023 if ( getOffset() < v.getOffset() )
10024 return -1;
10025 if ( getOffset() > v.getOffset() )
10026 return 1;
10027 return 0;
10028 }
10029
10030 /// calls specified function recursively for all elements of DOM tree
recurseElements(void (* pFun)(ldomXPointerEx & node))10031 void ldomXPointerEx::recurseElements( void (*pFun)( ldomXPointerEx & node ) )
10032 {
10033 if ( !isElement() )
10034 return;
10035 pFun( *this );
10036 if ( child( 0 ) ) {
10037 do {
10038 recurseElements( pFun );
10039 } while ( nextSibling() );
10040 parent();
10041 }
10042 }
10043
10044 /// calls specified function recursively for all nodes of DOM tree
recurseNodes(void (* pFun)(ldomXPointerEx & node))10045 void ldomXPointerEx::recurseNodes( void (*pFun)( ldomXPointerEx & node ) )
10046 {
10047 if ( !isElement() )
10048 return;
10049 pFun( *this );
10050 if ( child( 0 ) ) {
10051 do {
10052 recurseElements( pFun );
10053 } while ( nextSibling() );
10054 parent();
10055 }
10056 }
10057
10058 /// returns true if this interval intersects specified interval
checkIntersection(ldomXRange & v)10059 bool ldomXRange::checkIntersection( ldomXRange & v )
10060 {
10061 if ( isNull() || v.isNull() )
10062 return false;
10063 if ( _end.compare( v._start ) < 0 )
10064 return false;
10065 if ( _start.compare( v._end ) > 0 )
10066 return false;
10067 return true;
10068 }
10069
10070 /// create list by filtering existing list, to get only values which intersect filter range
ldomXRangeList(ldomXRangeList & srcList,ldomXRange & filter)10071 ldomXRangeList::ldomXRangeList( ldomXRangeList & srcList, ldomXRange & filter )
10072 {
10073 for ( int i=0; i<srcList.length(); i++ ) {
10074 if ( srcList[i]->checkIntersection( filter ) )
10075 LVPtrVector<ldomXRange>::add( new ldomXRange( *srcList[i] ) );
10076 }
10077 }
10078
10079 /// copy constructor of full node range
ldomXRange(ldomNode * p,bool fitEndToLastInnerChild)10080 ldomXRange::ldomXRange( ldomNode * p, bool fitEndToLastInnerChild )
10081 : _start( p, 0 ), _end( p, p->isText() ? p->getText().length() : p->getChildCount() ), _flags(1)
10082 {
10083 // Note: the above initialization seems wrong: for a non-text
10084 // node, offset seems of no-use, and setting it to the number
10085 // of children wouldn't matter (and if the original aim was to
10086 // extend end to include the last child, the range would ignore
10087 // this last child descendants).
10088 // The following change might well be the right behaviour expected
10089 // from ldomXRange(ldomNode) and fixing a bug, but let's keep
10090 // this "fixed" behaviour an option
10091 if (fitEndToLastInnerChild && !p->isText()) {
10092 // Update _end to point to the last deepest inner child node,
10093 // and to the end of its text if it is a text npde.
10094 ldomXPointerEx tmp = _start;
10095 if (tmp.lastInnerNode(true)) {
10096 _end = tmp;
10097 }
10098 }
10099 // Note: code that walks or compare a ldomXRange may include or
10100 // exclude the _end: most often, it's excluded.
10101 // If it is a text node, the end points to text.length(), so after the
10102 // last char, and it then includes the last char.
10103 // If it is a non-text node, we could choose to include or exclude it
10104 // in XPointers comparisons. Including it would have the node included,
10105 // but not its children (because a child is after its parent in
10106 // comparisons), which feels strange.
10107 // So, excluding it looks like the sanest choice.
10108 // But then, with fitEndToLastInnerChild, if that last inner child
10109 // is a <IMG> node, it will be the _end, but won't then be included
10110 // in the range... The proper way to include it then would be to use
10111 // ldomXPointerEx::nextOuterElement(), but this is just a trick (it
10112 // would fail if that node is the last in the document, and
10113 // getNearestParent() would move up unnecessary ancestors...)
10114 // So, better check the functions that we use to see how they would
10115 // cope with that case.
10116 }
10117
/// Returns the pointer that compares greater (v1 when both compare equal).
static const ldomXPointerEx & _max( const ldomXPointerEx & v1, const ldomXPointerEx & v2 )
{
    return v1.compare( v2 ) >= 0 ? v1 : v2;
}
10126
/// Returns the pointer that compares lower (v1 when both compare equal).
static const ldomXPointerEx & _min( const ldomXPointerEx & v1, const ldomXPointerEx & v2 )
{
    return v1.compare( v2 ) <= 0 ? v1 : v2;
}
10135
/// create intersection of two ranges
/// (start is the greater of the two starts and end the lower of the two
/// ends, as ordered by ldomXPointerEx::compare(); nothing here checks that
/// the two ranges actually intersect)
// NOTE(review): _flags is not part of this initializer list, while
// ldomXRange(ldomNode*, bool) explicitly sets _flags(1) - confirm that the
// class declaration gives it a defined value.
ldomXRange::ldomXRange( const ldomXRange & v1, const ldomXRange & v2 )
    : _start( _max( v1._start, v2._start ) ), _end( _min( v1._end, v2._end ) )
{
}
10141
10142 /// create list splittiny existing list into non-overlapping ranges
ldomXRangeList(ldomXRangeList & srcList,bool splitIntersections)10143 ldomXRangeList::ldomXRangeList( ldomXRangeList & srcList, bool splitIntersections )
10144 {
10145 if ( srcList.empty() )
10146 return;
10147 int i;
10148 if ( splitIntersections ) {
10149 ldomXRange * maxRange = new ldomXRange( *srcList[0] );
10150 for ( i=1; i<srcList.length(); i++ ) {
10151 if ( srcList[i]->getStart().compare( maxRange->getStart() ) < 0 )
10152 maxRange->setStart( srcList[i]->getStart() );
10153 if ( srcList[i]->getEnd().compare( maxRange->getEnd() ) > 0 )
10154 maxRange->setEnd( srcList[i]->getEnd() );
10155 }
10156 maxRange->setFlags(0);
10157 add( maxRange );
10158 for ( i=0; i<srcList.length(); i++ )
10159 split( srcList[i] );
10160 for ( int i=length()-1; i>=0; i-- ) {
10161 if ( get(i)->getFlags()==0 )
10162 erase( i, 1 );
10163 }
10164 } else {
10165 for ( i=0; i<srcList.length(); i++ )
10166 add( new ldomXRange( *srcList[i] ) );
10167 }
10168 }
10169
/// split into subranges using intersection
/// Each range of this list that intersects r is replaced by 1 to 3 ranges,
/// cut at the bounds of r: the parts also covered by r get the flags of
/// both ranges or'ed, the other parts keep the original flags.
/// r itself is not modified nor stored in the list.
void ldomXRangeList::split( ldomXRange * r )
{
    int i;
    for ( i=0; i<length(); i++ ) {
        if ( r->checkIntersection( *get(i) ) ) {
            // Take the range out of the list: it is either replaced by new
            // ranges (and deleted), or put back at the same position
            ldomXRange * src = remove( i );
            int cmp1 = src->getStart().compare( r->getStart() );
            int cmp2 = src->getEnd().compare( r->getEnd() );
            //TODO: add intersections
            // In each branch, i is left on the last inserted range, so the
            // loop's i++ resumes with the range that follows them
            if ( cmp1 < 0 && cmp2 < 0 ) {
                //   0====== src ======0
                //        X======= r=========X
                //   1111122222222222222
                ldomXRange * r1 = new ldomXRange( src->getStart(), r->getStart(), src->getFlags() );
                ldomXRange * r2 = new ldomXRange( r->getStart(), src->getEnd(), src->getFlags() | r->getFlags() );
                insert( i++, r1 );
                insert( i, r2 );
                delete src;
            } else if ( cmp1 > 0 && cmp2 > 0 ) {
                //           0====== src ======0
                //     X======= r=========X
                //           2222222222222233333
                ldomXRange * r2 = new ldomXRange( src->getStart(), r->getEnd(), src->getFlags() | r->getFlags() );
                ldomXRange * r3 = new ldomXRange( r->getEnd(), src->getEnd(), src->getFlags() );
                insert( i++, r2 );
                insert( i, r3 );
                delete src;
            } else if ( cmp1 < 0 && cmp2 > 0 ) {
                // 0====== src ================0
                //     X======= r=========X
                ldomXRange * r1 = new ldomXRange( src->getStart(), r->getStart(), src->getFlags() );
                ldomXRange * r2 = new ldomXRange( r->getStart(), r->getEnd(), src->getFlags() | r->getFlags() );
                ldomXRange * r3 = new ldomXRange( r->getEnd(), src->getEnd(), src->getFlags() );
                insert( i++, r1 );
                insert( i++, r2 );
                insert( i, r3 );
                delete src;
            } else if ( cmp1 == 0 && cmp2 > 0 ) {
                //   0====== src ========0
                //   X====== r=====X
                ldomXRange * r1 = new ldomXRange( src->getStart(), r->getEnd(), src->getFlags() | r->getFlags() );
                ldomXRange * r2 = new ldomXRange( r->getEnd(), src->getEnd(), src->getFlags() );
                insert( i++, r1 );
                insert( i, r2 );
                delete src;
            } else if ( cmp1 < 0 && cmp2 == 0 ) {
                //   0====== src =====0
                //      X====== r=====X
                ldomXRange * r1 = new ldomXRange( src->getStart(), r->getStart(), src->getFlags() );
                ldomXRange * r2 = new ldomXRange( r->getStart(), r->getEnd(), src->getFlags() | r->getFlags() );
                insert( i++, r1 );
                insert( i, r2 );
                delete src;
            } else {
                // Remaining cases: src is fully covered by r, so it is kept
                // whole and only gets r's flags added
                //        0====== src =====0
                //   X============== r===========X
                //
                //   0====== src =====0
                //   X============== r=====X
                //
                //        0====== src =====0
                //   X============== r=====X
                //
                //   0====== src ========0
                //   X========== r=======X
                src->setFlags( src->getFlags() | r->getFlags() );
                insert( i, src );
            }
        }
    }
}
10242
10243 #if BUILD_LITE!=1
10244
findText(lString32 pattern,bool caseInsensitive,bool reverse,int minY,int maxY,LVArray<ldomWord> & words,int maxCount,int maxHeight,int maxHeightCheckStartY)10245 bool ldomDocument::findText( lString32 pattern, bool caseInsensitive, bool reverse, int minY, int maxY, LVArray<ldomWord> & words, int maxCount, int maxHeight, int maxHeightCheckStartY )
10246 {
10247 if ( minY<0 )
10248 minY = 0;
10249 int fh = getFullHeight();
10250 if ( maxY<=0 || maxY>fh )
10251 maxY = fh;
10252 // ldomXPointer start = createXPointer( lvPoint(0, minY), reverse?-1:1 );
10253 // ldomXPointer end = createXPointer( lvPoint(10000, maxY), reverse?-1:1 );
10254 // If we're provided with minY or maxY in some empty space (margins, empty
10255 // elements...), they may not resolve to a XPointer.
10256 // Find a valid y near each of them that does resolve to a XPointer:
10257 // We also want to get start/end point to logical-order HTML nodes,
10258 // which might be different from visual-order in bidi text.
10259 ldomXPointer start;
10260 ldomXPointer end;
10261 for (int y = minY; y >= 0; y--) {
10262 start = createXPointer( lvPoint(0, y), reverse ? PT_DIR_SCAN_BACKWARD_LOGICAL_FIRST
10263 : PT_DIR_SCAN_FORWARD_LOGICAL_FIRST );
10264 if (!start.isNull())
10265 break;
10266 }
10267 if (start.isNull()) {
10268 // If none found (can happen when minY=0 and blank content at start
10269 // of document like a <br/>), scan forward from document start
10270 for (int y = 0; y <= fh; y++) {
10271 start = createXPointer( lvPoint(0, y), reverse ? PT_DIR_SCAN_BACKWARD_LOGICAL_FIRST
10272 : PT_DIR_SCAN_FORWARD_LOGICAL_FIRST );
10273 if (!start.isNull())
10274 break;
10275 }
10276 }
10277 for (int y = maxY; y <= fh; y++) {
10278 end = createXPointer( lvPoint(10000, y), reverse ? PT_DIR_SCAN_BACKWARD_LOGICAL_LAST
10279 : PT_DIR_SCAN_FORWARD_LOGICAL_LAST );
10280 if (!end.isNull())
10281 break;
10282 }
10283 if (end.isNull()) {
10284 // If none found (can happen when maxY=fh and blank content at end
10285 // of book like a <br/>), scan backward from document end
10286 for (int y = fh; y >= 0; y--) {
10287 end = createXPointer( lvPoint(10000, y), reverse ? PT_DIR_SCAN_BACKWARD_LOGICAL_LAST
10288 : PT_DIR_SCAN_FORWARD_LOGICAL_LAST );
10289 if (!end.isNull())
10290 break;
10291 }
10292 }
10293
10294 if ( start.isNull() || end.isNull() )
10295 return false;
10296 ldomXRange range( start, end );
10297 CRLog::debug("ldomDocument::findText() for Y %d..%d, range %d..%d",
10298 minY, maxY, start.toPoint().y, end.toPoint().y);
10299 if ( range.getStart().toPoint().y==-1 ) {
10300 range.getStart().nextVisibleText();
10301 CRLog::debug("ldomDocument::findText() updated range %d..%d",
10302 range.getStart().toPoint().y, range.getEnd().toPoint().y);
10303 }
10304 if ( range.getEnd().toPoint().y==-1 ) {
10305 range.getEnd().prevVisibleText();
10306 CRLog::debug("ldomDocument::findText() updated range %d..%d",
10307 range.getStart().toPoint().y, range.getEnd().toPoint().y);
10308 }
10309 if ( range.isNull() ) {
10310 CRLog::debug("No text found: Range is empty");
10311 return false;
10312 }
10313 return range.findText( pattern, caseInsensitive, reverse, words, maxCount, maxHeight, maxHeightCheckStartY );
10314 }
10315
findText(const lString32 & str,int & pos,int & endpos,const lString32 & pattern)10316 static bool findText( const lString32 & str, int & pos, int & endpos, const lString32 & pattern )
10317 {
10318 int len = pattern.length();
10319 if ( pos < 0 || pos + len > (int)str.length() )
10320 return false;
10321 const lChar32 * s1 = str.c_str() + pos;
10322 const lChar32 * s2 = pattern.c_str();
10323 int nlen = str.length() - pos - len;
10324 for ( int j=0; j<=nlen; j++ ) {
10325 bool matched = true;
10326 int nsofthyphens = 0; // There can be soft-hyphen in str, but not in pattern
10327 for ( int i=0; i<len; i++ ) {
10328 while ( i+nsofthyphens < nlen && s1[i+nsofthyphens] == UNICODE_SOFT_HYPHEN_CODE ) {
10329 nsofthyphens += 1;
10330 }
10331 if ( s1[i+nsofthyphens] != s2[i] ) {
10332 matched = false;
10333 break;
10334 }
10335 }
10336 if ( matched ) {
10337 endpos = pos + len + nsofthyphens;
10338 return true;
10339 }
10340 s1++;
10341 pos++;
10342 }
10343 return false;
10344 }
10345
findTextRev(const lString32 & str,int & pos,int & endpos,const lString32 & pattern)10346 static bool findTextRev( const lString32 & str, int & pos, int & endpos, const lString32 & pattern )
10347 {
10348 int len = pattern.length();
10349 if ( pos+len>(int)str.length() )
10350 pos = str.length()-len;
10351 if ( pos < 0 )
10352 return false;
10353 const lChar32 * s1 = str.c_str() + pos;
10354 const lChar32 * s2 = pattern.c_str();
10355 int nlen = pos;
10356 for ( int j=nlen; j>=0; j-- ) {
10357 bool matched = true;
10358 int nsofthyphens = 0; // There can be soft-hyphen in str, but not in pattern
10359 for ( int i=0; i<len; i++ ) {
10360 while ( i+nsofthyphens < nlen && s1[i+nsofthyphens] == UNICODE_SOFT_HYPHEN_CODE ) {
10361 nsofthyphens += 1;
10362 }
10363 if ( s1[i+nsofthyphens] != s2[i] ) {
10364 matched = false;
10365 break;
10366 }
10367 }
10368 if ( matched ) {
10369 endpos = pos + len + nsofthyphens;
10370 return true;
10371 }
10372 s1--;
10373 pos--;
10374 }
10375 return false;
10376 }
10377
/// searches for specified text inside range
// Collects into 'words' (cleared first) one ldomWord per occurrence of
// 'pattern' found in the text nodes visited, and returns true if at least
// one was found. An empty pattern returns false.
//   pattern              text to look for (lowercased here when caseInsensitive)
//   caseInsensitive      when true, each text node's text is lowercased too
//                        before being compared
//   reverse              when true, walk text nodes backward from the range
//                        end; otherwise walk forward from the range start
//   words                output list of matches (node, start offset, end offset)
//   maxCount             stop visiting further text nodes once this many
//                        matches have been collected (it is only checked
//                        after a text node has been fully searched, so more
//                        than maxCount matches can be returned)
//   maxHeight            when > 0, stop once the text node being visited is
//                        more than maxHeight away (in y) from the reference y
//   maxHeightCheckStartY when not -1, the y of a match is only taken as the
//                        reference y if it is <= (reverse) or >= (forward)
//                        this value
//   checkMaxFromStart    forward search only: take the y of the range start
//                        as the reference y, instead of the first match's y
// Note: this moves this range's own _start (forward) or _end (reverse)
// pointers while iterating.
bool ldomXRange::findText( lString32 pattern, bool caseInsensitive, bool reverse, LVArray<ldomWord> & words, int maxCount, int maxHeight, int maxHeightCheckStartY, bool checkMaxFromStart )
{
    if ( caseInsensitive )
        pattern.lowercase();
    words.clear();
    if ( pattern.empty() )
        return false;
    if ( reverse ) {
        // reverse search
        if ( !_end.isText() ) {
            // Range end is not in a text node: start from the end of the
            // previous visible text node
            _end.prevVisibleText();
            lString32 txt = _end.getNode()->getText();
            _end.setOffset(txt.length());
        }
        int firstFoundTextY = -1; // reference y for the maxHeight check (-1: not set yet)
        while ( !isNull() ) {

            lString32 txt = _end.getNode()->getText();
            int offs = _end.getOffset();
            int endpos;

            if ( firstFoundTextY!=-1 && maxHeight>0 ) {
                // Stop when this text node is too far above the reference y
                ldomXPointer p( _end.getNode(), offs );
                int currentTextY = p.toPoint().y;
                if ( currentTextY<firstFoundTextY-maxHeight )
                    return words.length()>0;
            }

            if ( caseInsensitive )
                txt.lowercase();

            // Collect all matches in this text node, from offs backward
            while ( ::findTextRev( txt, offs, endpos, pattern ) ) {
                if ( firstFoundTextY==-1 && maxHeight>0 ) {
                    ldomXPointer p( _end.getNode(), offs );
                    int currentTextY = p.toPoint().y;
                    if (maxHeightCheckStartY == -1 || currentTextY <= maxHeightCheckStartY)
                        firstFoundTextY = currentTextY;
                }
                words.add( ldomWord(_end.getNode(), offs, endpos ) );
                offs--; // continue searching before this match's start
            }
            if ( !_end.prevVisibleText() )
                break;
            // Position at the end of the previous visible text node
            txt = _end.getNode()->getText();
            _end.setOffset(txt.length());
            if ( words.length() >= maxCount )
                break;
        }
    } else {
        // direct search
        if ( !_start.isText() )
            _start.nextVisibleText();
        int firstFoundTextY = -1; // reference y for the maxHeight check (-1: not set yet)
        if (checkMaxFromStart) {
            // Use the range start's y as the reference
            ldomXPointer p( _start.getNode(), _start.getOffset() );
            firstFoundTextY = p.toPoint().y;
        }
        while ( !isNull() ) {
            int offs = _start.getOffset();
            int endpos;

            if ( firstFoundTextY!=-1 && maxHeight>0 ) {
                // Stop when this text node is too far below the reference y
                // (with checkMaxFromStart, being exactly maxHeight away is
                // already too far)
                ldomXPointer p( _start.getNode(), offs );
                int currentTextY = p.toPoint().y;
                if ( (checkMaxFromStart && currentTextY>=firstFoundTextY+maxHeight) ||
                        currentTextY>firstFoundTextY+maxHeight )
                    return words.length()>0;
            }

            lString32 txt = _start.getNode()->getText();
            if ( caseInsensitive )
                txt.lowercase();

            // Collect all matches in this text node, from offs forward
            while ( ::findText( txt, offs, endpos, pattern ) ) {
                if ( firstFoundTextY==-1 && maxHeight>0 ) {
                    ldomXPointer p( _start.getNode(), offs );
                    int currentTextY = p.toPoint().y;
                    if (checkMaxFromStart) {
                        if ( currentTextY>=firstFoundTextY+maxHeight )
                            return words.length()>0;
                    } else {
                        if (maxHeightCheckStartY == -1 || currentTextY >= maxHeightCheckStartY)
                            firstFoundTextY = currentTextY;
                    }
                }
                words.add( ldomWord(_start.getNode(), offs, endpos ) );
                offs++; // continue searching after this match's start
            }
            if ( !_start.nextVisibleText() )
                break;
            if ( words.length() >= maxCount )
                break;
        }
    }
    return words.length() > 0;
}
10475
10476 /// fill marked ranges list
10477 // Transform a list of ldomXRange (start and end xpointers) into a list
10478 // of ldomMarkedRange (start and end point coordinates) for native
10479 // drawing of highlights
getRanges(ldomMarkedRangeList & dst)10480 void ldomXRangeList::getRanges( ldomMarkedRangeList &dst )
10481 {
10482 dst.clear();
10483 if ( empty() )
10484 return;
10485 for ( int i=0; i<length(); i++ ) {
10486 ldomXRange * range = get(i);
10487 if (range->getFlags() < 0x10) {
10488 // Legacy marks drawing: make a single ldomMarkedRange spanning
10489 // multiple lines, assuming full width LTR paragraphs)
10490 // (Updated to use toPoint(extended=true) to have them shifted
10491 // by the margins and paddings of final blocks, to be compatible
10492 // with getSegmentRects() below that does that internally.)
10493 lvPoint ptStart = range->getStart().toPoint(true);
10494 lvPoint ptEnd = range->getEnd().toPoint(true);
10495 // LVE:DEBUG
10496 // CRLog::trace("selectRange( %d,%d : %d,%d : %s, %s )", ptStart.x, ptStart.y, ptEnd.x, ptEnd.y,
10497 // LCSTR(range->getStart().toString()), LCSTR(range->getEnd().toString()) );
10498 if ( ptStart.y > ptEnd.y || ( ptStart.y == ptEnd.y && ptStart.x >= ptEnd.x ) ) {
10499 // Swap ptStart and ptEnd if coordinates seems inverted (or we would
10500 // get item->empty()), which is needed for bidi/rtl.
10501 // Hoping this has no side effect.
10502 lvPoint ptTmp = ptStart;
10503 ptStart = ptEnd;
10504 ptEnd = ptTmp;
10505 }
10506 ldomMarkedRange * item = new ldomMarkedRange( ptStart, ptEnd, range->getFlags() );
10507 if ( !item->empty() )
10508 dst.add( item );
10509 else
10510 delete item;
10511 }
10512 else {
10513 // Enhanced marks drawing: from a single ldomXRange, make multiple segmented
10514 // ldomMarkedRange, each spanning a single line.
10515 LVArray<lvRect> rects;
10516 range->getSegmentRects(rects);
10517 for (int i=0; i<rects.length(); i++) {
10518 lvRect r = rects[i];
10519 // printf("r %d %dx%d %dx%d\n", i, r.topLeft().x, r.topLeft().y, r.bottomRight().x, r.bottomRight().y);
10520 ldomMarkedRange * item = new ldomMarkedRange( r.topLeft(), r.bottomRight(), range->getFlags() );
10521 if ( !item->empty() )
10522 dst.add( item );
10523 else
10524 delete item;
10525 }
10526 }
10527 }
10528 }
10529
10530 /// fill text selection list by splitting text into monotonic flags ranges
splitText(ldomMarkedTextList & dst,ldomNode * textNodeToSplit)10531 void ldomXRangeList::splitText( ldomMarkedTextList &dst, ldomNode * textNodeToSplit )
10532 {
10533 lString32 text = textNodeToSplit->getText();
10534 if ( length()==0 ) {
10535 dst.add( new ldomMarkedText( text, 0, 0 ) );
10536 return;
10537 }
10538 ldomXRange textRange( textNodeToSplit );
10539 ldomXRangeList ranges;
10540 ranges.add( new ldomXRange(textRange) );
10541 int i;
10542 for ( i=0; i<length(); i++ ) {
10543 ranges.split( get(i) );
10544 }
10545 for ( i=0; i<ranges.length(); i++ ) {
10546 ldomXRange * r = ranges[i];
10547 int start = r->getStart().getOffset();
10548 int end = r->getEnd().getOffset();
10549 if ( end>start )
10550 dst.add( new ldomMarkedText( text.substr(start, end-start), r->getFlags(), start ) );
10551 }
10552 /*
10553 if ( dst.length() ) {
10554 CRLog::debug(" splitted: ");
10555 for ( int k=0; k<dst.length(); k++ ) {
10556 CRLog::debug(" (%d, %d) %s", dst[k]->offset, dst[k]->flags, UnicodeToUtf8(dst[k]->text).c_str());
10557 }
10558 }
10559 */
10560 }
10561
10562 /// returns rectangle (in doc coordinates) for range. Returns true if found.
10563 // Note that this works correctly only when start and end are in the
10564 // same text node.
getRectEx(lvRect & rect,bool & isSingleLine)10565 bool ldomXRange::getRectEx( lvRect & rect, bool & isSingleLine )
10566 {
10567 isSingleLine = false;
10568 if ( isNull() )
10569 return false;
10570 // get start and end rects
10571 lvRect rc1;
10572 lvRect rc2;
10573 // inner=true if enhanced rendering, to directly get the inner coordinates,
10574 // so no need to compute paddings (as done below for legacy rendering)
10575 if ( !getStart().getRect(rc1, true) || !getEnd().getRect(rc2, true) )
10576 return false;
10577 ldomNode * finalNode1 = getStart().getFinalNode();
10578 ldomNode * finalNode2 = getEnd().getFinalNode();
10579 if ( !finalNode1 || !finalNode2 ) {
10580 // Shouldn't happen, but prevent a segfault in case some other bug
10581 // in initNodeRendMethod made some text not having an erm_final ancestor.
10582 if ( !finalNode1 )
10583 printf("CRE WARNING: no final parent for range start %s\n", UnicodeToLocal(getStart().toString()).c_str());
10584 if ( !finalNode2 )
10585 printf("CRE WARNING: no final parent for range end %s\n", UnicodeToLocal(getEnd().toString()).c_str());
10586 return false;
10587 }
10588 RenderRectAccessor fmt1(finalNode1);
10589 RenderRectAccessor fmt2(finalNode2);
10590 // In legacy mode, we just got the erm_final coordinates, and we must
10591 // compute and add left/top border and padding (using rc.width() as
10592 // the base for % is wrong here, and so is rc.height() for padding top)
10593 if ( ! RENDER_RECT_HAS_FLAG(fmt1, INNER_FIELDS_SET) ) {
10594 int em = finalNode1->getFont()->getSize();
10595 int padding_left = measureBorder(finalNode1,3) + lengthToPx(finalNode1->getStyle()->padding[0], fmt1.getWidth(), em);
10596 int padding_top = measureBorder(finalNode1,0) + lengthToPx(finalNode1->getStyle()->padding[2], fmt1.getWidth(), em);
10597 rc1.top += padding_top;
10598 rc1.left += padding_left;
10599 rc1.right += padding_left;
10600 rc1.bottom += padding_top;
10601 }
10602 if ( ! RENDER_RECT_HAS_FLAG(fmt2, INNER_FIELDS_SET) ) {
10603 int em = finalNode2->getFont()->getSize();
10604 int padding_left = measureBorder(finalNode2,3) + lengthToPx(finalNode2->getStyle()->padding[0], fmt2.getWidth(), em);
10605 int padding_top = measureBorder(finalNode2,0) + lengthToPx(finalNode2->getStyle()->padding[2], fmt2.getWidth(), em);
10606 rc2.top += padding_top;
10607 rc2.left += padding_left;
10608 rc2.right += padding_left;
10609 rc2.bottom += padding_top;
10610 }
10611 if ( rc1.top == rc2.top && rc1.bottom == rc2.bottom ) {
10612 // on same line
10613 rect.left = rc1.left;
10614 rect.top = rc1.top;
10615 rect.right = rc2.right;
10616 rect.bottom = rc2.bottom;
10617 isSingleLine = true;
10618 return !rect.isEmpty();
10619 }
10620 // on different lines
10621 ldomNode * parent = getNearestCommonParent();
10622 if ( !parent )
10623 return false;
10624 parent->getAbsRect(rect);
10625 rect.top = rc1.top;
10626 rect.bottom = rc2.bottom;
10627 return !rect.isEmpty();
10628 }
10629
// Returns the multiple segments (rectangle for each text line) that
// this ldomXRange spans on the page.
// The text content from S to E on this page will push 4 segments:
//   ......
//   ...S==
//   ======
//   ======
//   ==E..
//   ......
// Segments are appended to 'rects' (which is not cleared here).
// The range is walked text node by text node, from the range start up to
// (but not including) the range end; a segment is also closed whenever
// the final node containing the current text node changes.
// Text nodes for which no char rect can be obtained are skipped.
void ldomXRange::getSegmentRects( LVArray<lvRect> & rects )
{
    bool go_on = true;
    int lcount = 1; // (not used in this function)
    lvRect lineStartRect = lvRect();  // segment being built for the current line
    lvRect nodeStartRect = lvRect();  // rect of the char at the current start position
    lvRect curCharRect = lvRect();    // rect of the char being probed
    lvRect prevCharRect = lvRect();   // last rect known to be on the current line
    ldomNode *prevFinalNode = NULL; // to add rect when we cross final nodes

    // We process range text node by text node (I thought rects' y-coordinates
    // comparisons were valid only for a same text node, but it seems all
    // text on a line get the same .top and .bottom, even if they have a
    // smaller font size - but using ldomXRange.getRectEx() on multiple
    // text nodes gives wrong rects for the last chars on a line...)

    // Note: someRect.extend(someOtherRect) and !someRect.isEmpty() expect
    // a rect to have both width and height non-zero. So, make sure
    // in getRectEx() that we always get a rect of width at least 1px,
    // otherwise some lines may not be highlighted.

    // Note: the range end offset is NOT part of the range (it points to the
    // char after, or last char + 1 if it includes the whole text node text)
    ldomXPointerEx rangeEnd = getEnd();
    ldomXPointerEx curPos = ldomXPointerEx( getStart() ); // copy, will change
    if (!curPos.isText()) // we only deal with text nodes: get the first
        go_on = curPos.nextText();

    while (go_on) { // new line or new/continued text node
        // We may have (empty or not if not yet pushed) from previous iteration:
        // lineStartRect : char rect for first char of line, even if from another text node
        // nodeStartRect : char rect of current char at curPos (calculated but not included
        //                 in previous line), that is now the start of the line
        // The curPos.getRectEx(charRect) we use returns a rect for a single char, with
        // the width of the char. We then "extend" it to the char at end of line (or end
        // of range) to make a segment that we add to the provided &rects.
        // We use getRectEx() with adjusted=true, for fine tuned glyph rectangles
        // that include the excessive left or right side bearing.

        if (!curPos || curPos.isNull() || curPos.compare(rangeEnd) >= 0) {
            // no more text node, or after end of range: we're done
            break;
        }

        ldomNode *curFinalNode = curPos.getFinalNode();
        if (curFinalNode != prevFinalNode) {
            // Force a new segment if we're crossing final nodes, that is, when
            // we're no more in the same inline context (so we get a new segment
            // for each table cells that may happen to be rendered on the same line)
            if (! lineStartRect.isEmpty()) {
                rects.add( lineStartRect );
                lineStartRect = lvRect(); // reset
            }
            prevFinalNode = curFinalNode;
        }

        int startOffset = curPos.getOffset();
        lString32 nodeText = curPos.getText();
        int textLen = nodeText.length();

        if (startOffset == 0) { // new text node
            nodeStartRect = lvRect(); // reset
            if (textLen == 0) { // empty text node (not sure that can happen)
                go_on = curPos.nextText();
                continue;
            }
        }
        // Skip space at start of node or at start of new line
        // (the XML parser made sure we always have a single space
        // at boundaries)
        if (nodeText[startOffset] == ' ') {
            startOffset += 1;
            nodeStartRect = lvRect(); // reset
        }
        if (startOffset >= textLen) { // no more text in this node (or single space node)
            go_on = curPos.nextText();
            nodeStartRect = lvRect(); // reset
            continue;
        }
        curPos.setOffset(startOffset);
        if (nodeStartRect.isEmpty()) { // otherwise, we re-use the one left from previous loop
            // getRectEx() seems to fail on a single no-break-space, but we
            // are not supposed to see a no-br space at start of line.
            // Anyway, try next chars if first one(s) fails
            while (startOffset <= textLen-2 && !curPos.getRectEx(nodeStartRect, true)) {
                // printf("#### curPos.getRectEx(nodeStartRect:%d) failed\n", startOffset);
                startOffset++;
                curPos.setOffset(startOffset);
                nodeStartRect = lvRect(); // reset
            }
            // last try with the last char (startOffset = textLen-1):
            if (!curPos.getRectEx(nodeStartRect, true)) {
                // printf("#### curPos.getRectEx(nodeStartRect) failed\n");
                // getRectEx() returns false when a node is invisible, so we just
                // go processing next text node on failure (it may fail for other
                // reasons that we won't notice, except for may be holes in the
                // highlighting)
                go_on = curPos.nextText(); // skip this text node
                nodeStartRect = lvRect(); // reset
                continue;
            }
        }
        if (lineStartRect.isEmpty()) {
            lineStartRect = nodeStartRect; // re-use the one already computed
        }
        // This would help noticing a line-feed-back-to-start-of-line:
        //   else if (nodeStartRect.left < lineStartRect.right)
        // but it makes a 2-lines-tall single segment if text-indent is larger
        // than previous line end.
        // So, use .top comparison
        else if (nodeStartRect.top > lineStartRect.top) {
            // We ended last node on a line, but a new node starts (or previous
            // one continues) on a different line.
            // And we have a not-yet-added lineStartRect: add it as it is
            rects.add( lineStartRect );
            lineStartRect = nodeStartRect; // start line on current node
        }

        // 1) Look if text node contains end of range (probably the case
        // when only a few words are highlighted)
        if (curPos.getNode() == rangeEnd.getNode() && rangeEnd.getOffset() <= textLen) {
            curCharRect = lvRect();
            curPos.setOffset(rangeEnd.getOffset() - 1); // Range end is not part of the range
            if (!curPos.getRectEx(curCharRect, true)) {
                // printf("#### curPos.getRectEx(textLen=%d) failed\n", textLen);
                go_on = curPos.nextText(); // skip this text node
                nodeStartRect = lvRect(); // reset
                continue;
            }
            if (curCharRect.top == nodeStartRect.top) { // end of range is on current line
                // (Two offsets in a same text node with the same tops are on the same line)
                lineStartRect.extend(curCharRect);
                // lineStartRect will be added after loop exit
                break; // we're done
            }
        }

        // 2) Look if the full text node is contained on the line
        // Ignore (possibly collapsed) space at end of text node
        curPos.setOffset(nodeText[textLen-1] == ' ' ? textLen-2 : textLen-1 );
        curCharRect = lvRect();
        if (!curPos.getRectEx(curCharRect, true)) {
            // printf("#### curPos.getRectEx(textLen=%d) failed\n", textLen);
            go_on = curPos.nextText(); // skip this text node
            nodeStartRect = lvRect(); // reset
            continue;
        }
        if (curCharRect.top == nodeStartRect.top) {
            // Extend line up to the end of this node, but don't add it yet,
            // lineStartRect can still be extended with (parts of) next text nodes
            lineStartRect.extend(curCharRect);
            nodeStartRect  = lvRect(); // reset
            go_on = curPos.nextText(); // go processing next text node
            continue;
        }

        // 3) Current text node's end is not on our line:
        // scan it char by char to see where it changes line
        // (we could use binary search to reduce the number of iterations)
        curPos.setOffset(startOffset);
        prevCharRect = nodeStartRect;
        for (int i=startOffset+1; i<=textLen-1; i++) {
            // skip spaces (but let soft-hyphens in, so they are part of the
            // highlight when they are shown at end of line)
            lChar32 c = nodeText[i];
            if (c == ' ') // || c == 0x00AD)
                continue;
            curPos.setOffset(i);
            curCharRect = lvRect(); // reset
            if (!curPos.getRectEx(curCharRect, true)) {
                // printf("#### curPos.getRectEx(char=%d) failed\n", i);
                // Can happen with non-break-space and may be others,
                // just try with next char
                continue;
            }
            if (curPos.compare(rangeEnd) >= 0) {
                // should not happen, we should have dealt with it as 1)
                // printf("??????????? curPos.getRectEx(char=%d) end of range\n", i);
                go_on = false;        // don't break yet, need to add what we met before
                curCharRect.top = -1; // force adding prevCharRect
            }
            if (curCharRect.top != nodeStartRect.top) { // no more on the same line
                if ( ! prevCharRect.isEmpty() ) { // (should never be empty)
                    // We got previously a rect on this line: it's the end of line
                    lineStartRect.extend(prevCharRect);
                    rects.add( lineStartRect );
                }
                // Continue with this text node, but on a new line
                nodeStartRect = curCharRect;
                lineStartRect = lvRect(); // reset
                break; // break for (i<textLen) loop
            }
            prevCharRect = curCharRect; // still on the line: candidate for end of line
            if (! go_on)
                break; // we're done
        }
    }
    // Add any lineStartRect not yet added
    if (! lineStartRect.isEmpty()) {
        rects.add( lineStartRect );
    }
}
10841
10842 /// sets range to nearest word bounds, returns true if success
getWordRange(ldomXRange & range,ldomXPointer & p)10843 bool ldomXRange::getWordRange( ldomXRange & range, ldomXPointer & p )
10844 {
10845 ldomNode * node = p.getNode();
10846 if ( !node->isText() )
10847 return false;
10848 int pos = p.getOffset();
10849 lString32 txt = node->getText();
10850 if ( pos<0 )
10851 pos = 0;
10852 if ( pos>(int)txt.length() )
10853 pos = txt.length();
10854 int endpos = pos;
10855 for (;;) {
10856 lChar32 ch = txt[endpos];
10857 if ( ch==0 || ch==' ' )
10858 break;
10859 endpos++;
10860 }
10861 /*
10862 // include trailing space
10863 for (;;) {
10864 lChar32 ch = txt[endpos];
10865 if ( ch==0 || ch!=' ' )
10866 break;
10867 endpos++;
10868 }
10869 */
10870 for ( ;; ) {
10871 if ( pos==0 )
10872 break;
10873 if ( txt[pos]!=' ' )
10874 break;
10875 pos--;
10876 }
10877 for ( ;; ) {
10878 if ( pos==0 )
10879 break;
10880 if ( txt[pos-1]==' ' )
10881 break;
10882 pos--;
10883 }
10884 ldomXRange r( ldomXPointer( node, pos ), ldomXPointer( node, endpos ) );
10885 range = r;
10886 return true;
10887 }
10888 #endif
10889
10890 /// returns true if intersects specified line rectangle
intersects(lvRect & rc,lvRect & intersection)10891 bool ldomMarkedRange::intersects( lvRect & rc, lvRect & intersection )
10892 {
10893 if ( flags < 0x10 ) {
10894 // This assumes lines (rc) are from full-width LTR paragraphs, and
10895 // takes some shortcuts when checking intersection (it can be wrong
10896 // when floats, table cells, or RTL/BiDi text are involved).
10897 if ( start.y>=rc.bottom )
10898 return false;
10899 if ( end.y<rc.top )
10900 return false;
10901 intersection = rc;
10902 if ( start.y>=rc.top && start.y<rc.bottom ) {
10903 if ( start.x > rc.right )
10904 return false;
10905 intersection.left = rc.left > start.x ? rc.left : start.x;
10906 }
10907 if ( end.y>=rc.top && end.y<rc.bottom ) {
10908 if ( end.x < rc.left )
10909 return false;
10910 intersection.right = rc.right < end.x ? rc.right : end.x;
10911 }
10912 return true;
10913 }
10914 else {
10915 // Don't take any shortcut and check the full intersection
10916 if ( rc.bottom <= start.y || rc.top >= end.y || rc.right <= start.x || rc.left >= end.x ) {
10917 return false; // no intersection
10918 }
10919 intersection.top = rc.top > start.y ? rc.top : start.y;
10920 intersection.bottom = rc.bottom < end.y ? rc.bottom : end.y;
10921 intersection.left = rc.left > start.x ? rc.left : start.x;
10922 intersection.right = rc.right < end.x ? rc.right : end.x;
10923 return !intersection.isEmpty();
10924 }
10925 }
10926
10927 /// create bounded by RC list, with (0,0) coordinates at left top corner
10928 // crop/discard elements outside of rc (or outside of crop_rc instead if provided)
ldomMarkedRangeList(const ldomMarkedRangeList * list,lvRect & rc,lvRect * crop_rc)10929 ldomMarkedRangeList::ldomMarkedRangeList( const ldomMarkedRangeList * list, lvRect & rc, lvRect * crop_rc )
10930 {
10931 if ( !list || list->empty() )
10932 return;
10933 // if ( list->get(0)->start.y>rc.bottom )
10934 // return;
10935 // if ( list->get( list->length()-1 )->end.y < rc.top )
10936 // return;
10937 if ( !crop_rc ) {
10938 // If no alternate crop_rc provided, crop to the rc anchor
10939 crop_rc = &rc;
10940 }
10941 for ( int i=0; i<list->length(); i++ ) {
10942 ldomMarkedRange * src = list->get(i);
10943 if ( src->start.y >= crop_rc->bottom || src->end.y < crop_rc->top )
10944 continue;
10945 add( new ldomMarkedRange(
10946 lvPoint(src->start.x-rc.left, src->start.y-rc.top ),
10947 lvPoint(src->end.x-rc.left, src->end.y-rc.top ),
10948 src->flags ) );
10949 }
10950 }
10951
10952 /// returns nearest common element for start and end points
getNearestCommonParent()10953 ldomNode * ldomXRange::getNearestCommonParent()
10954 {
10955 ldomXPointerEx start(getStart());
10956 ldomXPointerEx end(getEnd());
10957 while ( start.getLevel() > end.getLevel() && start.parent() )
10958 ;
10959 while ( start.getLevel() < end.getLevel() && end.parent() )
10960 ;
10961 /*
10962 while ( start.getIndex()!=end.getIndex() && start.parent() && end.parent() )
10963 ;
10964 if ( start.getNode()==end.getNode() )
10965 return start.getNode();
10966 return NULL;
10967 */
10968 // This above seems wrong: we could have start and end on the same level,
10969 // but in different parent nodes, with still the same index among these
10970 // different parent nodes' children.
10971 // Best to check for node identity, till we find the same parent,
10972 // or the root node
10973 while ( start.getNode()!=end.getNode() && start.parent() && end.parent() )
10974 ;
10975 return start.getNode();
10976 }
10977
10978 /// returns HTML (serialized from the DOM, may be different from the source HTML)
10979 /// puts the paths of the linked css files met into the provided lString32Collection cssFiles
getHtml(lString32Collection & cssFiles,int wflags)10980 lString8 ldomXPointer::getHtml(lString32Collection & cssFiles, int wflags)
10981 {
10982 if ( isNull() )
10983 return lString8::empty_str;
10984 ldomNode * startNode = getNode();
10985 LVStreamRef stream = LVCreateMemoryStream(NULL, 0, false, LVOM_WRITE);
10986 writeNodeEx( stream.get(), startNode, cssFiles, wflags );
10987 int size = stream->GetSize();
10988 LVArray<char> buf( size+1, '\0' );
10989 stream->Seek(0, LVSEEK_SET, NULL);
10990 stream->Read( buf.get(), size, NULL );
10991 buf[size] = 0;
10992 lString8 html = lString8( buf.get() );
10993 return html;
10994 }
10995
10996 /// returns HTML (serialized from the DOM, may be different from the source HTML)
10997 /// puts the paths of the linked css files met into the provided lString32Collection cssFiles
getHtml(lString32Collection & cssFiles,int wflags,bool fromRootNode)10998 lString8 ldomXRange::getHtml(lString32Collection & cssFiles, int wflags, bool fromRootNode)
10999 {
11000 if ( isNull() )
11001 return lString8::empty_str;
11002 sort();
11003 ldomNode * startNode;
11004 if (fromRootNode) {
11005 startNode = getStart().getNode()->getDocument()->getRootNode();
11006 if (startNode->getChildCount() == 1) // start HTML with first child (<body>)
11007 startNode = startNode->getFirstChild();
11008 }
11009 else {
11010 // We need to start from the nearest common parent, to get balanced HTML
11011 startNode = getNearestCommonParent();
11012 }
11013 LVStreamRef stream = LVCreateMemoryStream(NULL, 0, false, LVOM_WRITE);
11014 writeNodeEx( stream.get(), startNode, cssFiles, wflags, getStart(), getEnd() );
11015 int size = stream->GetSize();
11016 LVArray<char> buf( size+1, '\0' );
11017 stream->Seek(0, LVSEEK_SET, NULL);
11018 stream->Read( buf.get(), size, NULL );
11019 buf[size] = 0;
11020 lString8 html = lString8( buf.get() );
11021 return html;
11022 }
11023
11024 /// searches path for element with specific id, returns level at which element is founs, 0 if not found
findElementInPath(lUInt16 id)11025 int ldomXPointerEx::findElementInPath( lUInt16 id )
11026 {
11027 if ( !ensureElement() )
11028 return 0;
11029 ldomNode * e = getNode();
11030 for ( ; e!=NULL; e = e->getParentNode() ) {
11031 if ( e->getNodeId()==id ) {
11032 return e->getNodeLevel();
11033 }
11034 }
11035 return 0;
11036 }
11037
ensureFinal()11038 bool ldomXPointerEx::ensureFinal()
11039 {
11040 if ( !ensureElement() )
11041 return false;
11042 int cnt = 0;
11043 int foundCnt = -1;
11044 ldomNode * e = getNode();
11045 for ( ; e!=NULL; e = e->getParentNode() ) {
11046 if ( e->getRendMethod() == erm_final ) {
11047 foundCnt = cnt;
11048 }
11049 cnt++;
11050 }
11051 if ( foundCnt<0 )
11052 return false;
11053 for ( int i=0; i<foundCnt; i++ )
11054 parent();
11055 // curent node is final formatted element (e.g. paragraph)
11056 return true;
11057 }
11058
11059 /// ensure that current node is element (move to parent, if not - from text node to element)
ensureElement()11060 bool ldomXPointerEx::ensureElement()
11061 {
11062 ldomNode * node = getNode();
11063 if ( !node )
11064 return false;
11065 if ( node->isText()) {
11066 if (!parent())
11067 return false;
11068 node = getNode();
11069 }
11070 if ( !node || !node->isElement() )
11071 return false;
11072 return true;
11073 }
11074
11075 /// move to first child of current node
firstChild()11076 bool ldomXPointerEx::firstChild()
11077 {
11078 return child(0);
11079 }
11080
11081 /// move to last child of current node
lastChild()11082 bool ldomXPointerEx::lastChild()
11083 {
11084 int count = getNode()->getChildCount();
11085 if ( count <=0 )
11086 return false;
11087 return child( count - 1 );
11088 }
11089
11090 /// move to first element child of current node
firstElementChild()11091 bool ldomXPointerEx::firstElementChild()
11092 {
11093 ldomNode * node = getNode();
11094 int count = node->getChildCount();
11095 for ( int i=0; i<count; i++ ) {
11096 if ( node->getChildNode( i )->isElement() )
11097 return child( i );
11098 }
11099 return false;
11100 }
11101
11102 /// move to last element child of current node
lastElementChild()11103 bool ldomXPointerEx::lastElementChild()
11104 {
11105 ldomNode * node = getNode();
11106 int count = node->getChildCount();
11107 for ( int i=count-1; i>=0; i-- ) {
11108 if ( node->getChildNode( i )->isElement() )
11109 return child( i );
11110 }
11111 return false;
11112 }
11113
11114 /// forward iteration by elements of DOM three
nextElement()11115 bool ldomXPointerEx::nextElement()
11116 {
11117 if ( !ensureElement() )
11118 return false;
11119 if ( firstElementChild() )
11120 return true;
11121 for (;;) {
11122 if ( nextSiblingElement() )
11123 return true;
11124 if ( !parent() )
11125 return false;
11126 }
11127 }
11128
11129 /// returns true if current node is visible element with render method == erm_final
isVisibleFinal()11130 bool ldomXPointerEx::isVisibleFinal()
11131 {
11132 if ( !isElement() )
11133 return false;
11134 int cnt = 0;
11135 int foundCnt = -1;
11136 ldomNode * e = getNode();
11137 for ( ; e!=NULL; e = e->getParentNode() ) {
11138 switch ( e->getRendMethod() ) {
11139 case erm_final:
11140 foundCnt = cnt;
11141 break;
11142 case erm_invisible:
11143 foundCnt = -1;
11144 break;
11145 default:
11146 break;
11147 }
11148 cnt++;
11149 }
11150 if ( foundCnt != 0 )
11151 return false;
11152 // curent node is visible final formatted element (e.g. paragraph)
11153 return true;
11154 }
11155
11156 /// move to next visible text node
nextVisibleText(bool thisBlockOnly)11157 bool ldomXPointerEx::nextVisibleText( bool thisBlockOnly )
11158 {
11159 ldomXPointerEx backup;
11160 if ( thisBlockOnly )
11161 backup = *this;
11162 while ( nextText(thisBlockOnly) ) {
11163 if ( isVisible() )
11164 return true;
11165 }
11166 if ( thisBlockOnly )
11167 *this = backup;
11168 return false;
11169 }
11170
11171 /// returns true if current node is visible element or text
isVisible()11172 bool ldomXPointerEx::isVisible()
11173 {
11174 ldomNode * p;
11175 ldomNode * node = getNode();
11176 if ( node && node->isText() )
11177 p = node->getParentNode();
11178 else
11179 p = node;
11180 while ( p ) {
11181 if ( p->getRendMethod() == erm_invisible )
11182 return false;
11183 p = p->getParentNode();
11184 }
11185 return true;
11186 }
11187
11188 /// move to next text node
nextText(bool thisBlockOnly)11189 bool ldomXPointerEx::nextText( bool thisBlockOnly )
11190 {
11191 ldomNode * block = NULL;
11192 if ( thisBlockOnly )
11193 block = getThisBlockNode();
11194 setOffset( 0 );
11195 while ( firstChild() ) {
11196 if ( isText() )
11197 return (!thisBlockOnly || getThisBlockNode()==block);
11198 }
11199 for (;;) {
11200 while ( nextSibling() ) {
11201 if ( isText() )
11202 return (!thisBlockOnly || getThisBlockNode()==block);
11203 while ( firstChild() ) {
11204 if ( isText() )
11205 return (!thisBlockOnly || getThisBlockNode()==block);
11206 }
11207 }
11208 if ( !parent() )
11209 return false;
11210 }
11211 }
11212
11213 /// move to previous text node
prevText(bool thisBlockOnly)11214 bool ldomXPointerEx::prevText( bool thisBlockOnly )
11215 {
11216 ldomNode * block = NULL;
11217 if ( thisBlockOnly )
11218 block = getThisBlockNode();
11219 setOffset( 0 );
11220 for (;;) {
11221 while ( prevSibling() ) {
11222 if ( isText() )
11223 return (!thisBlockOnly || getThisBlockNode()==block);
11224 while ( lastChild() ) {
11225 if ( isText() )
11226 return (!thisBlockOnly || getThisBlockNode()==block);
11227 }
11228 }
11229 if ( !parent() )
11230 return false;
11231 }
11232 }
11233
11234 /// move to previous visible text node
prevVisibleText(bool thisBlockOnly)11235 bool ldomXPointerEx::prevVisibleText( bool thisBlockOnly )
11236 {
11237 ldomXPointerEx backup;
11238 if ( thisBlockOnly )
11239 backup = *this;
11240 while ( prevText( thisBlockOnly ) )
11241 if ( isVisible() )
11242 return true;
11243 if ( thisBlockOnly )
11244 *this = backup;
11245 return false;
11246 }
11247
11248 /// move to previous visible char
prevVisibleChar(bool thisBlockOnly)11249 bool ldomXPointerEx::prevVisibleChar( bool thisBlockOnly )
11250 {
11251 if ( isNull() )
11252 return false;
11253 if ( !isText() || !isVisible() || _data->getOffset()==0 ) {
11254 // move to previous text
11255 if ( !prevVisibleText(thisBlockOnly) )
11256 return false;
11257 ldomNode * node = getNode();
11258 lString32 text = node->getText();
11259 int textLen = text.length();
11260 _data->setOffset( textLen );
11261 }
11262 _data->addOffset(-1);
11263 return true;
11264 }
11265
11266 /// move to next visible char
nextVisibleChar(bool thisBlockOnly)11267 bool ldomXPointerEx::nextVisibleChar( bool thisBlockOnly )
11268 {
11269 if ( isNull() )
11270 return false;
11271 if ( !isText() || !isVisible() ) {
11272 // move to next text
11273 if ( !nextVisibleText(thisBlockOnly) )
11274 return false;
11275 _data->setOffset( 0 );
11276 return true;
11277 }
11278 ldomNode * node = getNode();
11279 lString32 text = node->getText();
11280 int textLen = text.length();
11281 if ( _data->getOffset() == textLen ) {
11282 // move to next text
11283 if ( !nextVisibleText(thisBlockOnly) )
11284 return false;
11285 _data->setOffset( 0 );
11286 return true;
11287 }
11288 _data->addOffset(1);
11289 return true;
11290 }
11291
11292 // TODO: implement better behavior
IsUnicodeSpace(lChar32 ch)11293 inline bool IsUnicodeSpace( lChar32 ch )
11294 {
11295 //return ch==' ';
11296 switch ((unsigned short)ch) {
11297 case 0x0020: // SPACE
11298 case 0x00A0: // NO-BREAK SPACE
11299 case 0x2000: // EN QUAD
11300 case 0x2001: // EM QUAD
11301 case 0x2002: // EN SPACE
11302 case 0x2003: // EM SPACE
11303 case 0x2004: // THREE-PER-EM SPACE
11304 case 0x2005: // FOUR-PER-EM SPACE
11305 case 0x202F: // NARROW NO-BREAK SPACE
11306 case 0x3000: // IDEOGRAPHIC SPACE
11307 return true;
11308 }
11309 return false;
11310 }
11311
11312 // TODO: implement better behavior
IsUnicodeSpaceOrNull(lChar32 ch)11313 inline bool IsUnicodeSpaceOrNull( lChar32 ch )
11314 {
11315 return ch==0 || IsUnicodeSpace(ch);
11316 }
11317
// Note:
// ALL calls to IsUnicodeSpace and IsUnicodeSpaceOrNull in
// the *VisibleWord* functions below have been replaced with
// calls to IsWordSeparator and IsWordSeparatorOrNull.
// The *Sentence* functions have not been modified, and have not been
// tested against this change to the *VisibleWord* functions that
// they use (but KOReader does not use these *Sentence* functions).
11325
11326 // For better accuracy than IsUnicodeSpace for detecting words
IsWordSeparator(lChar32 ch)11327 inline bool IsWordSeparator( lChar32 ch )
11328 {
11329 return lStr_isWordSeparator(ch);
11330 }
11331
IsWordSeparatorOrNull(lChar32 ch)11332 inline bool IsWordSeparatorOrNull( lChar32 ch )
11333 {
11334 if (ch==0) return true;
11335 return IsWordSeparator(ch);
11336 }
11337
canWrapWordBefore(lChar32 ch)11338 inline bool canWrapWordBefore( lChar32 ch ) {
11339 return ch>=0x2e80 && ch<0x2CEAF;
11340 }
11341
canWrapWordAfter(lChar32 ch)11342 inline bool canWrapWordAfter( lChar32 ch ) {
11343 return ch>=0x2e80 && ch<0x2CEAF;
11344 }
11345
isVisibleWordChar()11346 bool ldomXPointerEx::isVisibleWordChar() {
11347 if ( isNull() )
11348 return false;
11349 if ( !isText() || !isVisible() )
11350 return false;
11351 ldomNode * node = getNode();
11352 lString32 text = node->getText();
11353 return !IsWordSeparator(text[_data->getOffset()]);
11354 }
11355
11356 /// move to previous visible word beginning
prevVisibleWordStart(bool thisBlockOnly)11357 bool ldomXPointerEx::prevVisibleWordStart( bool thisBlockOnly )
11358 {
11359 if ( isNull() )
11360 return false;
11361 ldomNode * node = NULL;
11362 lString32 text;
11363 for ( ;; ) {
11364 if ( !isText() || !isVisible() || _data->getOffset()==0 ) {
11365 // move to previous text
11366 if ( !prevVisibleText(thisBlockOnly) )
11367 return false;
11368 node = getNode();
11369 text = node->getText();
11370 int textLen = text.length();
11371 _data->setOffset( textLen );
11372 } else {
11373 node = getNode();
11374 text = node->getText();
11375 }
11376 bool foundNonSeparator = false;
11377 while ( _data->getOffset() > 0 && IsWordSeparator(text[_data->getOffset()-1]) )
11378 _data->addOffset(-1); // skip preceeding space if any (we were on a visible word start)
11379 while ( _data->getOffset()>0 ) {
11380 if ( IsWordSeparator(text[ _data->getOffset()-1 ]) )
11381 break;
11382 foundNonSeparator = true;
11383 _data->addOffset(-1);
11384 if ( canWrapWordBefore( text[_data->getOffset()] ) ) // CJK char
11385 break;
11386 }
11387 if ( foundNonSeparator )
11388 return true;
11389 }
11390 }
11391
11392 /// move to previous visible word end
prevVisibleWordEnd(bool thisBlockOnly)11393 bool ldomXPointerEx::prevVisibleWordEnd( bool thisBlockOnly )
11394 {
11395 if ( isNull() )
11396 return false;
11397 ldomNode * node = NULL;
11398 lString32 text;
11399 bool moved = false;
11400 for ( ;; ) {
11401 if ( !isText() || !isVisible() || _data->getOffset()==0 ) {
11402 // move to previous text
11403 if ( !prevVisibleText(thisBlockOnly) )
11404 return false;
11405 node = getNode();
11406 text = node->getText();
11407 int textLen = text.length();
11408 _data->setOffset( textLen );
11409 moved = true;
11410 } else {
11411 node = getNode();
11412 text = node->getText();
11413 }
11414 // skip separators
11415 while ( _data->getOffset() > 0 && IsWordSeparator(text[_data->getOffset()-1]) ) {
11416 _data->addOffset(-1);
11417 moved = true;
11418 }
11419 if ( moved && _data->getOffset()>0 )
11420 return true; // found!
11421 // skip non-separators
11422 while ( _data->getOffset()>0 ) {
11423 if ( IsWordSeparator(text[ _data->getOffset()-1 ]) )
11424 break;
11425 if ( moved && canWrapWordAfter( text[_data->getOffset()] ) ) // We moved to a CJK char
11426 return true;
11427 moved = true;
11428 _data->addOffset(-1);
11429 }
11430 // skip separators
11431 while ( _data->getOffset() > 0 && IsWordSeparator(text[_data->getOffset()-1]) ) {
11432 _data->addOffset(-1);
11433 moved = true;
11434 }
11435 if ( moved && _data->getOffset()>0 )
11436 return true; // found!
11437 }
11438 }
11439
11440 /// move to next visible word beginning
nextVisibleWordStart(bool thisBlockOnly)11441 bool ldomXPointerEx::nextVisibleWordStart( bool thisBlockOnly )
11442 {
11443 if ( isNull() )
11444 return false;
11445 ldomNode * node = NULL;
11446 lString32 text;
11447 int textLen = 0;
11448 bool moved = false;
11449 for ( ;; ) {
11450 if ( !isText() || !isVisible() ) {
11451 // move to previous text
11452 if ( !nextVisibleText(thisBlockOnly) )
11453 return false;
11454 node = getNode();
11455 text = node->getText();
11456 textLen = text.length();
11457 _data->setOffset( 0 );
11458 moved = true;
11459 } else {
11460 for (;;) {
11461 node = getNode();
11462 text = node->getText();
11463 textLen = text.length();
11464 if ( _data->getOffset() < textLen )
11465 break;
11466 if ( !nextVisibleText(thisBlockOnly) )
11467 return false;
11468 _data->setOffset( 0 );
11469 moved = true;
11470 }
11471 }
11472 // skip separators
11473 while ( _data->getOffset()<textLen && IsWordSeparator(text[ _data->getOffset() ]) ) {
11474 _data->addOffset(1);
11475 moved = true;
11476 }
11477 if ( moved && _data->getOffset()<textLen )
11478 return true;
11479 // skip non-separators
11480 while ( _data->getOffset()<textLen ) {
11481 if ( IsWordSeparator(text[ _data->getOffset() ]) )
11482 break;
11483 if ( moved && canWrapWordBefore( text[_data->getOffset()] ) ) // We moved to a CJK char
11484 return true;
11485 moved = true;
11486 _data->addOffset(1);
11487 }
11488 // skip separators
11489 while ( _data->getOffset()<textLen && IsWordSeparator(text[ _data->getOffset() ]) ) {
11490 _data->addOffset(1);
11491 moved = true;
11492 }
11493 if ( moved && _data->getOffset()<textLen )
11494 return true;
11495 }
11496 }
11497
11498 /// move to end of current word
thisVisibleWordEnd(bool thisBlockOnly)11499 bool ldomXPointerEx::thisVisibleWordEnd(bool thisBlockOnly)
11500 {
11501 CR_UNUSED(thisBlockOnly);
11502 if ( isNull() )
11503 return false;
11504 ldomNode * node = NULL;
11505 lString32 text;
11506 int textLen = 0;
11507 bool moved = false;
11508 if ( !isText() || !isVisible() )
11509 return false;
11510 node = getNode();
11511 text = node->getText();
11512 textLen = text.length();
11513 if ( _data->getOffset() >= textLen )
11514 return false;
11515 // skip separators
11516 while ( _data->getOffset()<textLen && IsWordSeparator(text[ _data->getOffset() ]) ) {
11517 _data->addOffset(1);
11518 //moved = true;
11519 }
11520 // skip non-separators
11521 while ( _data->getOffset()<textLen ) {
11522 if ( IsWordSeparator(text[ _data->getOffset() ]) )
11523 break;
11524 moved = true;
11525 _data->addOffset(1);
11526 }
11527 return moved;
11528 }
11529
11530 /// move to next visible word end
nextVisibleWordEnd(bool thisBlockOnly)11531 bool ldomXPointerEx::nextVisibleWordEnd( bool thisBlockOnly )
11532 {
11533 if ( isNull() )
11534 return false;
11535 ldomNode * node = NULL;
11536 lString32 text;
11537 int textLen = 0;
11538 //bool moved = false;
11539 for ( ;; ) {
11540 if ( !isText() || !isVisible() ) {
11541 // move to previous text
11542 if ( !nextVisibleText(thisBlockOnly) )
11543 return false;
11544 node = getNode();
11545 text = node->getText();
11546 textLen = text.length();
11547 _data->setOffset( 0 );
11548 //moved = true;
11549 } else {
11550 for (;;) {
11551 node = getNode();
11552 text = node->getText();
11553 textLen = text.length();
11554 if ( _data->getOffset() < textLen )
11555 break;
11556 if ( !nextVisibleText(thisBlockOnly) )
11557 return false;
11558 _data->setOffset( 0 );
11559 }
11560 }
11561 bool nonSeparatorFound = false;
11562 // skip non-separators
11563 while ( _data->getOffset()<textLen ) {
11564 if ( IsWordSeparator(text[ _data->getOffset() ]) )
11565 break;
11566 nonSeparatorFound = true;
11567 _data->addOffset(1);
11568 if ( canWrapWordAfter( text[_data->getOffset()] ) ) // We moved to a CJK char
11569 return true;
11570 }
11571 if ( nonSeparatorFound )
11572 return true;
11573 // skip separators
11574 while ( _data->getOffset()<textLen && IsWordSeparator(text[ _data->getOffset() ]) ) {
11575 _data->addOffset(1);
11576 //moved = true;
11577 }
11578 // skip non-separators
11579 while ( _data->getOffset()<textLen ) {
11580 if ( IsWordSeparator(text[ _data->getOffset() ]) )
11581 break;
11582 nonSeparatorFound = true;
11583 _data->addOffset(1);
11584 if ( canWrapWordAfter( text[_data->getOffset()] ) ) // We moved to a CJK char
11585 return true;
11586 }
11587 if ( nonSeparatorFound )
11588 return true;
11589 }
11590 }
11591
11592 /// move to previous visible word beginning (in sentence)
prevVisibleWordStartInSentence()11593 bool ldomXPointerEx::prevVisibleWordStartInSentence()
11594 {
11595 if ( isNull() )
11596 return false;
11597 ldomNode * node = NULL;
11598 lString32 text;
11599 for ( ;; ) {
11600 if ( !isText() || !isVisible() || _data->getOffset()==0 ) {
11601 // move to previous text
11602 if ( !prevVisibleText(false) )
11603 return false;
11604 node = getNode();
11605 text = node->getText();
11606 int textLen = text.length();
11607 _data->setOffset( textLen );
11608 } else {
11609 node = getNode();
11610 text = node->getText();
11611 }
11612 bool foundNonSpace = false;
11613 while ( _data->getOffset() > 0 && IsUnicodeSpace(text[_data->getOffset()-1]) )
11614 _data->addOffset(-1); // skip preceeding space if any (we were on a visible word start)
11615 while ( _data->getOffset()>0 ) {
11616 if ( IsUnicodeSpace(text[ _data->getOffset()-1 ]) )
11617 break;
11618 foundNonSpace = true;
11619 _data->addOffset(-1);
11620 if ( canWrapWordBefore( text[_data->getOffset()] ) ) // CJK char
11621 break;
11622 }
11623 if ( foundNonSpace )
11624 return true;
11625 }
11626 }
11627
11628 /// move to next visible word beginning (in sentence)
nextVisibleWordStartInSentence()11629 bool ldomXPointerEx::nextVisibleWordStartInSentence()
11630 {
11631 if ( isNull() )
11632 return false;
11633 ldomNode * node = NULL;
11634 lString32 text;
11635 int textLen = 0;
11636 bool moved = false;
11637 for ( ;; ) {
11638 if ( !isText() || !isVisible() ) {
11639 // move to next text
11640 if ( !nextVisibleText(false) )
11641 return false;
11642 node = getNode();
11643 text = node->getText();
11644 textLen = text.length();
11645 _data->setOffset( 0 );
11646 moved = true;
11647 } else {
11648 for (;;) {
11649 node = getNode();
11650 text = node->getText();
11651 textLen = text.length();
11652 if ( _data->getOffset() < textLen )
11653 break;
11654 if ( !nextVisibleText(false) )
11655 return false;
11656 _data->setOffset( 0 );
11657 moved = true;
11658 }
11659 }
11660 // skip spaces
11661 while ( _data->getOffset()<textLen && IsUnicodeSpace(text[ _data->getOffset() ]) ) {
11662 _data->addOffset(1);
11663 moved = true;
11664 }
11665 if ( moved && _data->getOffset()<textLen )
11666 return true;
11667 // skip non-spaces
11668 while ( _data->getOffset()<textLen ) {
11669 if ( IsUnicodeSpace(text[ _data->getOffset() ]) )
11670 break;
11671 if ( moved && canWrapWordBefore( text[_data->getOffset()] ) ) // We moved to a CJK char
11672 return true;
11673 moved = true;
11674 _data->addOffset(1);
11675 }
11676 // skip spaces
11677 while ( _data->getOffset()<textLen && IsUnicodeSpace(text[ _data->getOffset() ]) ) {
11678 _data->addOffset(1);
11679 moved = true;
11680 }
11681 if ( moved && _data->getOffset()<textLen )
11682 return true;
11683 }
11684 }
11685
11686 /// move to end of current word
thisVisibleWordEndInSentence()11687 bool ldomXPointerEx::thisVisibleWordEndInSentence()
11688 {
11689 if ( isNull() )
11690 return false;
11691 ldomNode * node = NULL;
11692 lString32 text;
11693 int textLen = 0;
11694 bool moved = false;
11695 if ( !isText() || !isVisible() )
11696 return false;
11697 node = getNode();
11698 text = node->getText();
11699 textLen = text.length();
11700 if ( _data->getOffset() >= textLen )
11701 return false;
11702 // skip spaces
11703 while ( _data->getOffset()<textLen && IsUnicodeSpace(text[ _data->getOffset() ]) ) {
11704 _data->addOffset(1);
11705 //moved = true;
11706 }
11707 // skip non-spaces
11708 while ( _data->getOffset()<textLen ) {
11709 if ( IsUnicodeSpace(text[ _data->getOffset() ]) )
11710 break;
11711 moved = true;
11712 _data->addOffset(1);
11713 }
11714 return moved;
11715 }
11716
11717 /// move to next visible word end (in sentence)
nextVisibleWordEndInSentence()11718 bool ldomXPointerEx::nextVisibleWordEndInSentence()
11719 {
11720 if ( isNull() )
11721 return false;
11722 ldomNode * node = NULL;
11723 lString32 text;
11724 int textLen = 0;
11725 //bool moved = false;
11726 for ( ;; ) {
11727 if ( !isText() || !isVisible() ) {
11728 // move to previous text
11729 if ( !nextVisibleText(true) )
11730 return false;
11731 node = getNode();
11732 text = node->getText();
11733 textLen = text.length();
11734 _data->setOffset( 0 );
11735 //moved = true;
11736 } else {
11737 for (;;) {
11738 node = getNode();
11739 text = node->getText();
11740 textLen = text.length();
11741 if ( _data->getOffset() < textLen )
11742 break;
11743 if ( !nextVisibleText(true) )
11744 return false;
11745 _data->setOffset( 0 );
11746 }
11747 }
11748 bool nonSpaceFound = false;
11749 // skip non-spaces
11750 while ( _data->getOffset()<textLen ) {
11751 if ( IsUnicodeSpace(text[ _data->getOffset() ]) )
11752 break;
11753 nonSpaceFound = true;
11754 _data->addOffset(1);
11755 if ( canWrapWordAfter( text[_data->getOffset()] ) ) // We moved to a CJK char
11756 return true;
11757 }
11758 if ( nonSpaceFound )
11759 return true;
11760 // skip spaces
11761 while ( _data->getOffset()<textLen && IsUnicodeSpace(text[ _data->getOffset() ]) ) {
11762 _data->addOffset(1);
11763 //moved = true;
11764 }
11765 // skip non-spaces
11766 while ( _data->getOffset()<textLen ) {
11767 if ( IsUnicodeSpace(text[ _data->getOffset() ]) )
11768 break;
11769 nonSpaceFound = true;
11770 _data->addOffset(1);
11771 if ( canWrapWordAfter( text[_data->getOffset()] ) ) // We moved to a CJK char
11772 return true;
11773 }
11774 if ( nonSpaceFound )
11775 return true;
11776 }
11777 }
11778
11779 /// move to previous visible word end (in sentence)
prevVisibleWordEndInSentence()11780 bool ldomXPointerEx::prevVisibleWordEndInSentence()
11781 {
11782 if ( isNull() )
11783 return false;
11784 ldomNode * node = NULL;
11785 lString32 text;
11786 bool moved = false;
11787 for ( ;; ) {
11788 if ( !isText() || !isVisible() || _data->getOffset()==0 ) {
11789 // move to previous text
11790 if ( !prevVisibleText(false) )
11791 return false;
11792 node = getNode();
11793 text = node->getText();
11794 int textLen = text.length();
11795 _data->setOffset( textLen );
11796 moved = true;
11797 } else {
11798 node = getNode();
11799 text = node->getText();
11800 }
11801 // skip spaces
11802 while ( _data->getOffset() > 0 && IsUnicodeSpace(text[_data->getOffset()-1]) ) {
11803 _data->addOffset(-1);
11804 moved = true;
11805 }
11806 if ( moved && _data->getOffset()>0 )
11807 return true; // found!
11808 // skip non-spaces
11809 while ( _data->getOffset()>0 ) {
11810 if ( IsUnicodeSpace(text[ _data->getOffset()-1 ]) )
11811 break;
11812 if ( moved && canWrapWordAfter( text[_data->getOffset()] ) ) // We moved to a CJK char
11813 return true;
11814 moved = true;
11815 _data->addOffset(-1);
11816 }
11817 // skip spaces
11818 while ( _data->getOffset() > 0 && IsUnicodeSpace(text[_data->getOffset()-1]) ) {
11819 _data->addOffset(-1);
11820 moved = true;
11821 }
11822 if ( moved && _data->getOffset()>0 )
11823 return true; // found!
11824 }
11825 }
11826
11827 /// returns true if current position is visible word beginning
isVisibleWordStart()11828 bool ldomXPointerEx::isVisibleWordStart()
11829 {
11830 if ( isNull() )
11831 return false;
11832 if ( !isText() || !isVisible() )
11833 return false;
11834 ldomNode * node = getNode();
11835 lString32 text = node->getText();
11836 int textLen = text.length();
11837 int i = _data->getOffset();
11838 // We're actually testing the boundary between the char at i-1 and
11839 // the char at i. So, we return true when [i] is the first letter
11840 // of a word.
11841 lChar32 currCh = i<textLen ? text[i] : 0;
11842 lChar32 prevCh = i<=textLen && i>0 ? text[i-1] : 0;
11843 if (canWrapWordBefore(currCh)) {
11844 // If [i] is a CJK char (that's what canWrapWordBefore()
11845 // checks), this is a visible word start.
11846 return true;
11847 }
11848 if (IsWordSeparatorOrNull(prevCh) && !IsWordSeparator(currCh)) {
11849 // If [i-1] is a space or punctuation (or [i] is the start of the text
11850 // node) and [i] is a letter: this is a visible word start.
11851 return true;
11852 }
11853 return false;
11854 }
11855
11856 /// returns true if current position is visible word end
isVisibleWordEnd()11857 bool ldomXPointerEx::isVisibleWordEnd()
11858 {
11859 if ( isNull() )
11860 return false;
11861 if ( !isText() || !isVisible() )
11862 return false;
11863 ldomNode * node = getNode();
11864 lString32 text = node->getText();
11865 int textLen = text.length();
11866 int i = _data->getOffset();
11867 // We're actually testing the boundary between the char at i-1 and
11868 // the char at i. So, we return true when [i-1] is the last letter
11869 // of a word.
11870 lChar32 currCh = i>0 ? text[i-1] : 0;
11871 lChar32 nextCh = i<textLen ? text[i] : 0;
11872 if (canWrapWordAfter(currCh)) {
11873 // If [i-1] is a CJK char (that's what canWrapWordAfter()
11874 // checks), this is a visible word end.
11875 return true;
11876 }
11877 if (!IsWordSeparator(currCh) && IsWordSeparatorOrNull(nextCh)) {
11878 // If [i-1] is a letter and [i] is a space or punctuation (or [i-1] is
11879 // the last letter of a text node): this is a visible word end.
11880 return true;
11881 }
11882 return false;
11883 }
11884
11885 /// returns block owner node of current node (or current node if it's block)
getThisBlockNode()11886 ldomNode * ldomXPointerEx::getThisBlockNode()
11887 {
11888 if ( isNull() )
11889 return NULL;
11890 ldomNode * node = getNode();
11891 if ( node->isText() )
11892 node = node->getParentNode();
11893 for (;;) {
11894 if ( !node )
11895 return NULL;
11896 lvdom_element_render_method rm = node->getRendMethod();
11897 switch ( rm ) {
11898 case erm_block:
11899 case erm_final:
11900 case erm_table:
11901 case erm_table_row_group:
11902 case erm_table_row:
11903 return node;
11904 default:
11905 break; // ignore
11906 }
11907 node = node->getParentNode();
11908 }
11909 }
11910
11911 /// returns true if points to last visible text inside block element
isLastVisibleTextInBlock()11912 bool ldomXPointerEx::isLastVisibleTextInBlock()
11913 {
11914 if ( !isText() )
11915 return false;
11916 ldomXPointerEx pos(*this);
11917 return !pos.nextVisibleText(true);
11918 }
11919
11920 /// returns true if points to first visible text inside block element
isFirstVisibleTextInBlock()11921 bool ldomXPointerEx::isFirstVisibleTextInBlock()
11922 {
11923 if ( !isText() )
11924 return false;
11925 ldomXPointerEx pos(*this);
11926 return !pos.prevVisibleText(true);
11927 }
11928
11929 // sentence navigation
11930
11931 /// returns true if points to beginning of sentence
isSentenceStart()11932 bool ldomXPointerEx::isSentenceStart()
11933 {
11934 if ( isNull() )
11935 return false;
11936 if ( !isText() || !isVisible() )
11937 return false;
11938 ldomNode * node = getNode();
11939 lString32 text = node->getText();
11940 int textLen = text.length();
11941 int i = _data->getOffset();
11942 lChar32 currCh = i<textLen ? text[i] : 0;
11943 lChar32 prevCh = i>0 ? text[i-1] : 0;
11944 lChar32 prevNonSpace = 0;
11945 for ( ;i>0; i-- ) {
11946 lChar32 ch = text[i-1];
11947 if ( !IsUnicodeSpace(ch) ) {
11948 prevNonSpace = ch;
11949 break;
11950 }
11951 }
11952 #if 0
11953 // At this implementation it's a wrong to check previous node
11954 if ( !prevNonSpace ) {
11955 ldomXPointerEx pos(*this);
11956 while ( !prevNonSpace && pos.prevVisibleText(true) ) {
11957 lString32 prevText = pos.getText();
11958 for ( int j=prevText.length()-1; j>=0; j-- ) {
11959 lChar32 ch = prevText[j];
11960 if ( !IsUnicodeSpace(ch) ) {
11961 prevNonSpace = ch;
11962 break;
11963 }
11964 }
11965 }
11966 }
11967 #endif
11968
11969 // skip separated separator.
11970 if (1 == textLen) {
11971 switch (currCh) {
11972 case '.':
11973 case '?':
11974 case '!':
11975 case U'\x2026': // horizontal ellypsis
11976 return false;
11977 }
11978 }
11979
11980 if ( !IsUnicodeSpace(currCh) && IsUnicodeSpaceOrNull(prevCh) ) {
11981 switch (prevNonSpace) {
11982 case 0:
11983 case '.':
11984 case '?':
11985 case '!':
11986 case U'\x2026': // horizontal ellypsis
11987 return true;
11988 default:
11989 return false;
11990 }
11991 }
11992 return false;
11993 }
11994
11995 /// returns true if points to end of sentence
isSentenceEnd()11996 bool ldomXPointerEx::isSentenceEnd()
11997 {
11998 if ( isNull() )
11999 return false;
12000 if ( !isText() || !isVisible() )
12001 return false;
12002 ldomNode * node = getNode();
12003 lString32 text = node->getText();
12004 int textLen = text.length();
12005 int i = _data->getOffset();
12006 lChar32 currCh = i<textLen ? text[i] : 0;
12007 lChar32 prevCh = i>0 ? text[i-1] : 0;
12008 if ( IsUnicodeSpaceOrNull(currCh) ) {
12009 switch (prevCh) {
12010 case 0:
12011 case '.':
12012 case '?':
12013 case '!':
12014 case U'\x2026': // horizontal ellypsis
12015 return true;
12016 default:
12017 break;
12018 }
12019 }
12020 // word is not ended with . ! ?
12021 // check whether it's last word of block
12022 ldomXPointerEx pos(*this);
12023 //return !pos.nextVisibleWordStartInSentence();
12024 return !pos.thisVisibleWordEndInSentence();
12025 }
12026
12027 /// move to beginning of current visible text sentence
thisSentenceStart()12028 bool ldomXPointerEx::thisSentenceStart()
12029 {
12030 if ( isNull() )
12031 return false;
12032 if ( !isText() && !nextVisibleText() && !prevVisibleText() )
12033 return false;
12034 for (;;) {
12035 if ( isSentenceStart() )
12036 return true;
12037 if ( !prevVisibleWordStartInSentence() )
12038 return false;
12039 }
12040 }
12041
12042 /// move to end of current visible text sentence
thisSentenceEnd()12043 bool ldomXPointerEx::thisSentenceEnd()
12044 {
12045 if ( isNull() )
12046 return false;
12047 if ( !isText() && !nextVisibleText() && !prevVisibleText() )
12048 return false;
12049 for (;;) {
12050 if ( isSentenceEnd() )
12051 return true;
12052 if ( !nextVisibleWordEndInSentence() )
12053 return false;
12054 }
12055 }
12056
12057 /// move to beginning of next visible text sentence
nextSentenceStart()12058 bool ldomXPointerEx::nextSentenceStart()
12059 {
12060 if ( !isSentenceStart() && !thisSentenceEnd() )
12061 return false;
12062 for (;;) {
12063 if ( !nextVisibleWordStartInSentence() )
12064 return false;
12065 if ( isSentenceStart() )
12066 return true;
12067 }
12068 }
12069
12070 /// move to beginning of prev visible text sentence
prevSentenceStart()12071 bool ldomXPointerEx::prevSentenceStart()
12072 {
12073 if ( !thisSentenceStart() )
12074 return false;
12075 for (;;) {
12076 if ( !prevVisibleWordStartInSentence() )
12077 return false;
12078 if ( isSentenceStart() )
12079 return true;
12080 }
12081 }
12082
12083 /// move to end of next visible text sentence
nextSentenceEnd()12084 bool ldomXPointerEx::nextSentenceEnd()
12085 {
12086 if ( !nextSentenceStart() )
12087 return false;
12088 return thisSentenceEnd();
12089 }
12090
12091 /// move to end of next visible text sentence
prevSentenceEnd()12092 bool ldomXPointerEx::prevSentenceEnd()
12093 {
12094 if ( !thisSentenceStart() )
12095 return false;
12096 for (;;) {
12097 if ( !prevVisibleWordEndInSentence() )
12098 return false;
12099 if ( isSentenceEnd() )
12100 return true;
12101 }
12102 }
12103
12104 /// if start is after end, swap start and end
sort()12105 void ldomXRange::sort()
12106 {
12107 if ( _start.isNull() || _end.isNull() )
12108 return;
12109 if ( _start.compare(_end) > 0 ) {
12110 ldomXPointer p1( _start );
12111 ldomXPointer p2( _end );
12112 _start = p2;
12113 _end = p1;
12114 }
12115 }
12116
12117 /// backward iteration by elements of DOM three
prevElement()12118 bool ldomXPointerEx::prevElement()
12119 {
12120 if ( !ensureElement() )
12121 return false;
12122 for (;;) {
12123 if ( prevSiblingElement() ) {
12124 while ( lastElementChild() )
12125 ;
12126 return true;
12127 }
12128 if ( !parent() )
12129 return false;
12130 return true;
12131 }
12132 }
12133
12134 /// move to next final visible node (~paragraph)
nextVisibleFinal()12135 bool ldomXPointerEx::nextVisibleFinal()
12136 {
12137 for ( ;; ) {
12138 if ( !nextElement() )
12139 return false;
12140 if ( isVisibleFinal() )
12141 return true;
12142 }
12143 }
12144
12145 /// move to previous final visible node (~paragraph)
prevVisibleFinal()12146 bool ldomXPointerEx::prevVisibleFinal()
12147 {
12148 for ( ;; ) {
12149 if ( !prevElement() )
12150 return false;
12151 if ( isVisibleFinal() )
12152 return true;
12153 }
12154 }
12155
12156 /// run callback for each node in range
forEach(ldomNodeCallback * callback)12157 void ldomXRange::forEach( ldomNodeCallback * callback )
12158 {
12159 if ( isNull() )
12160 return;
12161 ldomXRange pos( _start, _end, 0 );
12162 bool allowGoRecurse = true;
12163 while ( !pos._start.isNull() && pos._start.compare( _end ) < 0 ) {
12164 // do something
12165 ldomNode * node = pos._start.getNode();
12166 //lString32 path = pos._start.toString();
12167 //CRLog::trace( "%s", UnicodeToUtf8(path).c_str() );
12168 if ( node->isElement() ) {
12169 allowGoRecurse = callback->onElement( &pos.getStart() );
12170 } else if ( node->isText() ) {
12171 lString32 txt = node->getText();
12172 pos._end = pos._start;
12173 pos._start.setOffset( 0 );
12174 pos._end.setOffset( txt.length() );
12175 if ( _start.getNode() == node ) {
12176 pos._start.setOffset( _start.getOffset() );
12177 }
12178 if ( _end.getNode() == node && pos._end.getOffset() > _end.getOffset()) {
12179 pos._end.setOffset( _end.getOffset() );
12180 }
12181 callback->onText( &pos );
12182 allowGoRecurse = false;
12183 }
12184 // move to next item
12185 bool stop = false;
12186 if ( !allowGoRecurse || !pos._start.child(0) ) {
12187 while ( !pos._start.nextSibling() ) {
12188 if ( !pos._start.parent() ) {
12189 stop = true;
12190 break;
12191 }
12192 }
12193 }
12194 if ( stop )
12195 break;
12196 }
12197 }
12198
12199 class ldomWordsCollector : public ldomNodeCallback {
12200 LVArray<ldomWord> & _list;
operator =(ldomWordsCollector &)12201 ldomWordsCollector & operator = (ldomWordsCollector&) {
12202 // no assignment
12203 return *this;
12204 }
12205 public:
ldomWordsCollector(LVArray<ldomWord> & list)12206 ldomWordsCollector( LVArray<ldomWord> & list )
12207 : _list( list )
12208 {
12209 }
12210 /// called for each found text fragment in range
onText(ldomXRange * nodeRange)12211 virtual void onText( ldomXRange * nodeRange )
12212 {
12213 ldomNode * node = nodeRange->getStart().getNode();
12214 lString32 text = node->getText();
12215 int len = text.length();
12216 int end = nodeRange->getEnd().getOffset();
12217 if ( len>end )
12218 len = end;
12219 int beginOfWord = -1;
12220 for ( int i=nodeRange->getStart().getOffset(); i <= len; i++ ) {
12221 // int alpha = lGetCharProps(text[i]) & CH_PROP_ALPHA;
12222 // Also allow digits (years, page numbers) to be considered words
12223 // int alpha = lGetCharProps(text[i]) & (CH_PROP_ALPHA|CH_PROP_DIGIT|CH_PROP_HYPHEN);
12224 // We use lStr_isWordSeparator() as the other word finding/skipping functions do,
12225 // so they all share the same notion of what a word is.
12226 int alpha = !lStr_isWordSeparator(text[i]); // alpha, number, CJK char
12227 if (alpha && beginOfWord<0 ) {
12228 beginOfWord = i;
12229 }
12230 if ( !alpha && beginOfWord>=0) { // space, punctuation, sign, paren...
12231 _list.add( ldomWord( node, beginOfWord, i ) );
12232 beginOfWord = -1;
12233 }
12234 if (lGetCharProps(text[i]) == CH_PROP_CJK && i < len) { // a CJK char makes its own word
12235 _list.add( ldomWord( node, i, i+1 ) );
12236 beginOfWord = -1;
12237 }
12238 }
12239 }
12240 /// called for each found node in range
onElement(ldomXPointerEx * ptr)12241 virtual bool onElement( ldomXPointerEx * ptr )
12242 {
12243 ldomNode * elem = ptr->getNode();
12244 if ( elem->getRendMethod()==erm_invisible )
12245 return false;
12246 return true;
12247 }
12248 };
12249
12250 /// get all words from specified range
getRangeWords(LVArray<ldomWord> & list)12251 void ldomXRange::getRangeWords( LVArray<ldomWord> & list )
12252 {
12253 ldomWordsCollector collector( list );
12254 forEach( &collector );
12255 }
12256
12257 /// adds all visible words from range, returns number of added words
addRangeWords(ldomXRange & range,bool)12258 int ldomWordExList::addRangeWords( ldomXRange & range, bool /*trimPunctuation*/ ) {
12259 LVArray<ldomWord> list;
12260 range.getRangeWords( list );
12261 for ( int i=0; i<list.length(); i++ )
12262 add( new ldomWordEx(list[i]) );
12263 init();
12264 return list.length();
12265 }
12266
getMiddlePoint()12267 lvPoint ldomMarkedRange::getMiddlePoint() {
12268 if ( start.y==end.y ) {
12269 return lvPoint( ((start.x + end.x)>>1), start.y );
12270 } else {
12271 return start;
12272 }
12273 }
12274
12275 /// returns distance (dx+dy) from specified point to middle point
calcDistance(int x,int y,MoveDirection dir)12276 int ldomMarkedRange::calcDistance( int x, int y, MoveDirection dir ) {
12277 lvPoint middle = getMiddlePoint();
12278 int dx = middle.x - x;
12279 int dy = middle.y - y;
12280 if ( dx<0 ) dx = -dx;
12281 if ( dy<0 ) dy = -dy;
12282 switch (dir) {
12283 case DIR_LEFT:
12284 case DIR_RIGHT:
12285 return dx + dy;
12286 case DIR_UP:
12287 case DIR_DOWN:
12288 return dx + dy*100;
12289 case DIR_ANY:
12290 return dx + dy;
12291 }
12292
12293
12294 return dx + dy;
12295 }
12296
12297 /// select word
selectWord(ldomWordEx * word,MoveDirection dir)12298 void ldomWordExList::selectWord( ldomWordEx * word, MoveDirection dir )
12299 {
12300 selWord = word;
12301 if ( selWord ) {
12302 lvPoint middle = word->getMark().getMiddlePoint();
12303 if ( x==-1 || (dir!=DIR_UP && dir!=DIR_DOWN) )
12304 x = middle.x;
12305 y = middle.y;
12306 } else {
12307 x = y = -1;
12308 }
12309 }
12310
12311 /// select next word in specified direction
selectNextWord(MoveDirection dir,int moveBy)12312 ldomWordEx * ldomWordExList::selectNextWord( MoveDirection dir, int moveBy )
12313 {
12314 if ( !selWord )
12315 return selectMiddleWord();
12316 pattern.clear();
12317 for ( int i=0; i<moveBy; i++ ) {
12318 ldomWordEx * word = findNearestWord( x, y, dir );
12319 if ( word )
12320 selectWord( word, dir );
12321 }
12322 return selWord;
12323 }
12324
12325 /// select middle word in range
selectMiddleWord()12326 ldomWordEx * ldomWordExList::selectMiddleWord() {
12327 if ( minx==-1 )
12328 init();
12329 ldomWordEx * word = findNearestWord( (maxx+minx)/2, (miny+maxy)/2, DIR_ANY );
12330 selectWord(word, DIR_ANY);
12331 return word;
12332 }
12333
findWordByPattern()12334 ldomWordEx * ldomWordExList::findWordByPattern()
12335 {
12336 ldomWordEx * lastBefore = NULL;
12337 ldomWordEx * firstAfter = NULL;
12338 bool selReached = false;
12339 for ( int i=0; i<length(); i++ ) {
12340 ldomWordEx * item = get(i);
12341 if ( item==selWord )
12342 selReached = true;
12343 lString32 text = item->getText();
12344 text.lowercase();
12345 bool flg = true;
12346 for ( int j=0; j<pattern.length(); j++ ) {
12347 if ( j>=text.length() ) {
12348 flg = false;
12349 break;
12350 }
12351 lString32 chars = pattern[j];
12352 chars.lowercase();
12353 bool charFound = false;
12354 for ( int k=0; k<chars.length(); k++ ) {
12355 if ( chars[k]==text[j] ) {
12356 charFound = true;
12357 break;
12358 }
12359 }
12360 if ( !charFound ) {
12361 flg = false;
12362 break;
12363 }
12364 }
12365 if ( !flg )
12366 continue;
12367 if ( selReached ) {
12368 if ( firstAfter==NULL )
12369 firstAfter = item;
12370 } else {
12371 lastBefore = item;
12372 }
12373 }
12374
12375 if ( firstAfter )
12376 return firstAfter;
12377 else
12378 return lastBefore;
12379 }
12380
12381 /// try append search pattern and find word
appendPattern(lString32 chars)12382 ldomWordEx * ldomWordExList::appendPattern(lString32 chars)
12383 {
12384 pattern.add(chars);
12385 ldomWordEx * foundWord = findWordByPattern();
12386
12387 if ( foundWord ) {
12388 selectWord(foundWord, DIR_ANY);
12389 } else {
12390 pattern.erase(pattern.length()-1, 1);
12391 }
12392 return foundWord;
12393 }
12394
12395 /// remove last character from pattern and try to search
reducePattern()12396 ldomWordEx * ldomWordExList::reducePattern()
12397 {
12398 if ( pattern.length()==0 )
12399 return NULL;
12400 pattern.erase(pattern.length()-1, 1);
12401 ldomWordEx * foundWord = findWordByPattern();
12402
12403 if ( foundWord )
12404 selectWord(foundWord, DIR_ANY);
12405 return foundWord;
12406 }
12407
12408 /// find word nearest to specified point
findNearestWord(int x,int y,MoveDirection dir)12409 ldomWordEx * ldomWordExList::findNearestWord( int x, int y, MoveDirection dir ) {
12410 if ( !length() )
12411 return NULL;
12412 int bestDistance = -1;
12413 ldomWordEx * bestWord = NULL;
12414 ldomWordEx * defWord = (dir==DIR_LEFT || dir==DIR_UP) ? get(length()-1) : get(0);
12415 int i;
12416 if ( dir==DIR_LEFT || dir==DIR_RIGHT ) {
12417 int thisLineY = -1;
12418 int thisLineDy = -1;
12419 for ( i=0; i<length(); i++ ) {
12420 ldomWordEx * item = get(i);
12421 lvPoint middle = item->getMark().getMiddlePoint();
12422 int dy = middle.y - y;
12423 if ( dy<0 ) dy = -dy;
12424 if ( thisLineY==-1 || thisLineDy>dy ) {
12425 thisLineY = middle.y;
12426 thisLineDy = dy;
12427 }
12428 }
12429 ldomWordEx * nextLineWord = NULL;
12430 for ( i=0; i<length(); i++ ) {
12431 ldomWordEx * item = get(i);
12432 if ( dir!=DIR_ANY && item==selWord )
12433 continue;
12434 ldomMarkedRange * mark = &item->getMark();
12435 lvPoint middle = mark->getMiddlePoint();
12436 switch ( dir ) {
12437 case DIR_LEFT:
12438 if ( middle.y<thisLineY )
12439 nextLineWord = item; // last word of prev line
12440 if ( middle.x>=x )
12441 continue;
12442 break;
12443 case DIR_RIGHT:
12444 if ( nextLineWord==NULL && middle.y>thisLineY )
12445 nextLineWord = item; // first word of next line
12446 if ( middle.x<=x )
12447 continue;
12448 break;
12449 case DIR_UP:
12450 case DIR_DOWN:
12451 case DIR_ANY:
12452 // none
12453 break;
12454 }
12455 if ( middle.y!=thisLineY )
12456 continue;
12457 int dist = mark->calcDistance(x, y, dir);
12458 if ( bestDistance==-1 || dist<bestDistance ) {
12459 bestWord = item;
12460 bestDistance = dist;
12461 }
12462 }
12463 if ( bestWord!=NULL )
12464 return bestWord; // found in the same line
12465 if ( nextLineWord!=NULL )
12466 return nextLineWord;
12467 return defWord;
12468 }
12469 for ( i=0; i<length(); i++ ) {
12470 ldomWordEx * item = get(i);
12471 if ( dir!=DIR_ANY && item==selWord )
12472 continue;
12473 ldomMarkedRange * mark = &item->getMark();
12474 lvPoint middle = mark->getMiddlePoint();
12475 if ( dir==DIR_UP && middle.y >= y )
12476 continue;
12477 if ( dir==DIR_DOWN && middle.y <= y )
12478 continue;
12479
12480 int dist = mark->calcDistance(x, y, dir);
12481 if ( bestDistance==-1 || dist<bestDistance ) {
12482 bestWord = item;
12483 bestDistance = dist;
12484 }
12485 }
12486 if ( bestWord!=NULL )
12487 return bestWord;
12488 return defWord;
12489 }
12490
init()12491 void ldomWordExList::init()
12492 {
12493 if ( !length() )
12494 return;
12495 for ( int i=0; i<length(); i++ ) {
12496 ldomWordEx * item = get(i);
12497 lvPoint middle = item->getMark().getMiddlePoint();
12498 if ( i==0 || minx > middle.x )
12499 minx = middle.x;
12500 if ( i==0 || maxx < middle.x )
12501 maxx = middle.x;
12502 if ( i==0 || miny > middle.y )
12503 miny = middle.y;
12504 if ( i==0 || maxy < middle.y )
12505 maxy = middle.y;
12506 }
12507 }
12508
12509
12510 class ldomTextCollector : public ldomNodeCallback
12511 {
12512 private:
12513 bool lastText;
12514 bool newBlock;
12515 lChar32 delimiter;
12516 int maxLen;
12517 lString32 text;
12518 public:
ldomTextCollector(lChar32 blockDelimiter,int maxTextLen)12519 ldomTextCollector( lChar32 blockDelimiter, int maxTextLen )
12520 : lastText(false), newBlock(true), delimiter( blockDelimiter), maxLen( maxTextLen )
12521 {
12522 }
12523 /// destructor
~ldomTextCollector()12524 virtual ~ldomTextCollector() { }
12525 /// called for each found text fragment in range
onText(ldomXRange * nodeRange)12526 virtual void onText( ldomXRange * nodeRange )
12527 {
12528 if ( newBlock && !text.empty()) {
12529 text << delimiter;
12530 }
12531 lString32 txt = nodeRange->getStart().getNode()->getText();
12532 int start = nodeRange->getStart().getOffset();
12533 int end = nodeRange->getEnd().getOffset();
12534 if ( start < end ) {
12535 text << txt.substr( start, end-start );
12536 }
12537 lastText = true;
12538 newBlock = false;
12539 }
12540 /// called for each found node in range
onElement(ldomXPointerEx * ptr)12541 virtual bool onElement( ldomXPointerEx * ptr )
12542 {
12543 #if BUILD_LITE!=1
12544 ldomNode * elem = (ldomNode *)ptr->getNode();
12545 // Allow tweaking that with hints
12546 css_style_ref_t style = elem->getStyle();
12547 if ( STYLE_HAS_CR_HINT(style, TEXT_SELECTION_SKIP) ) {
12548 return false;
12549 }
12550 else if ( STYLE_HAS_CR_HINT(style, TEXT_SELECTION_INLINE) ) {
12551 newBlock = false;
12552 return true;
12553 }
12554 else if ( STYLE_HAS_CR_HINT(style, TEXT_SELECTION_BLOCK) ) {
12555 newBlock = true;
12556 return true;
12557 }
12558 lvdom_element_render_method rm = elem->getRendMethod();
12559 if ( rm == erm_invisible )
12560 return false;
12561 if ( rm == erm_inline ) {
12562 // Don't set newBlock if rendering method is erm_inline,
12563 // no matter the original CSS display.
12564 // (Don't reset any previously set and not consumed newBlock)
12565 return true;
12566 }
12567 // For other rendering methods (that would bring newBlock=true),
12568 // look at the initial CSS display, as we might have boxed some
12569 // inline-like elements for rendering purpose.
12570 css_display_t d = style->display;
12571 if ( d <= css_d_inline || d == css_d_inline_block || d == css_d_inline_table ) {
12572 // inline, ruby; consider inline-block/-table as inline, in case
12573 // they don't contain much (if they do, some inner block element
12574 // will set newBlock=true).
12575 return true;
12576 }
12577 // Otherwise, it's a block like node, and we want a \n before the next text
12578 newBlock = true;
12579 return true;
12580 #else
12581 newBlock = true;
12582 return true;
12583 #endif
12584 }
12585 /// get collected text
getText()12586 lString32 getText() { return text; }
12587 };
12588
12589 /// returns text between two XPointer positions
getRangeText(lChar32 blockDelimiter,int maxTextLen)12590 lString32 ldomXRange::getRangeText( lChar32 blockDelimiter, int maxTextLen )
12591 {
12592 ldomTextCollector callback( blockDelimiter, maxTextLen );
12593 forEach( &callback );
12594 return removeSoftHyphens( callback.getText() );
12595 }
12596
12597 /// returns href attribute of <A> element, plus xpointer of <A> element itself
getHRef(ldomXPointer & a_xpointer)12598 lString32 ldomXPointer::getHRef(ldomXPointer & a_xpointer)
12599 {
12600 if ( isNull() )
12601 return lString32::empty_str;
12602 ldomNode * node = getNode();
12603 while ( node && !node->isElement() )
12604 node = node->getParentNode();
12605 while ( node && node->getNodeId()!=el_a )
12606 node = node->getParentNode();
12607 if ( !node )
12608 return lString32::empty_str;
12609 a_xpointer.setNode(node);
12610 a_xpointer.setOffset(0);
12611 lString32 ref = node->getAttributeValue( LXML_NS_ANY, attr_href );
12612 if (!ref.empty() && ref[0] != '#')
12613 ref = DecodeHTMLUrlString(ref);
12614 return ref;
12615 }
12616
12617 /// returns href attribute of <A> element, null string if not found
getHRef()12618 lString32 ldomXPointer::getHRef()
12619 {
12620 ldomXPointer unused_a_xpointer;
12621 return getHRef(unused_a_xpointer);
12622 }
12623
12624 /// returns href attribute of <A> element, plus xpointer of <A> element itself
getHRef(ldomXPointer & a_xpointer)12625 lString32 ldomXRange::getHRef(ldomXPointer & a_xpointer)
12626 {
12627 if ( isNull() )
12628 return lString32::empty_str;
12629 return _start.getHRef(a_xpointer);
12630 }
12631
12632 /// returns href attribute of <A> element, null string if not found
getHRef()12633 lString32 ldomXRange::getHRef()
12634 {
12635 if ( isNull() )
12636 return lString32::empty_str;
12637 return _start.getHRef();
12638 }
12639
12640
LVParseXMLStream(LVStreamRef stream,const elem_def_t * elem_table,const attr_def_t * attr_table,const ns_def_t * ns_table)12641 ldomDocument * LVParseXMLStream( LVStreamRef stream,
12642 const elem_def_t * elem_table,
12643 const attr_def_t * attr_table,
12644 const ns_def_t * ns_table )
12645 {
12646 if ( stream.isNull() )
12647 return NULL;
12648 bool error = true;
12649 ldomDocument * doc;
12650 doc = new ldomDocument();
12651 doc->setDocFlags( 0 );
12652
12653 ldomDocumentWriter writer(doc);
12654 doc->setNodeTypes( elem_table );
12655 doc->setAttributeTypes( attr_table );
12656 doc->setNameSpaceTypes( ns_table );
12657
12658 /// FB2 format
12659 LVFileFormatParser * parser = new LVXMLParser(stream, &writer);
12660 if ( parser->CheckFormat() ) {
12661 if ( parser->Parse() ) {
12662 error = false;
12663 }
12664 }
12665 delete parser;
12666 if ( error ) {
12667 delete doc;
12668 doc = NULL;
12669 }
12670 return doc;
12671 }
12672
LVParseHTMLStream(LVStreamRef stream,const elem_def_t * elem_table,const attr_def_t * attr_table,const ns_def_t * ns_table)12673 ldomDocument * LVParseHTMLStream( LVStreamRef stream,
12674 const elem_def_t * elem_table,
12675 const attr_def_t * attr_table,
12676 const ns_def_t * ns_table )
12677 {
12678 if ( stream.isNull() )
12679 return NULL;
12680 bool error = true;
12681 ldomDocument * doc;
12682 doc = new ldomDocument();
12683 doc->setDocFlags( 0 );
12684
12685 ldomDocumentWriterFilter writerFilter(doc, false, HTML_AUTOCLOSE_TABLE);
12686 doc->setNodeTypes( elem_table );
12687 doc->setAttributeTypes( attr_table );
12688 doc->setNameSpaceTypes( ns_table );
12689
12690 /// FB2 format
12691 LVFileFormatParser * parser = new LVHTMLParser(stream, &writerFilter);
12692 if ( parser->CheckFormat() ) {
12693 if ( parser->Parse() ) {
12694 error = false;
12695 }
12696 }
12697 delete parser;
12698 if ( error ) {
12699 delete doc;
12700 doc = NULL;
12701 }
12702 return doc;
12703 }
12704
#if 0
// Disabled (compiled out) helper.
// Returns a copy of 'path' where each '/' and '\' is replaced by '_'.
static lString32 escapeDocPath( lString32 path )
{
    for ( int i=0; i<path.length(); i++ ) {
        lChar32 ch = path[i];
        if ( ch=='/' || ch=='\\')
            path[i] = '_';
    }
    return path;
}
#endif
12716
12717 /////////////////////////////////////////////////////////////////
12718 /// ldomDocumentFragmentWriter
12719 // Used for EPUB with each individual HTML files in the EPUB,
12720 // drives ldomDocumentWriter to build one single document from them.
12721
convertId(lString32 id)12722 lString32 ldomDocumentFragmentWriter::convertId( lString32 id )
12723 {
12724 if ( !codeBasePrefix.empty() ) {
12725 return codeBasePrefix + "_" + " " + id;//add a space for later
12726 }
12727 return id;
12728 }
12729
convertHref(lString32 href)12730 lString32 ldomDocumentFragmentWriter::convertHref( lString32 href )
12731 {
12732 if ( href.pos("://")>=0 )
12733 return href; // fully qualified href: no conversion
12734 if ( href.length() > 10 && href[4] == ':' && href.startsWith(lString32("data:image/")) )
12735 return href; // base64 encoded image (<img src="data:image/png;base64,iVBORw0KG...>): no conversion
12736
12737 //CRLog::trace("convertHref(%s, codeBase=%s, filePathName=%s)", LCSTR(href), LCSTR(codeBase), LCSTR(filePathName));
12738
12739 if (href[0] == '#') {
12740 // Link to anchor in the same docFragment
12741 lString32 replacement = pathSubstitutions.get(filePathName);
12742 if (replacement.empty())
12743 return href;
12744 lString32 p = cs32("#") + replacement + "_" + " " + href.substr(1);
12745 //CRLog::trace("href %s -> %s", LCSTR(href), LCSTR(p));
12746 return p;
12747 }
12748
12749 // href = LVCombinePaths(codeBase, href);
12750
12751 // Depending on what's calling us, href may or may not have
12752 // gone thru DecodeHTMLUrlString() to decode %-encoded bits.
12753 // We'll need to try again with DecodeHTMLUrlString() if not
12754 // initially found in "pathSubstitutions" (whose filenames went
12755 // thru DecodeHTMLUrlString(), and so did 'codeBase').
12756
12757 // resolve relative links
12758 lString32 p, id; // path, id
12759 if ( !href.split2(cs32("#"), p, id) )
12760 p = href;
12761 if ( p.empty() ) {
12762 //CRLog::trace("codebase = %s -> href = %s", LCSTR(codeBase), LCSTR(href));
12763 if ( codeBasePrefix.empty() )
12764 return LVCombinePaths(codeBase, href);
12765 p = codeBasePrefix;
12766 }
12767 else {
12768 lString32 replacement = pathSubstitutions.get(LVCombinePaths(codeBase, p));
12769 //CRLog::trace("href %s -> %s", LCSTR(p), LCSTR(replacement));
12770 if ( !replacement.empty() )
12771 p = replacement;
12772 else {
12773 // Try again after DecodeHTMLUrlString()
12774 p = DecodeHTMLUrlString(p);
12775 replacement = pathSubstitutions.get(LVCombinePaths(codeBase, p));
12776 if ( !replacement.empty() )
12777 p = replacement;
12778 else
12779 return LVCombinePaths(codeBase, href);
12780 }
12781 //else
12782 // p = codeBasePrefix;
12783 //p = LVCombinePaths( codeBase, p ); // relative to absolute path
12784 }
12785 if ( !id.empty() )
12786 p = p + "_" + " " + id;
12787
12788 p = cs32("#") + p;
12789
12790 //CRLog::debug("converted href=%s to %s", LCSTR(href), LCSTR(p) );
12791
12792 return p;
12793 }
12794
setCodeBase(lString32 fileName)12795 void ldomDocumentFragmentWriter::setCodeBase( lString32 fileName )
12796 {
12797 filePathName = fileName;
12798 codeBasePrefix = pathSubstitutions.get(fileName);
12799 codeBase = LVExtractPath(filePathName);
12800 if ( codeBasePrefix.empty() ) {
12801 CRLog::trace("codeBasePrefix is empty for path %s", LCSTR(fileName));
12802 codeBasePrefix = pathSubstitutions.get(fileName);
12803 }
12804 stylesheetFile.clear();
12805 }
12806
12807 /// called on attribute
OnAttribute(const lChar32 * nsname,const lChar32 * attrname,const lChar32 * attrvalue)12808 void ldomDocumentFragmentWriter::OnAttribute( const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue )
12809 {
12810 if ( insideTag ) {
12811 if ( !lStr_cmp(attrname, "href") || !lStr_cmp(attrname, "src") ) {
12812 parent->OnAttribute(nsname, attrname, convertHref(lString32(attrvalue)).c_str() );
12813 } else if ( !lStr_cmp(attrname, "id") ) {
12814 parent->OnAttribute(nsname, attrname, convertId(lString32(attrvalue)).c_str() );
12815 } else if ( !lStr_cmp(attrname, "name") ) {
12816 //CRLog::trace("name attribute = %s", LCSTR(lString32(attrvalue)));
12817 parent->OnAttribute(nsname, attrname, convertId(lString32(attrvalue)).c_str() );
12818 } else {
12819 parent->OnAttribute(nsname, attrname, attrvalue);
12820 }
12821 } else {
12822 if (insideHtmlTag) {
12823 // Grab attributes from <html dir="rtl" lang="he"> (not included in the DOM)
12824 // to reinject them in <DocFragment>
12825 if ( !lStr_cmp(attrname, "dir") )
12826 htmlDir = attrvalue;
12827 else if ( !lStr_cmp(attrname, "lang") )
12828 htmlLang = attrvalue;
12829 }
12830 else if ( styleDetectionState ) {
12831 if ( !lStr_cmp(attrname, "rel") && lString32(attrvalue).lowercase() == U"stylesheet" )
12832 styleDetectionState |= 2;
12833 else if ( !lStr_cmp(attrname, "type") ) {
12834 if ( lString32(attrvalue).lowercase() == U"text/css")
12835 styleDetectionState |= 4;
12836 else
12837 styleDetectionState = 0; // text/css type supported only
12838 } else if ( !lStr_cmp(attrname, "href") ) {
12839 styleDetectionState |= 8;
12840 lString32 href = attrvalue;
12841 if ( stylesheetFile.empty() )
12842 tmpStylesheetFile = LVCombinePaths( codeBase, href );
12843 else
12844 tmpStylesheetFile = href;
12845 }
12846 if (styleDetectionState == 15) {
12847 if ( !stylesheetFile.empty() )
12848 stylesheetLinks.add(tmpStylesheetFile);
12849 else
12850 stylesheetFile = tmpStylesheetFile;
12851 styleDetectionState = 0;
12852 CRLog::trace("CSS file href: %s", LCSTR(stylesheetFile));
12853 }
12854 }
12855 }
12856 }
12857
12858 /// called on opening tag
OnTagOpen(const lChar32 * nsname,const lChar32 * tagname)12859 ldomNode * ldomDocumentFragmentWriter::OnTagOpen( const lChar32 * nsname, const lChar32 * tagname )
12860 {
12861 if ( insideTag ) {
12862 return parent->OnTagOpen(nsname, tagname);
12863 } else {
12864 if ( !lStr_cmp(tagname, "link") )
12865 styleDetectionState = 1;
12866 else if ( !lStr_cmp(tagname, "style") )
12867 headStyleState = 1;
12868 else if ( !lStr_cmp(tagname, "html") ) {
12869 insideHtmlTag = true;
12870 htmlDir.clear();
12871 htmlLang.clear();
12872 }
12873 }
12874
12875 // When meeting the <body> of each of an EPUB's embedded HTML files,
12876 // we will insert into parent (the ldomDocumentWriter that makes out a single
12877 // document) a <DocFragment> wrapping that <body>. It may end up as:
12878 //
12879 // <DocFragment StyleSheet="OEBPS/Styles/main.css" id="_doc_fragment_2">
12880 // <stylesheet href="OEBPS/Text/">
12881 // @import url("../Styles/other.css");
12882 // @import url(path_to_3rd_css_file)
12883 // here is <HEAD><STYLE> content
12884 // </stylesheet>
12885 // <body>
12886 // here is original <BODY> content
12887 // </body>
12888 // </DocFragment>
12889 //
12890 // (Why one css file link in an attribute and others in the tag?
12891 // I suppose it's because attribute values are hashed and stored only
12892 // once, so it saves space in the DOM/cache for documents with many
12893 // fragments and a single CSS link, which is the most usual case.)
12894
12895 if ( !insideTag && baseTag==tagname ) { // with EPUBs: baseTag="body"
12896 insideTag = true;
12897 if ( !baseTagReplacement.empty() ) { // with EPUBs: baseTagReplacement="DocFragment"
12898 baseElement = parent->OnTagOpen(U"", baseTagReplacement.c_str()); // start <DocFragment
12899 lastBaseElement = baseElement;
12900 if ( !stylesheetFile.empty() ) {
12901 // add attribute <DocFragment StyleSheet="path_to_css_1st_file"
12902 parent->OnAttribute(U"", U"StyleSheet", stylesheetFile.c_str() );
12903 CRLog::debug("Setting StyleSheet attribute to %s for document fragment", LCSTR(stylesheetFile) );
12904 }
12905 if ( !codeBasePrefix.empty() ) // add attribute <DocFragment id="..html_file_name"
12906 parent->OnAttribute(U"", U"id", codeBasePrefix.c_str() );
12907 if ( !htmlDir.empty() ) // add attribute <DocFragment dir="rtl" from <html dir="rtl"> tag
12908 parent->OnAttribute(U"", U"dir", htmlDir.c_str() );
12909 if ( !htmlLang.empty() ) // add attribute <DocFragment lang="ar" from <html lang="ar"> tag
12910 parent->OnAttribute(U"", U"lang", htmlLang.c_str() );
12911 if (this->m_nonlinear)
12912 parent->OnAttribute(U"", U"NonLinear", U"" );
12913
12914 parent->OnTagBody(); // inside <DocFragment>
12915 if ( !headStyleText.empty() || stylesheetLinks.length() > 0 ) {
12916 // add stylesheet element as child of <DocFragment>: <stylesheet href="...">
12917 parent->OnTagOpen(U"", U"stylesheet");
12918 parent->OnAttribute(U"", U"href", codeBase.c_str() );
12919 lString32 imports;
12920 for (int i = 0; i < stylesheetLinks.length(); i++) {
12921 lString32 import("@import url(\"");
12922 import << stylesheetLinks.at(i);
12923 import << "\");\n";
12924 imports << import;
12925 }
12926 stylesheetLinks.clear();
12927 lString32 styleText = imports + headStyleText.c_str();
12928 // Add it to <DocFragment><stylesheet>, so it becomes:
12929 // <stylesheet href="...">
12930 // @import url(path_to_css_2nd_file)
12931 // @import url(path_to_css_3rd_file)
12932 // here is <HEAD><STYLE> content
12933 // </stylesheet>
12934 parent->OnTagBody();
12935 parent->OnText(styleText.c_str(), styleText.length(), 0);
12936 parent->OnTagClose(U"", U"stylesheet");
12937 // done with <DocFragment><stylesheet>...</stylesheet>
12938 }
12939 // Finally, create <body> and go on.
12940 // The styles we have just set via <stylesheet> element and
12941 // StyleSheet= attribute will be applied by this OnTagOpen("body")
12942 // (including those that may apply to body itself), push()'ing
12943 // the previous stylesheet state, that will be pop()'ed when the
12944 // ldomElementWriter for DocFragment is left/destroyed (by onBodyExit(),
12945 // because this OnTagOpen has set to it _stylesheetIsSet).
12946 parent->OnTagOpen(U"", baseTag.c_str());
12947 parent->OnTagBody();
12948 return baseElement;
12949 }
12950 }
12951 return NULL;
12952 }
12953
12954 /// called on closing tag
OnTagClose(const lChar32 * nsname,const lChar32 * tagname,bool self_closing_tag)12955 void ldomDocumentFragmentWriter::OnTagClose( const lChar32 * nsname, const lChar32 * tagname, bool self_closing_tag )
12956 {
12957 styleDetectionState = headStyleState = 0;
12958 if ( insideTag && baseTag==tagname ) {
12959 insideTag = false;
12960 if ( !baseTagReplacement.empty() ) {
12961 parent->OnTagClose(U"", baseTag.c_str());
12962 parent->OnTagClose(U"", baseTagReplacement.c_str());
12963 }
12964 baseElement = NULL;
12965 return;
12966 }
12967 if ( insideTag )
12968 parent->OnTagClose(nsname, tagname, self_closing_tag);
12969 }
12970
12971 /// called after > of opening tag (when entering tag body) or just before /> closing tag for empty tags
OnTagBody()12972 void ldomDocumentFragmentWriter::OnTagBody()
12973 {
12974 if ( insideTag ) {
12975 parent->OnTagBody();
12976 }
12977 else if ( insideHtmlTag ) {
12978 insideHtmlTag = false;
12979 }
12980 if ( styleDetectionState == 11 ) {
12981 // incomplete <link rel="stylesheet", href="..." />; assuming type="text/css"
12982 if ( !stylesheetFile.empty() )
12983 stylesheetLinks.add(tmpStylesheetFile);
12984 else
12985 stylesheetFile = tmpStylesheetFile;
12986 styleDetectionState = 0;
12987 } else
12988 styleDetectionState = 0;
12989 }
12990
12991
12992
12993 /////////////////////////////////////////////////////////////////
12994 /// ldomDocumentWriterFilter
12995 // Used to parse lousy HTML in formats: HTML, CHM, PDB(html)
12996 // For all these document formats, it is fed by HTMLParser that does
12997 // convert to lowercase the tag names and attributes.
12998 // ldomDocumentWriterFilter does then deal with auto-closing unbalanced
12999 // HTML tags according to the rules set in crengine/src/lvxml.cpp HTML_AUTOCLOSE_TABLE[]
13000
13001 /** \brief callback object to fill DOM tree
13002
13003 To be used with XML parser as callback object.
13004
13005 Creates document according to incoming events.
13006
13007 Autoclose HTML tags.
13008 */
13009
setClass(const lChar32 * className,bool overrideExisting)13010 void ldomDocumentWriterFilter::setClass( const lChar32 * className, bool overrideExisting )
13011 {
13012 ldomNode * node = _currNode->_element;
13013 if ( _classAttrId==0 ) {
13014 _classAttrId = _document->getAttrNameIndex(U"class");
13015 }
13016 if ( overrideExisting || !node->hasAttribute(_classAttrId) ) {
13017 node->setAttributeValue(LXML_NS_NONE, _classAttrId, className);
13018 }
13019 }
13020
appendStyle(const lChar32 * style)13021 void ldomDocumentWriterFilter::appendStyle( const lChar32 * style )
13022 {
13023 ldomNode * node = _currNode->_element;
13024 if ( _styleAttrId==0 ) {
13025 _styleAttrId = _document->getAttrNameIndex(U"style");
13026 }
13027 // Append to the style attribute even if embedded styles are disabled
13028 // at loading time, otherwise it won't be there if we enable them later
13029 // if (!_document->getDocFlag(DOC_FLAG_ENABLE_INTERNAL_STYLES))
13030 // return; // disabled
13031
13032 lString32 oldStyle = node->getAttributeValue(_styleAttrId);
13033 if ( !oldStyle.empty() && oldStyle.at(oldStyle.length()-1)!=';' )
13034 oldStyle << "; ";
13035 oldStyle << style;
13036 node->setAttributeValue(LXML_NS_NONE, _styleAttrId, oldStyle.c_str());
13037 }
13038
13039 // Legacy auto close handler (gDOMVersionRequested < 20200824)
AutoClose(lUInt16 tag_id,bool open)13040 void ldomDocumentWriterFilter::AutoClose( lUInt16 tag_id, bool open )
13041 {
13042 lUInt16 * rule = _rules[tag_id];
13043 if ( !rule )
13044 return;
13045 if ( open ) {
13046 ldomElementWriter * found = NULL;
13047 ldomElementWriter * p = _currNode;
13048 while ( p && !found ) {
13049 lUInt16 id = p->_element->getNodeId();
13050 for ( int i=0; rule[i]; i++ ) {
13051 if ( rule[i]==id ) {
13052 found = p;
13053 break;
13054 }
13055 }
13056 p = p->_parent;
13057 }
13058 // found auto-close target
13059 if ( found != NULL ) {
13060 bool done = false;
13061 while ( !done && _currNode ) {
13062 if ( _currNode == found )
13063 done = true;
13064 ldomNode * closedElement = _currNode->getElement();
13065 _currNode = pop( _currNode, closedElement->getNodeId() );
13066 //ElementCloseHandler( closedElement );
13067 }
13068 }
13069 } else {
13070 if ( !rule[0] )
13071 _currNode = pop( _currNode, _currNode->getElement()->getNodeId() );
13072 }
13073 }
13074
13075 // With gDOMVersionRequested >= 20200824, we use hardcoded rules
13076 // for opening and closing tags, trying to follow what's relevant
13077 // in the HTML Living Standard (=HTML5):
13078 // https://html.spec.whatwg.org/multipage/parsing.html
13079 // A less frightening introduction is available at:
13080 // https://htmlparser.info/parser/
13081 //
13082 // Note that a lot of rules and checks in the algorithm are for
13083 // noticing "parser errors", with usually a fallback of ignoring
13084 // it and going on.
13085 // We ensure one tedious requirement: foster parenting of non-table
13086 // elements met while building a table, mostly to not have mis-nested
13087 // content simply ignored and not shown to the user.
13088 // Other tedious requirements not ensured might just have some impact
13089 // on the styling of the content, which should be a minor issue.
13090 //
13091 // It feels that we can simplify it to the following implementation,
13092 // with possibly some cases not handled related to:
13093 // - FORM and form elements (SELECT, INPUT, OPTION...)
13094 // - TEMPLATE, APPLET, OBJECT, MARQUEE
13095 // - Mis-nested HTML/BODY/HEAD
13096 // - Reconstructing the active formatting elements (B, I...) when
13097 // mis-nested or "on hold" when entering block or table elements.
13098 // - The "adoption agency algorithm" for mis-nested formatting
13099 // elements (and nested <A>)
13100 // - We may not ignore some opening tag that we normally should
13101 // (like HEAD or FRAME when in BODY) (but we ignore a standalone
13102 // sub-table element when not inside a TABLE) as this would
13103 // complicate the internal parser state.
13104 //
13105 // Of interest:
13106 // https://html.spec.whatwg.org/multipage/parsing.html#parse-state
13107 // List of "special" elements
13108 // List of elements for rules "have a particular element in X scope"
13109 // https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
13110 // Specific rules when start or end tag of specific elements is met
13111
// Scopes limit the ancestor search when looking for a previously opened
// element to close (a closing tag may be ignored if no matching opening
// tag is found within the specified scope)
// Kinds of scope that can be given to popUpTo(): each value's comment lists
// the elements acting as stop tags for that scope, or the specific behaviour
// it triggers.
enum ScopeType {
    HTML_SCOPE_NONE = 0,  // no stop tag
    HTML_SCOPE_MAIN,      // HTML, TABLE, TD, TH, CAPTION, APPLET, MARQUEE, OBJECT, TEMPLATE
    HTML_SCOPE_LIST_ITEM, // = SCOPE_MAIN + OL, UL
    HTML_SCOPE_BUTTON,    // = SCOPE_MAIN + BUTTON (not used, only used with P that we handle specifically)
    HTML_SCOPE_TABLE,     // HTML, TABLE, TEMPLATE
    HTML_SCOPE_SELECT,    // All elements stop, except OPTGROUP, OPTION
    HTML_SCOPE_SPECIALS,  // All specials elements (inline don't close across block/specials elements)
    // Next ones are scopes with specific behaviours that may ignore target_id
    HTML_SCOPE_OPENING_LI,    // = SCOPE_SPECIALS, minus ADDRESS, DIV, P: close any LI
    HTML_SCOPE_OPENING_DT_DD, // = SCOPE_SPECIALS, minus ADDRESS, DIV, P: close any DT/DD
    HTML_SCOPE_OPENING_H1_H6, // = close current node if H1, H2, H3, H4, H5, H6
    HTML_SCOPE_CLOSING_H1_H6, // = SCOPE_MAIN: close any of H1..H6
    HTML_SCOPE_TABLE_TO_TOP,  // = SCOPE_TABLE: close all table sub-elements to end up being TABLE
    HTML_SCOPE_TABLE_OPENING_TD_TH, // = SCOPE_TABLE: close any TD/TH
};
13131 // Note: as many elements close a P, we don't handle checking and closing them
13132 // via popUpTo(NULL, el_p, HTML_SCOPE_BUTTON), but we keep the last P as _lastP
13133 // so we can just popUpTo(_lastP) if set when meeting a "close a P" element.
13134
// Boxing elements (id < el_DocFragment) (and DocFragment itself,
// not used with the HTMLParser) are normally added by crengine
// after "delete ldomElementWriter" (which calls onBodyExit()
// which calls initNodeRendMethod()), so after we have closed
// and passed by the element.
// So, we shouldn't meet any in popUpTo() and don't have to wonder
// if we should stop at them, or pass by them.
13142
popUpTo(ldomElementWriter * target,lUInt16 target_id,int scope)13143 lUInt16 ldomDocumentWriterFilter::popUpTo( ldomElementWriter * target, lUInt16 target_id, int scope )
13144 {
13145 if ( !target ) {
13146 // Check if there's an element with provided target_id in the stack inside this scope
13147 ldomElementWriter * tmp = _currNode;
13148 while ( tmp ) {
13149 lUInt16 tmpId = tmp->getElement()->getNodeId();
13150 if ( tmpId < el_DocFragment && tmpId > el_NULL) {
13151 // We shouldn't meet any (see comment above)
13152 // (but we can meet the root node when poping </html>)
13153 crFatalError( 127, "Unexpected boxing element met in ldomDocumentWriterFilter::popUpTo()" );
13154 }
13155 if ( target_id && tmpId == target_id )
13156 break;
13157 if ( _curFosteredNode && tmp == _curFosteredNode ) {
13158 // If fostering and we're not closing the fostered node itself,
13159 // don't go at closing stuff above the fostered node
13160 tmp = NULL;
13161 break;
13162 }
13163 // Check scope stop tags
13164 bool stop = false;
13165 switch (scope) {
13166 case HTML_SCOPE_MAIN: // stop at HTML/TABLE/TD...
13167 if ( tmpId == el_html || tmpId == el_table || tmpId == el_td || tmpId == el_th || tmpId == el_caption ||
13168 tmpId == el_applet || tmpId == el_marquee || tmpId == el_object || tmpId == el_template ) {
13169 tmp = NULL;
13170 stop = true;
13171 }
13172 break;
13173 case HTML_SCOPE_LIST_ITEM: // stop at SCOPE_MAIN + OL, UL
13174 if ( tmpId == el_html || tmpId == el_table || tmpId == el_td || tmpId == el_th || tmpId == el_caption ||
13175 tmpId == el_applet || tmpId == el_marquee || tmpId == el_object || tmpId == el_template ||
13176 tmpId == el_ol || tmpId == el_ul ) {
13177 tmp = NULL;
13178 stop = true;
13179 }
13180 break;
13181 case HTML_SCOPE_BUTTON: // stop at SCOPE_MAIN + BUTTON
13182 if ( tmpId == el_html || tmpId == el_table || tmpId == el_td || tmpId == el_th || tmpId == el_caption ||
13183 tmpId == el_applet || tmpId == el_marquee || tmpId == el_object || tmpId == el_template ||
13184 tmpId == el_button ) {
13185 tmp = NULL;
13186 stop = true;
13187 }
13188 break;
13189 case HTML_SCOPE_TABLE: // stop at HTML and TABLE
13190 if ( tmpId == el_html || tmpId == el_table || tmpId == el_template ) {
13191 tmp = NULL;
13192 stop = true;
13193 }
13194 break;
13195 case HTML_SCOPE_SELECT:
13196 // This one is different: all elements stop it, except optgroup and option
13197 if ( tmpId != el_optgroup && tmpId != el_option ) {
13198 tmp = NULL;
13199 stop = true;
13200 }
13201 break;
13202 case HTML_SCOPE_SPECIALS: // stop at any "special" element
13203 if ( tmpId >= EL_SPECIAL_START && tmpId <= EL_SPECIAL_END ) {
13204 tmp = NULL;
13205 stop = true;
13206 }
13207 break;
13208 case HTML_SCOPE_OPENING_LI:
13209 if ( tmpId == el_li ) {
13210 stop = true;
13211 }
13212 else if ( tmpId >= EL_SPECIAL_START && tmpId <= EL_SPECIAL_END &&
13213 tmpId != el_div && tmpId != el_p && tmpId != el_address ) {
13214 tmp = NULL;
13215 stop = true;
13216 }
13217 break;
13218 case HTML_SCOPE_OPENING_DT_DD:
13219 if ( tmpId == el_dt || tmpId == el_dd ) {
13220 stop = true;
13221 }
13222 else if ( tmpId >= EL_SPECIAL_START && tmpId <= EL_SPECIAL_END &&
13223 tmpId != el_div && tmpId != el_p && tmpId != el_address ) {
13224 tmp = NULL;
13225 stop = true;
13226 }
13227 break;
13228 case HTML_SCOPE_OPENING_H1_H6:
13229 // Close immediate parent H1...H6, but don't walk up
13230 // <H3> ... <H4> : H4 will close H3
13231 // <H3> ... <B> ... <H4> : H4 will not close H3
13232 if ( tmpId < el_h1 || tmpId > el_h6 ) {
13233 tmp = NULL; // Nothing to close
13234 }
13235 stop = true; // Don't check upper
13236 break;
13237 case HTML_SCOPE_CLOSING_H1_H6:
13238 if ( tmpId >= el_h1 && tmpId <= el_h6 ) {
13239 stop = true;
13240 }
13241 else if ( tmpId == el_html || tmpId == el_table || tmpId == el_td || tmpId == el_th || tmpId == el_caption ||
13242 tmpId == el_applet || tmpId == el_marquee || tmpId == el_object || tmpId == el_template ) {
13243 tmp = NULL;
13244 stop = true;
13245 }
13246 break;
13247 case HTML_SCOPE_TABLE_TO_TOP:
13248 if ( tmp->_parent && tmp->_parent->getElement()->getNodeId() == el_table ) {
13249 stop = true;
13250 }
13251 else if ( tmpId == el_html || tmpId == el_table || tmpId == el_template ) {
13252 tmp = NULL;
13253 stop = true;
13254 }
13255 break;
13256 case HTML_SCOPE_TABLE_OPENING_TD_TH:
13257 if ( tmpId == el_td || tmpId == el_th ) {
13258 stop = true;
13259 }
13260 else if ( tmpId == el_html || tmpId == el_table || tmpId == el_template ) {
13261 tmp = NULL;
13262 stop = true;
13263 }
13264 break;
13265 case HTML_SCOPE_NONE:
13266 default:
13267 // Never stop, continue up to root node
13268 break;
13269 }
13270 if ( stop )
13271 break;
13272 tmp = tmp->_parent;
13273 }
13274 target = tmp; // (NULL if not found, NULL or not if stopped)
13275 }
13276 if ( target ) {
13277 // Assume target is valid and will be found
13278 while ( _currNode ) {
13279 // Update state for after this node is closed
13280 lUInt16 curNodeId = _currNode->getElement()->getNodeId();
13281 // Reset these flags if we see again these tags (so to
13282 // at least reconstruct </html><html><body><hr> when
13283 // meeting </html><hr> and have el_html as the catch all
13284 // element in SCOPEs working.
13285 if ( curNodeId == el_body ) {
13286 _bodyTagSeen = false;
13287 }
13288 else if ( curNodeId == el_html ) {
13289 _headTagSeen = false;
13290 _htmlTagSeen = false;
13291 }
13292 if ( _lastP && _currNode == _lastP )
13293 _lastP = NULL;
13294 ldomElementWriter * tmp = _currNode;
13295 bool done = _currNode == target;
13296 if ( _curFosteredNode && _currNode == _curFosteredNode ) {
13297 // If we meet the fostered node, have it closed but don't
13298 // go at closing above it
13299 done = true;
13300 _currNode = _curNodeBeforeFostering;
13301 _curNodeBeforeFostering = NULL;
13302 _curFosteredNode = NULL;
13303 }
13304 else {
13305 _currNode = _currNode->_parent;
13306 }
13307 ElementCloseHandler( tmp->getElement() );
13308 delete tmp;
13309 if ( done )
13310 break;
13311 }
13312 }
13313 return _currNode ? _currNode->getElement()->getNodeId() : el_NULL;
13314 }
13315
// Kind of parser event being processed, to give as the first parameter
// ('step') to AutoOpenClosePop()
enum ParserStepType {
    PARSER_STEP_TAG_OPENING = 1,  // <tag>: an element is about to be created
    PARSER_STEP_TAG_CLOSING,      // </tag>
    PARSER_STEP_TAG_SELF_CLOSING, // <tag/>
    PARSER_STEP_TEXT              // text content
};
13323
// More HTML5 conforming auto close handler (gDOMVersionRequested >= 20200824)
// 'step' is a ParserStepType value, 'tag_id' the id of the element whose
// opening or closing tag is being processed.
// Depending on 'step', this:
// - creates the implicit HTML/HEAD/BODY elements when not yet seen
//   (opening tag or text),
// - closes the elements that the opening of 'tag_id' should close, and
//   creates some missing table parent elements (opening tag),
// - pops up to and closes the element with 'tag_id' (closing tags).
// Returns false when the tag should be ignored by the caller, true otherwise.
bool ldomDocumentWriterFilter::AutoOpenClosePop( int step, lUInt16 tag_id )
{
    lUInt16 curNodeId = _currNode ? _currNode->getElement()->getNodeId() : el_NULL;
    if ( !_bodyTagSeen && ( step == PARSER_STEP_TAG_OPENING || step == PARSER_STEP_TEXT) ) {
        // Create some expected containing elements if not yet seen
        if ( !_headTagSeen ) {
            if ( !_htmlTagSeen ) {
                _htmlTagSeen = true;
                if ( tag_id != el_html ) {
                    OnTagOpen(U"", U"html");
                    OnTagBody();
                }
            }
            if ( (tag_id >= EL_IN_HEAD_START && tag_id <= EL_IN_HEAD_END) || tag_id == el_noscript ) {
                _headTagSeen = true;
                if ( tag_id != el_head ) {
                    OnTagOpen(U"", U"head");
                    OnTagBody();
                }
            }
            // The current node may have changed if HTML/HEAD were created
            curNodeId = _currNode ? _currNode->getElement()->getNodeId() : el_NULL;
        }
        if ( tag_id >= EL_IN_BODY_START || (step == PARSER_STEP_TEXT && (curNodeId == el_html || curNodeId == el_head)) ) {
            // Tag usually found inside <body>, or text while being <HTML> or <HEAD>
            // (text while being in <HTML><HEAD><TITLE> should not trigger this):
            // end of <head> and start of <body>
            if ( _headTagSeen )
                OnTagClose(U"", U"head");
            else
                _headTagSeen = true; // We won't open any <head> anymore
            _bodyTagSeen = true;
            if ( tag_id != el_body ) {
                OnTagOpen(U"", U"body");
                OnTagBody();
            }
            curNodeId = _currNode ? _currNode->getElement()->getNodeId() : el_NULL;
        }
    }
    if ( step == PARSER_STEP_TEXT ) // new text: nothing more to do
        return true;

    // Elements that never have content (closed as soon as they are opened)
    bool is_self_closing_tag = false;
    switch (tag_id) {
        // These are scattered among different ranges, so we sadly
        // can't use any range comparisons
        case el_area:
        case el_base:
        case el_br:
        case el_col:
        case el_embed:
        case el_hr:
        case el_img:
        case el_input:
        case el_link:
        case el_meta:
        case el_param:
        case el_source:
        case el_track:
        case el_wbr:
            is_self_closing_tag = true;
            break;
        default:
            break;
    }

    if ( step == PARSER_STEP_TAG_OPENING ) {
        // A new element with tag_id will be created after we return
        // We should
        // - create implicit parent elements for tag_id if not present (partially
        //   done for HTML/HEAD/BODY elements above)
        // - close elements that should be closed by this tag_id (some with optional
        //   end tags and others that the spec says so)
        // - keep a note if it's self-closing so we can close it when appropriate
        // - ignore this opening tag in some cases

        // Table elements can be ignored, create missing elements
        // and/or close some others
        if ( tag_id == el_th || tag_id == el_td ) {
            // Close any previous TD/TH in table scope if any
            curNodeId = popUpTo(NULL, 0, HTML_SCOPE_TABLE_OPENING_TD_TH);
            // We should be in a table or sub-table element
            // (a standalone TD is ignored)
            if ( curNodeId < el_table || curNodeId > el_tr )
                return false; // Not in a table context: ignore this TD/TH
            // We must be in a TR. If we're not, have missing elements created
            if ( curNodeId != el_tr ) {
                // This will create all the other missing elements if needed
                OnTagOpen(U"", U"tr");
                OnTagBody();
            }
        }
        else if ( tag_id == el_tr ) {
            // Close any previous TR in table scope if any
            curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_TABLE);
            // We should be in a table or sub-table element
            // (a standalone TR is ignored)
            if ( curNodeId < el_table || curNodeId > el_tfoot )
                return false; // Not in a table context: ignore this TR
            // We must be in a THEAD/TBODY/TFOOT. If we're not, have missing elements created
            if ( curNodeId < el_thead || curNodeId > el_tfoot ) {
                // This will create all the other missing elements if needed
                OnTagOpen(U"", U"tbody");
                OnTagBody();
            }
        }
        else if ( tag_id == el_col ) {
            // Close any previous COL in table scope if any
            curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_TABLE);
            // We should be in a table or sub-table element
            if ( curNodeId < el_table || curNodeId > el_td )
                return false; // Not in a table context: ignore this COL
            // We must be in a COLGROUP. If we're not, have missing elements created
            if ( curNodeId != el_colgroup ) {
                // This will create all the other missing elements if needed
                OnTagOpen(U"", U"colgroup");
                OnTagBody();
            }
        }
        else if ( (tag_id >= el_thead && tag_id <= el_tfoot) ||
                   tag_id == el_caption ||
                   tag_id == el_colgroup ) {
            // Close any previous THEAD/TBODY/TFOOT/CAPTION/COLGROUP/COL in table scope if any
            curNodeId = popUpTo(NULL, 0, HTML_SCOPE_TABLE_TO_TOP);
            // We should be in a table element
            if ( curNodeId != el_table )
                return false; // Not in a table context: ignore this tag
        }

        if ( tag_id == el_li ) {
            // A LI should close any previous LI, but should stop at specials
            // except ADDRESS, DIV and P (so, it will stop at UL/OL and won't
            // close any upper LI that had another level of list opened).
            // Once that LI is closed, it should also close any P, which will
            // be taken care of by the followup check.
            curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_OPENING_LI);
        }
        else if ( tag_id == el_dt || tag_id == el_dd ) {
            // Same as LI: a DT/DD closes any previous DT/DD
            curNodeId = popUpTo(NULL, 0, HTML_SCOPE_OPENING_DT_DD);
        }
        else if ( tag_id == el_select ) {
            // A SELECT closes any previous SELECT in select scope
            curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_SELECT);
        }
        if ( _lastP && tag_id >= EL_SPECIAL_CLOSING_P_START && tag_id <= EL_SPECIAL_CLOSING_P_END ) {
            // All these should close a P "in button scope", meaning until a parent
            // with these tag names is met:
            //   html, table, td, th, caption, applet, marquee, object, template
            // These should all have closed any previous P when opened, except
            // applet, marquee, object, template - but to simplify things, we
            // made them close a P too. So, _lastP is always "in button scope".
            curNodeId = popUpTo(_lastP); // will set _lastP = NULL
            // Note: in "quirks mode", a TABLE should not close a P (should
            // we force this behaviour on old CHM files ? Having the table
            // close a P when it shouldn't will make the following text out
            // of P and possibly not styled as P).
        }
        if ( tag_id >= el_h1 && tag_id <= el_h6 ) {
            // After possibly closing a P, H1...H6 close any H1...H6 direct ancestor
            curNodeId = popUpTo(NULL, 0, HTML_SCOPE_OPENING_H1_H6);
        }
        else if ( curNodeId == el_option && (tag_id == el_optgroup || tag_id == el_option) ) {
            // Close previous option
            curNodeId = popUpTo(_currNode);
        }
        else if ( tag_id >= el_rbc && tag_id <= el_rp ) { // ruby sub-elements
            // The HTML5 specs says that:
            // - we should do that only if there is a RUBY in scope (but we don't check that)
            // - RB and RTC should close implied end tags, meaning: RB RP RT RTC
            // - RP and RT should close implied end tags except RTC, meaning: RB RP RT
            // But they don't mention the old <RBC> that we want to support
            // If we do, we end up with these rules (x for HTML specs, o for our added RBC support)
            //            tags to close
            //   tag_id   RBC  RB   RTC  RT   RP
            //   RBC       o    o    o    o    o
            //   RB             x    x    x    x
            //   RTC       o    x    x    x    x
            //   RT        o    x         x    x
            //   RP        o    x         x    x
            if ( tag_id == el_rbc || tag_id == el_rtc ) {
                while ( curNodeId >= el_rbc && curNodeId <= el_rp ) {
                    curNodeId = popUpTo(_currNode);
                }
            }
            else if ( tag_id == el_rb ) {
                while ( curNodeId >= el_rb && curNodeId <= el_rp ) {
                    curNodeId = popUpTo(_currNode);
                }
            }
            else { // el_rt || el_rp
                while ( curNodeId >= el_rbc && curNodeId <= el_rp && curNodeId != el_rtc) {
                    curNodeId = popUpTo(_currNode);
                }
            }
        }

        // Self closing will be handled in OnTagBody
        _curNodeIsSelfClosing = is_self_closing_tag;
    }
    else if ( step == PARSER_STEP_TAG_CLOSING || step == PARSER_STEP_TAG_SELF_CLOSING ) { // Closing, </tag_id> or <tag_id/>
        // We are responsible for popping up to and closing the provided tag_id,
        // or ignoring it if stopped or not found.
        if ( is_self_closing_tag ) {
            // We can ignore this closing tag, except in one case:
            // a standalone closing </BR> (so, not self closing)
            // Specs say we should insert a new/another one
            if ( tag_id == el_br && step == PARSER_STEP_TAG_CLOSING ) {
                OnTagOpen(U"", U"br");
                OnTagBody();
                OnTagClose(U"", U"br", true);
                return true;
            }
            return false; // ignored
        }
        if ( tag_id == curNodeId ) {
            // If closing current node, no need for more checks
            popUpTo(_currNode);
            return true;
        }
        if ( tag_id == el_p && !_lastP ) {
            // </P> without any previous <P> should emit <P></P>
            // Insert new one and pop it
            OnTagOpen(U"", U"p");
            OnTagBody();
            popUpTo(_currNode);
            return true;
        }
        if ( tag_id > EL_SPECIAL_END ) {
            // Inline elements don't close across specials
            curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_SPECIALS);
        }
        else if ( tag_id >= el_h1 && tag_id <= el_h6 ) {
            // A closing Hn closes any other Hp
            curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_CLOSING_H1_H6);
        }
        else if ( tag_id == el_li ) {
            // </li> shouldn't close across OL/UL
            curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_LIST_ITEM);
            // Note: dt/dd (which have the same kind of auto-close previous
            // as LI for the opening tag) do not have any restriction, and
            // will use HTML_SCOPE_MAIN below
        }
        else if ( tag_id >= el_table && tag_id <= el_td ) {
            // Table sub-element: don't cross TABLE
            curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_TABLE);
        }
        else if ( tag_id >= EL_SPECIAL_START ) {
            // All other "specials" close across nearly everything
            // except TABLE/TH/TD/CAPTION
            curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_MAIN);
        }
        else {
            // Boxing elements are normally added by crengine after
            // "delete ldomElementWriter" (which calls onBodyExit()
            // which calls initNodeRendMethod()), so after we have
            // closed and passed by the element.
            // So, we shouldn't meet any.
            // But logically, they shouldn't have any limitation
            curNodeId = popUpTo(NULL, tag_id, HTML_SCOPE_NONE);
        }
        // SELECT should close any previous SELECT in HTML_SCOPE_SELECT,
        // which should contain only OPTGROUP and OPTION, but we don't
        // ensure that. So, we don't ensure this closing restriction.
    }

    // (Silences clang warning about 'curNodeId' is never read, if we
    // happen to not have had the need to re-check it - but better to keep
    // updating it if we later add stuff that does use it)
    (void)curNodeId;

    return true;
}
CheckAndEnsureFosterParenting(lUInt16 tag_id)13595 bool ldomDocumentWriterFilter::CheckAndEnsureFosterParenting(lUInt16 tag_id)
13596 {
13597 if ( !_currNode )
13598 return false;
13599 lUInt16 curNodeId = _currNode->getElement()->getNodeId();
13600 if ( curNodeId >= el_table && curNodeId <= el_tr && curNodeId != el_caption ) {
13601 if ( tag_id < el_table || tag_id > el_td ) {
13602 // Non table sub-element met as we expect only a table sub-element.
13603 // Ensure foster parenting: this node (and its content) is to be
13604 // inserted as a previous sibling of the table element we are in
13605 _curNodeBeforeFostering = NULL;
13606 // Look for the containing table element
13607 ldomElementWriter * elem = _currNode;
13608 while ( elem ) {
13609 if ( elem->getElement()->getNodeId() == el_table ) {
13610 break;
13611 }
13612 elem = elem->_parent;
13613 }
13614 if ( elem ) { // found it
13615 _curNodeBeforeFostering = _currNode;
13616 _currNode = elem->_parent; // parent of table
13617 return true; // Insert the new element in _currNode (the parent of this
13618 // table), before its last child (which is this table)
13619 }
13620 }
13621 // We're in a table, and we see an expected sub-table element: all is fine
13622 return false;
13623 }
13624 else if ( _curFosteredNode ) {
13625 // We've been foster parenting: if we see a table sub-element,
13626 // stop foster parenting and restore the original noce
13627 if ( tag_id >= el_table && tag_id <= el_td ) {
13628 popUpTo(_curFosteredNode);
13629 // popUpTo() has restored _currNode to _curNodeBeforeFostering and
13630 // reset _curFosteredNode and _curNodeBeforeFostering to NULL
13631 }
13632 }
13633 return false;
13634 }
13635
/// Parser callback: an opening tag <nsname:tagname ...> has been met.
// Resolves the element and namespace ids, applies the Lib.ru specific tweaks
// (FORM detection, BR/DD to P and PRE to DIV conversions), then lets the
// auto open/close logic run (foster parenting or AutoOpenClosePop() when the
// requested DOM version is >= 20200824, AutoClose() otherwise) before
// creating the ldomElementWriter for the new element.
// Returns the newly created element. If the tag is not accepted, no element
// is created, _curTagIsIgnored is set, and the element of the unchanged
// current node is returned (NULL if there is no current node).
ldomNode * ldomDocumentWriterFilter::OnTagOpen( const lChar32 * nsname, const lChar32 * tagname )
{
    // We expect from the parser to always have OnTagBody called
    // after OnTagOpen before any other OnTagOpen
    if ( !_tagBodyCalled ) {
        CRLog::error("OnTagOpen w/o parent's OnTagBody : %s", LCSTR(lString32(tagname)));
        crFatalError();
    }
    // _tagBodyCalled = false;
    // We delay setting _tagBodyCalled=false to below as we may create
    // additional wrappers before inserting this new element

    lUInt16 id = _document->getElementNameIndex(tagname);
    lUInt16 nsid = (nsname && nsname[0]) ? _document->getNsNameIndex(nsname) : 0;

    // http://lib.ru/ books detection (a bit ugly to have this hacked
    // into ldomDocumentWriterFilter, but well, it's been there for ages
    // and it seems quite popular and expected to have crengine handle
    // Lib.ru books without any conversion needed).
    // Detection has been reworked to be done here (in OnTagOpen). It
    // was previously done in ElementCloseHandler/OnTagClose when closing
    // the elements, and as it removed the FORM node from the DOM, it
    // caused a display hash mismatch which made the cache invalid.
    // So, do it here and don't remove any node but make them hidden.
    // Lib.ru books (in the 2 formats that are supported, "Lib.ru html"
    // and "Fine HTML"), have this early in the document:
    //   <div align=right><form action=/INPROZ/ASTURIAS/asturias1_1.txt><select name=format><OPTION...>
    // Having a FORM child of a DIV with align=right is assumed to be
    // quite rare, so check for that.
    bool setDisplayNone = false;
    bool setParseAsPre = false;
    if ( _libRuDocumentToDetect && id == el_form ) {
        // At this point _currNode is still the parent of the FORM that is opening
        if ( _currNode && _currNode->_element->getNodeId() == el_div ) {
            ldomNode * node = _currNode->_element;
            lString32 style = node->getAttributeValue(attr_style);
            // align=right would have been translated to style="text-align: right"
            if ( !style.empty() && style.pos("text-align: right", 0) >= 0 ) {
                _libRuDocumentDetected = true;
                // We can't set this DIV to be display:none as the element
                // has already had setNodeStyle() called and applied, so
                // it would take effect only on re-renderings (and would
                // cause a display hash mismatch).
                // So, we'll set it on the FORM just after it's created below
                setDisplayNone = true;
            }
        }
        // If the first FORM met doesn't match, no need to keep detecting
        _libRuDocumentToDetect = false;
    }
    // Fixed 20180503: this was done previously in any case, but now only
    // if _libRuDocumentDetected. We still allow the old behaviour if
    // requested to keep previously recorded XPATHs valid.
    if ( _libRuDocumentDetected || _document->getDOMVersionRequested() < 20180503) {
        // Patch for bad LIB.RU books - BR delimited paragraphs
        // in "Fine HTML" format, that appears as:
        //   <br>&nbsp; &nbsp; &nbsp; Viento fuerte, 1950
        //   <br>&nbsp; &nbsp; &nbsp; Spellcheck [..., with \n every 76 chars]
        if ( id == el_br || id == el_dd ) {
            // Replace such BR (and DD) with P
            id = el_p;
            _libRuParagraphStart = true; // to trim leading &nbsp;
        } else {
            _libRuParagraphStart = false;
        }
        if ( _libRuDocumentDetected && id == el_pre ) {
            // "Lib.ru html" format is actually minimal HTML with
            // the text wrapped in <PRE>. We will parse this text
            // to build proper HTML with each paragraph wrapped
            // in a <P> (this is done by the XMLParser when we give
            // it TXTFLG_PRE_PARA_SPLITTING).
            // Once that is detected, we don't want it to be PRE
            // anymore (so that on re-renderings, it's not handled
            // as white-space: pre), so we're swapping this PRE with
            // a DIV element. But we need to still parse the text
            // when building the DOM as PRE.
            id = el_div;
            ldomNode * n = _currNode ? _currNode->getElement() : NULL;
            if ( n && n->getNodeId() == el_pre ) {
                // Also close any previous PRE that would have been
                // auto-closed if we kept PRE as PRE (from now on,
                // we'll convert PRE to DIV), as this unclosed PRE
                // would apply to all the text.
                _currNode = pop( _currNode, el_pre);
            }
            else if ( n && n->getNodeId() == el_div && n->hasAttribute( attr_ParserHint ) &&
                        n->getAttributeValue( attr_ParserHint ) == U"ParseAsPre" ) {
                // Also close any previous PRE we already masqueraded
                // as <DIV ParserHint="ParseAsPre">
                _currNode = pop( _currNode, el_div);
            }
            // Below, we'll then be inserting a DIV, which won't be TXTFLG_PRE.
            // We'll need to re-set _flags to be TXTFLG_PRE in our OnTagBody(),
            // after it has called the superclass's OnTagBody(),
            // as ldomDocumentWriter::OnTagBody() will call onBodyEnter() which
            // will have set default styles (so, not TXTFLG_PRE for DIV as its
            // normal style is "white-space: normal").
            // We'll add the attribute ParserHint="ParseAsPre" below so
            // we know it was a PRE and do various tweaks.
            setParseAsPre = true;
        }
    }

    bool tag_accepted = true;
    bool insert_before_last_child = false;
    if (_document->getDOMVersionRequested() >= 20200824) { // A little bit more HTML5 conformance
        // <IMAGE> is handled as <IMG>
        if ( id == el_image )
            id = el_img;
        if ( tagname && tagname[0] == '?' ) {
            // The XML parser feeds us XML processing instructions like '<?xml ... ?>'
            // Firefox wraps them in a comment <!--?xml ... ?-->.
            // As we ignore comments, ignore them too.
            tag_accepted = false;
        }
        else if ( CheckAndEnsureFosterParenting(id) ) {
            // https://html.spec.whatwg.org/multipage/parsing.html#foster-parent
            // If non-sub-table element opening while we're still
            // inside sub-table non-TD/TH elements, we should
            // do foster parenting: insert the node as the previous
            // sibling of the TABLE element we're dealing with
            insert_before_last_child = true;
            // As we'll be inserting a node before the TABLE, which
            // already had its style applied, some CSS selectors matches
            // might no more be valid (i.e. :first-child, DIV + TABLE),
            // so styles could change on the next re-rendering.
            // We don't check if we actually had such selectors as that
            // is complicated from here: we just set styles to be invalid
            // so they are re-computed once the DOM is fully built.
            _document->setNodeStylesInvalidIfLoading();
        }
        else {
            // May create or close other elements, and tell us to ignore this tag
            tag_accepted = AutoOpenClosePop( PARSER_STEP_TAG_OPENING, id );
        }
    }
    else {
        // Older DOM versions: legacy auto-close handling
        AutoClose( id, true );
    }

    // Set a flag for OnText to accumulate the content of any <HEAD><STYLE>
    // (We do that after the autoclose above, so that with <HEAD><META><STYLE>,
    // the META is properly closed and we find HEAD as the current node.)
    if ( id == el_style && _currNode && _currNode->getElement()->getNodeId() == el_head ) {
        _inHeadStyle = true;
    }

    // From now on, we don't create/close any elements, so expect
    // the next event to be OnTagBody (except OnTagAttribute)
    _tagBodyCalled = false;

    if ( !tag_accepted ) {
        // Don't create the element
        // If not accepted, the HTML parser will still call OnTagBody, and might
        // call OnTagAttribute before that. We should ignore them until OnTagBody.
        // No issue with OnTagClose, that can usually ignore stuff.
        _curTagIsIgnored = true;
        return _currNode ? _currNode->getElement() : NULL;
    }

    // Create the element, which becomes the current node
    _currNode = new ldomElementWriter( _document, nsid, id, _currNode, insert_before_last_child );
    _flags = _currNode->getFlags();

    if ( insert_before_last_child ) {
        // Remember the foster parented node, so popUpTo() and
        // CheckAndEnsureFosterParenting() know where fostering ends
        _curFosteredNode = _currNode;
    }

    if (_document->getDOMVersionRequested() >= 20200824 && id == el_p) {
        // To avoid checking DOM ancestors with the numerous tags that close a P
        _lastP = _currNode;
    }

    // Some libRu tweaks:
    if ( setParseAsPre ) {
        // Set an attribute on the DIV we just added
        _currNode->getElement()->setAttributeValue(LXML_NS_NONE, attr_ParserHint, U"ParseAsPre");
        // And set this global flag as we'll need to re-enable PRE (as it
        // will be reset by ldomDocumentWriter::OnTagBody() as we won't have
        // proper CSS white-space:pre inheritance) and XMLParser flags.
        _libRuParseAsPre = true;
    }
    if ( setDisplayNone ) {
        // Hide the FORM that was used to detect libRu,
        // now that currNode is the FORM element
        appendStyle( U"display: none" );
    }

    //logfile << " !o!\n";
    return _currNode->getElement();
}
13824
13825 /// called after > of opening tag (when entering tag body)
13826 // Note to avoid confusion: all tags HAVE a body (their content), so this
13827 // is called on all tags.
OnTagBody()13828 void ldomDocumentWriterFilter::OnTagBody()
13829 {
13830 _tagBodyCalled = true;
13831 if ( _curTagIsIgnored ) {
13832 _curTagIsIgnored = false; // Done with this ignored tag
13833 // We don't want ldomDocumentWriter::OnTagBody() to re-init
13834 // the current node styles (as we ignored this element,
13835 // _currNode is the previous node, already initNodeStyle()'d)
13836 return;
13837 }
13838
13839 // This superclass OnTagBody() will initNodeStyle() on this node.
13840 // Some specific handling for the <BODY> tag to deal with HEAD STYLE
13841 // and LINK is also done there.
13842 ldomDocumentWriter::OnTagBody();
13843
13844 if ( _curNodeIsSelfClosing ) {
13845 // Now that styles are set, we can close the element
13846 // Let's have it closed properly with flags correctly re set, and so
13847 // that specific handling in OnTagClose() is done (ex. for <LINK>)
13848 OnTagClose(NULL, NULL, true);
13849 return;
13850 }
13851
13852 if ( _libRuDocumentDetected ) {
13853 if ( _libRuParseAsPre ) {
13854 // The OnTagBody() above might have cancelled TXTFLG_PRE
13855 // (that the ldomElementWriter inherited from its parent)
13856 // when ensuring proper CSS white-space inheritance.
13857 // Re-enable it
13858 _currNode->_flags |= TXTFLG_PRE;
13859 // Also set specific XMLParser flags so it spits out
13860 // <P>... for each paragraph of plain text, so that
13861 // we get some nice HTML instead
13862 _flags = TXTFLG_PRE | TXTFLG_PRE_PARA_SPLITTING | TXTFLG_TRIM;
13863 }
13864 }
13865 }
13866
OnAttribute(const lChar32 * nsname,const lChar32 * attrname,const lChar32 * attrvalue)13867 void ldomDocumentWriterFilter::OnAttribute( const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue )
13868 {
13869 //logfile << "ldomDocumentWriter::OnAttribute() [" << nsname << ":" << attrname << "]";
13870 //if ( nsname && nsname[0] )
13871 // lStr_lowercase( const_cast<lChar32 *>(nsname), lStr_len(nsname) );
13872 //lStr_lowercase( const_cast<lChar32 *>(attrname), lStr_len(attrname) );
13873
13874 //CRLog::trace("OnAttribute(%s, %s)", LCSTR(lString32(attrname)), LCSTR(lString32(attrvalue)));
13875
13876 if ( _curTagIsIgnored ) { // Ignore attributes if tag was ignored
13877 return;
13878 }
13879
13880 // ldomDocumentWriterFilter is used for HTML/CHM/PDB (not with EPUBs).
13881 // We translate some attributes (now possibly deprecated) to their
13882 // CSS style equivalent, globally or for some elements only.
13883 // https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes
13884 lUInt16 id = _currNode->_element->getNodeId();
13885
13886 // Not sure this is to be done here: we get attributes as they are read,
13887 // so possibly before or after a style=, that the attribute may override.
13888 // Hopefully, a document use either one or the other.
13889 // (Alternative: in lvrend.cpp when used, as fallback when there is
13890 // none specified in node->getStyle().)
13891
13892 // HTML align= => CSS text-align:
13893 // Done for all elements, except IMG and TABLE (for those, it should
13894 // translate to float:left/right, which is ensured by epub.css)
13895 // Should this be restricted to some specific elements?
13896 if ( !lStr_cmp(attrname, "align") && (id != el_img) && (id != el_table) ) {
13897 lString32 align = lString32(attrvalue).lowercase();
13898 if ( align == U"justify")
13899 appendStyle( U"text-align: justify" );
13900 else if ( align == U"left")
13901 appendStyle( U"text-align: left" );
13902 else if ( align == U"right")
13903 appendStyle( U"text-align: right" );
13904 else if ( align == U"center")
13905 appendStyle( U"text-align: center" );
13906 return;
13907 }
13908
13909 // For the table & friends elements where we do support the following styles,
13910 // we translate these deprecated attributes to their style equivalents:
13911 //
13912 // HTML valign= => CSS vertical-align: only for TH & TD (as lvrend.cpp
13913 // only uses it with table cells (erm_final or erm_block))
13914 if (id == el_th || id == el_td) {
13915 // Default rendering for cells is valign=baseline
13916 if ( !lStr_cmp(attrname, "valign") ) {
13917 lString32 valign = lString32(attrvalue).lowercase();
13918 if ( valign == U"top" )
13919 appendStyle( U"vertical-align: top" );
13920 else if ( valign == U"middle" )
13921 appendStyle( U"vertical-align: middle" );
13922 else if ( valign == U"bottom")
13923 appendStyle( U"vertical-align: bottom" );
13924 return;
13925 }
13926 }
13927 // HTML width= => CSS width: only for TH, TD and COL (as lvrend.cpp
13928 // only uses it with erm_table_column and table cells)
13929 // Note: with IMG, lvtextfm LFormattedText::AddSourceObject() only uses
13930 // style, and not attributes: <img width=100 height=50> would not be used.
13931 if (id == el_th || id == el_td || id == el_col) {
13932 if ( !lStr_cmp(attrname, "width") ) {
13933 lString32 val = lString32(attrvalue);
13934 const lChar32 * s = val.c_str();
13935 bool is_pct = false;
13936 int n=0;
13937 if (s && s[0]) {
13938 for (int i=0; s[i]; i++) {
13939 if (s[i]>='0' && s[i]<='9') {
13940 n = n*10 + (s[i]-'0');
13941 } else if (s[i] == '%') {
13942 is_pct = true;
13943 break;
13944 }
13945 }
13946 if (n > 0) {
13947 val = lString32("width: ");
13948 val.appendDecimal(n);
13949 val += is_pct ? "%" : "px"; // CSS pixels
13950 appendStyle(val.c_str());
13951 }
13952 }
13953 return;
13954 }
13955 }
13956
13957 // Othewise, add the attribute
13958 lUInt16 attr_ns = (nsname && nsname[0]) ? _document->getNsNameIndex( nsname ) : 0;
13959 lUInt16 attr_id = (attrname && attrname[0]) ? _document->getAttrNameIndex( attrname ) : 0;
13960
13961 _currNode->addAttribute( attr_ns, attr_id, attrvalue );
13962
13963 //logfile << " !a!\n";
13964 }
13965
/// called on closing tag
///
/// Does the HTML specific processing that has to happen when an element
/// gets closed (Lib.ru detection flags update, <LINK rel="stylesheet">
/// collection, <TITLE> detection), then has the current node closed
/// (via pop(), AutoClose() or AutoOpenClosePop() depending on the requested
/// DOM version), and finally restores the parser flags from the node that
/// has become the current one.
/// The first (unnamed) parameter is the namespace name: it is not used.
/// @param tagname name of the element to close; when NULL (as done by the
///        internal call made for self closing elements), the id of the
///        current node's element is used instead
/// @param self_closing_tag only used with DOM version >= 20200824, to select
///        the parser step (PARSER_STEP_TAG_SELF_CLOSING or
///        PARSER_STEP_TAG_CLOSING) given to AutoOpenClosePop()
void ldomDocumentWriterFilter::OnTagClose( const lChar32 * /*nsname*/, const lChar32 * tagname, bool self_closing_tag )
{
    // Closing a tag whose OnTagBody() has not been called is considered
    // a fatal error
    if ( !_tagBodyCalled ) {
        CRLog::error("OnTagClose w/o parent's OnTagBody : %s", LCSTR(lString32(tagname)));
        crFatalError();
    }
    // Nothing to close: just flag the error
    if ( !_currNode || !_currNode->getElement() ) {
        _errFlag = true;
        return;
    }

    //lUInt16 nsid = (nsname && nsname[0]) ? _document->getNsNameIndex(nsname) : 0;
    lUInt16 curNodeId = _currNode->getElement()->getNodeId();
    lUInt16 id = tagname ? _document->getElementNameIndex(tagname) : curNodeId;
    _errFlag |= (id != curNodeId); // (we seem to not do anything with _errFlag)
    // We should expect the tagname we got to be the same as curNode's element name,
    // but it looks like we may get an upper closing tag, that pop() or AutoClose()
    // below might handle. So, here below, we check that both id and curNodeId match
    // the element id we check for.

    if ( _libRuDocumentToDetect && id == el_div ) {
        // No need to try detecting after we see a closing </DIV>,
        // as the FORM we look for is in the first DIV
        _libRuDocumentToDetect = false;
    }
    if ( _libRuDocumentDetected && id == el_pre ) {
        // Also, if we're about to close the original PRE that we masqueraded
        // as DIV and that has enabled _libRuParseAsPre, reset it.
        // (In Lib.ru books, it seems a PRE is never closed, or only at
        // the end by another PRE where it doesn't matter if we keep that flag.)
        ldomNode * n = _currNode->getElement();
        if ( n->getNodeId() == el_div && n->hasAttribute( attr_ParserHint ) &&
                    n->getAttributeValue( attr_ParserHint ) == U"ParseAsPre" ) {
            _libRuParseAsPre = false;
        }
    }

    // Parse <link rel="stylesheet">, put the css file link in _stylesheetLinks,
    // they will be added to <body><stylesheet> when we meet <BODY>
    // (duplicated in ldomDocumentWriter::OnTagClose)
    if ( id == el_link && curNodeId == el_link ) { // link node
        ldomNode * n = _currNode->getElement();
        // Only a LINK that is a direct child of HEAD, with rel="stylesheet"
        // and type="text/css" (both compared lowercased), is considered
        if ( n->getParentNode() && n->getParentNode()->getNodeId() == el_head &&
                 lString32(n->getAttributeValue("rel")).lowercase() == U"stylesheet" &&
                 lString32(n->getAttributeValue("type")).lowercase() == U"text/css" ) {
            lString32 href = n->getAttributeValue("href");
            // The href is combined with the document code base
            lString32 stylesheetFile = LVCombinePaths( _document->getCodeBase(), href );
            CRLog::debug("Internal stylesheet file: %s", LCSTR(stylesheetFile));
            // We no more apply it immediately: it will be when <BODY> is met
            // _document->setDocStylesheetFileName(stylesheetFile);
            // _document->applyDocumentStyleSheet();
            _stylesheetLinks.add(stylesheetFile);
        }
    }

    // HTML title detection
    // (only for a TITLE that is a direct child of HEAD: its trimmed text,
    // when not empty, is stored as the DOC_PROP_TITLE document property)
    if ( id == el_title && curNodeId == el_title && _currNode->_element->getParentNode() &&
                _currNode->_element->getParentNode()->getNodeId() == el_head ) {
        lString32 s = _currNode->_element->getText();
        s.trim();
        if ( !s.empty() ) {
            // TODO: split authors, title & series
            _document->getProps()->setString( DOC_PROP_TITLE, s );
        }
    }

    if (_document->getDOMVersionRequested() >= 20200824) { // A little bit more HTML5 conformance
        if ( _curNodeIsSelfClosing ) { // Internal call (not from XMLParser)
            // Directly pop the self closing element, and reset the flag
            _currNode = pop( _currNode, id );
            _curNodeIsSelfClosing = false;
        }
        else {
            // IMAGE is handled as IMG
            if ( id == el_image )
                id = el_img;
            AutoOpenClosePop( self_closing_tag ? PARSER_STEP_TAG_SELF_CLOSING : PARSER_STEP_TAG_CLOSING, id );
        }
    }
    else {
        // Older DOM versions: previous behaviour, AutoClose() then pop()
        //======== START FILTER CODE ============
        AutoClose( curNodeId, false );
        //======== END FILTER CODE ==============
        // save closed element
        // ldomNode * closedElement = _currNode->getElement();
        _currNode = pop( _currNode, id );
        // _currNode is now the parent
    }

    // Have the parser flags follow the node that is now the current one
    if ( _currNode ) {
        _flags = _currNode->getFlags();
        if ( _libRuParseAsPre ) {
            // Re-set specific parser flags
            _flags |= TXTFLG_PRE | TXTFLG_PRE_PARA_SPLITTING | TXTFLG_TRIM;
        }
    }

    // Ask the parser to stop once the element with the stop tag id got closed
    if ( id==_stopTagId ) {
        //CRLog::trace("stop tag found, stopping...");
        _parser->Stop();
    }
    //logfile << " !c!\n";
}
14068
14069 /// called on text
OnText(const lChar32 * text,int len,lUInt32 flags)14070 void ldomDocumentWriterFilter::OnText( const lChar32 * text, int len, lUInt32 flags )
14071 {
14072 // Accumulate <HEAD><STYLE> content
14073 if (_inHeadStyle) {
14074 _headStyleText << lString32(text, len);
14075 _inHeadStyle = false;
14076 return;
14077 }
14078
14079 if (_document->getDOMVersionRequested() >= 20200824) { // A little bit more HTML5 conformance
14080 // We can get text before any node (it should then have <html><body> emited before it),
14081 // but we might get spaces between " <html> <head> <title>The title <br>The content".
14082 // Try to handle that correctly.
14083 if ( !_bodyTagSeen ) {
14084 // While not yet in BODY, when in HTML or HEAD, ignore empty
14085 // text (as non empty text will create BODY)
14086 if ( !_currNode || _currNode->getElement()->isRoot() ||
14087 _currNode->getElement()->getNodeId() == el_html ||
14088 _currNode->getElement()->getNodeId() == el_head ) {
14089 if ( !IsEmptySpace(text, len) ) {
14090 // Non-empty text: have implicit HTML or BODY tags created and HEAD closed
14091 AutoOpenClosePop( PARSER_STEP_TEXT, 0 );
14092 }
14093 }
14094 }
14095 }
14096 //logfile << "lxmlDocumentWriter::OnText() fpos=" << fpos;
14097 if (_currNode)
14098 {
14099 lUInt16 curNodeId = _currNode->getElement()->getNodeId();
14100 if (_document->getDOMVersionRequested() < 20200824) {
14101 AutoClose( curNodeId, false );
14102 }
14103 if ( (_flags & XML_FLAG_NO_SPACE_TEXT)
14104 && IsEmptySpace(text, len) && !(flags & TXTFLG_PRE))
14105 return;
14106 bool insert_before_last_child = false;
14107 if (_document->getDOMVersionRequested() >= 20200824) {
14108 // If we're inserting text while in table sub-elements that
14109 // don't accept text, have it foster parented
14110 if ( curNodeId >= el_table && curNodeId <= el_tr && curNodeId != el_caption ) {
14111 if ( !IsEmptySpace(text, len) ) {
14112 if ( CheckAndEnsureFosterParenting(el_NULL) ) {
14113 insert_before_last_child = true;
14114 }
14115 }
14116 }
14117 }
14118 else {
14119 // Previously, text in table sub-elements (only table elements and
14120 // self-closing elements have _allowText=false) had any text in between
14121 // table elements dropped (but not elements! with "<table>abc<div>def",
14122 // "abc" was dropped, but not "def")
14123 if ( !_currNode->_allowText )
14124 return;
14125 }
14126 if ( !_libRuDocumentDetected ) {
14127 _currNode->onText( text, len, flags, insert_before_last_child );
14128 }
14129 else { // Lib.ru text cleanup
14130 if ( _libRuParagraphStart ) {
14131 // Cleanup "Fine HTML": "<br> Viento fuerte, 1950"
14132 while ( *text==160 && len > 0 ) {
14133 text++;
14134 len--;
14135 while ( *text==' ' && len > 0 ) {
14136 text++;
14137 len--;
14138 }
14139 }
14140 _libRuParagraphStart = false;
14141 }
14142 // Handle "Lib.ru html" paragraph, parsed from the nearly plaintext
14143 // by XMLParser with TXTFLG_PRE | TXTFLG_PRE_PARA_SPLITTING | TXTFLG_TRIM
14144 bool autoPara = flags & TXTFLG_PRE;
14145 int leftSpace = 0;
14146 const lChar32 * paraTag = NULL;
14147 bool isHr = false;
14148 if ( autoPara ) {
14149 while ( (*text==' ' || *text=='\t' || *text==160) && len > 0 ) {
14150 text++;
14151 len--;
14152 leftSpace += (*text == '\t') ? 8 : 1;
14153 }
14154 paraTag = leftSpace > 8 ? U"h2" : U"p";
14155 lChar32 ch = 0;
14156 bool sameCh = true;
14157 for ( int i=0; i<len; i++ ) {
14158 if ( !ch )
14159 ch = text[i];
14160 // We would need this to have HR work:
14161 // else if ( i == len-1 && text[i] == ' ' ) {
14162 // // Ignore a trailing space we may get
14163 // // Note that some HR might be missed when the
14164 // // "----" directly follows some indented text.
14165 // }
14166 // but by fixing it, we'd remove a P and have XPointers
14167 // like /html/body/div/p[14]/text().113 reference the wrong P,
14168 // so keep doing bad to not mess past highlights...
14169 else if ( ch != text[i] ) {
14170 sameCh = false;
14171 break;
14172 }
14173 }
14174 if ( !ch )
14175 sameCh = false;
14176 if ( (ch=='-' || ch=='=' || ch=='_' || ch=='*' || ch=='#') && sameCh )
14177 isHr = true;
14178 }
14179 if ( isHr ) {
14180 OnTagOpen( NULL, U"hr" );
14181 OnTagBody();
14182 OnTagClose( NULL, U"hr" );
14183 } else if ( len > 0 ) {
14184 if ( autoPara ) {
14185 OnTagOpen( NULL, paraTag );
14186 OnTagBody();
14187 }
14188 _currNode->onText( text, len, flags, insert_before_last_child );
14189 if ( autoPara )
14190 OnTagClose( NULL, paraTag );
14191 }
14192 }
14193 if ( insert_before_last_child ) {
14194 // We have no _curFosteredNode to pop, so just restore
14195 // the previous table node
14196 _currNode = _curNodeBeforeFostering;
14197 _curNodeBeforeFostering = NULL;
14198 _curFosteredNode = NULL;
14199 }
14200 }
14201 //logfile << " !t!\n";
14202 }
14203
ldomDocumentWriterFilter(ldomDocument * document,bool headerOnly,const char *** rules)14204 ldomDocumentWriterFilter::ldomDocumentWriterFilter(ldomDocument * document, bool headerOnly, const char *** rules )
14205 : ldomDocumentWriter( document, headerOnly )
14206 , _libRuDocumentToDetect(true)
14207 , _libRuDocumentDetected(false)
14208 , _libRuParagraphStart(false)
14209 , _libRuParseAsPre(false)
14210 , _styleAttrId(0)
14211 , _classAttrId(0)
14212 , _tagBodyCalled(true)
14213 , _htmlTagSeen(false)
14214 , _headTagSeen(false)
14215 , _bodyTagSeen(false)
14216 , _curNodeIsSelfClosing(false)
14217 , _curTagIsIgnored(false)
14218 , _curNodeBeforeFostering(NULL)
14219 , _curFosteredNode(NULL)
14220 , _lastP(NULL)
14221 {
14222 if (_document->getDOMVersionRequested() >= 20200824) {
14223 // We're not using the provided rules, but hardcoded ones in AutoOpenClosePop()
14224 return;
14225 }
14226 lUInt16 i;
14227 for ( i=0; i<MAX_ELEMENT_TYPE_ID; i++ )
14228 _rules[i] = NULL;
14229 lUInt16 items[MAX_ELEMENT_TYPE_ID];
14230 for ( i=0; rules[i]; i++ ) {
14231 const char ** rule = rules[i];
14232 lUInt16 j;
14233 for ( j=0; rule[j] && j<MAX_ELEMENT_TYPE_ID; j++ ) {
14234 const char * s = rule[j];
14235 items[j] = _document->getElementNameIndex( lString32(s).c_str() );
14236 }
14237 if ( j>=1 ) {
14238 lUInt16 id = items[0];
14239 _rules[ id ] = new lUInt16[j];
14240 for ( int k=0; k<j; k++ ) {
14241 _rules[id][k] = k==j-1 ? 0 : items[k+1];
14242 }
14243 }
14244 }
14245 }
14246
~ldomDocumentWriterFilter()14247 ldomDocumentWriterFilter::~ldomDocumentWriterFilter()
14248 {
14249 if (_document->getDOMVersionRequested() >= 20200824) {
14250 return;
14251 }
14252 for ( int i=0; i<MAX_ELEMENT_TYPE_ID; i++ ) {
14253 if ( _rules[i] )
14254 delete[] _rules[i];
14255 }
14256 }
14257
14258 #if BUILD_LITE!=1
/// Magic signature written first by DocFileHeader::serialize() and checked
/// first by DocFileHeader::deserialize()
static const char * doc_file_magic = "CR3\n";
14260
14261
serialize(SerialBuf & hdrbuf)14262 bool lxmlDocBase::DocFileHeader::serialize( SerialBuf & hdrbuf )
14263 {
14264 int start = hdrbuf.pos();
14265 hdrbuf.putMagic( doc_file_magic );
14266 //CRLog::trace("Serializing render data: %d %d %d %d", render_dx, render_dy, render_docflags, render_style_hash);
14267 hdrbuf << render_dx << render_dy << render_docflags << render_style_hash << stylesheet_hash << node_displaystyle_hash;
14268
14269 hdrbuf.putCRC( hdrbuf.pos() - start );
14270
14271 #if 0
14272 {
14273 lString8 s;
14274 s<<"SERIALIZED HDR BUF: ";
14275 for ( int i=0; i<hdrbuf.pos(); i++ ) {
14276 char tmp[20];
14277 sprintf(tmp, "%02x ", hdrbuf.buf()[i]);
14278 s<<tmp;
14279 }
14280 CRLog::trace(s.c_str());
14281 }
14282 #endif
14283 return !hdrbuf.error();
14284 }
14285
deserialize(SerialBuf & hdrbuf)14286 bool lxmlDocBase::DocFileHeader::deserialize( SerialBuf & hdrbuf )
14287 {
14288 int start = hdrbuf.pos();
14289 hdrbuf.checkMagic( doc_file_magic );
14290 if ( hdrbuf.error() ) {
14291 CRLog::error("Swap file Magic signature doesn't match");
14292 return false;
14293 }
14294 hdrbuf >> render_dx >> render_dy >> render_docflags >> render_style_hash >> stylesheet_hash >> node_displaystyle_hash;
14295 //CRLog::trace("Deserialized render data: %d %d %d %d", render_dx, render_dy, render_docflags, render_style_hash);
14296 hdrbuf.checkCRC( hdrbuf.pos() - start );
14297 if ( hdrbuf.error() ) {
14298 CRLog::error("Swap file - header unpack error");
14299 return false;
14300 }
14301 return true;
14302 }
14303 #endif
14304
setDocFlag(lUInt32 mask,bool value)14305 void tinyNodeCollection::setDocFlag( lUInt32 mask, bool value )
14306 {
14307 CRLog::debug("setDocFlag(%04x, %s)", mask, value?"true":"false");
14308 if ( value )
14309 _docFlags |= mask;
14310 else
14311 _docFlags &= ~mask;
14312 }
14313
/// Replaces all the document flags at once.
/// @param value new value for the whole set of document flags
void tinyNodeCollection::setDocFlags( lUInt32 value )
{
    CRLog::debug("setDocFlags(%04x)", value);
    _docFlags = value;
}
14319
getPersistenceFlags()14320 int tinyNodeCollection::getPersistenceFlags()
14321 {
14322 int format = 2; //getProps()->getIntDef(DOC_PROP_FILE_FORMAT, 0);
14323 int flag = ( format==2 && getDocFlag(DOC_FLAG_PREFORMATTED_TEXT) ) ? 1 : 0;
14324 CRLog::trace("getPersistenceFlags() returned %d", flag);
14325 return flag;
14326 }
14327
/// Resets rendering related state of the document.
/// Unless built with BUILD_LITE==1 (where this does nothing), this clears
/// the rend block cache, the "rendered" state, the url image map and the
/// font list, and unregisters this document's fonts from the font manager.
/// Note: the DOM content itself is not cleared (see TODO below).
void ldomDocument::clear()
{
#if BUILD_LITE!=1
    clearRendBlockCache();
    _rendered = false;
    _urlImageMap.clear();
    _fontList.clear();
    // Fonts are registered per document: unregister those of this one
    fontMan->UnregisterDocumentFonts(_docIndex);
#endif
    //TODO: implement clear
    //_elemStorage.
}
14340
14341 #if BUILD_LITE!=1
openFromCache(CacheLoadingCallback * formatCallback,LVDocViewCallback * progressCallback)14342 bool ldomDocument::openFromCache( CacheLoadingCallback * formatCallback, LVDocViewCallback * progressCallback )
14343 {
14344 setCacheFileStale(true);
14345 if ( !openCacheFile() ) {
14346 CRLog::info("Cannot open document from cache. Need to read fully");
14347 clear();
14348 return false;
14349 }
14350 if ( !loadCacheFileContent(formatCallback, progressCallback) ) {
14351 CRLog::info("Error while loading document content from cache file.");
14352 clear();
14353 return false;
14354 }
14355 #if 0
14356 LVStreamRef s = LVOpenFileStream("/tmp/test.xml", LVOM_WRITE);
14357 if ( !s.isNull() )
14358 saveToStream(s, "UTF8");
14359 #endif
14360 _mapped = true;
14361 _rendered = true;
14362 _just_rendered_from_cache = true;
14363 _toc_from_cache_valid = true;
14364 // Use cached node_displaystyle_hash as _nodeDisplayStyleHashInitial, as it
14365 // should be in sync with the DOM stored in the cache
14366 _nodeDisplayStyleHashInitial = _hdr.node_displaystyle_hash;
14367 CRLog::info("Initializing _nodeDisplayStyleHashInitial from cache file: %x", _nodeDisplayStyleHashInitial);
14368
14369 setCacheFileStale(false);
14370 return true;
14371 }
14372
/// load document cache file content, @see saveChanges()
///
/// Reads back, in this order, from the already opened _cacheFile: document
/// properties, ID maps, pages data, embedded fonts list, render header,
/// node data, element/text/rect/node style storages, TOC, PageMap, and
/// finally styles data.
/// Any failure, except for the styles data, is logged and makes this return
/// false at once. A failure to load styles data is not fatal: styles are
/// then flagged to be reinitialized (updateLoadedStyles(false)).
/// @param formatCallback   when not NULL, gets notified of the document
///                         format found in the cached properties
/// @param progressCallback when not NULL, gets notified of the loading
///                         progress (values from 5 to 95)
/// @return true when the cache content has been loaded
bool ldomDocument::loadCacheFileContent(CacheLoadingCallback * formatCallback, LVDocViewCallback * progressCallback)
{

    CRLog::trace("ldomDocument::loadCacheFileContent()");
    {
        // --- Document properties
        if (progressCallback) progressCallback->OnLoadFileProgress(5);
        SerialBuf propsbuf(0, true);
        if ( !_cacheFile->read( CBT_PROP_DATA, propsbuf ) ) {
            CRLog::error("Error while reading props data");
            return false;
        }
        getProps()->deserialize( propsbuf );
        if ( propsbuf.error() ) {
            CRLog::error("Cannot decode property table for document");
            return false;
        }

        if ( formatCallback ) {
            // Format id as stored in the properties; a missing or out of
            // range value falls back to doc_format_fb2
            int fmt = getProps()->getIntDef(DOC_PROP_FILE_FORMAT_ID,
                    doc_format_fb2);
            if (fmt < doc_format_fb2 || fmt > doc_format_max)
                fmt = doc_format_fb2;
            // notify about format detection, to allow setting format-specific CSS
            formatCallback->OnCacheFileFormatDetected((doc_format_t)fmt);
        }

        // --- ID maps
        if (progressCallback) progressCallback->OnLoadFileProgress(10);
        CRLog::trace("ldomDocument::loadCacheFileContent() - ID data");
        SerialBuf idbuf(0, true);
        if ( !_cacheFile->read( CBT_MAPS_DATA, idbuf ) ) {
            CRLog::error("Error while reading Id data");
            return false;
        }
        deserializeMaps( idbuf );
        if ( idbuf.error() ) {
            CRLog::error("Cannot decode ID table for document");
            return false;
        }

        // --- Pages data
        if (progressCallback) progressCallback->OnLoadFileProgress(15);
        CRLog::trace("ldomDocument::loadCacheFileContent() - page data");
        SerialBuf pagebuf(0, true);
        if ( !_cacheFile->read( CBT_PAGE_DATA, pagebuf ) ) {
            CRLog::error("Error while reading pages data");
            return false;
        }
        // Keep the raw data in _pagesData; it is deserialized here into
        // a local list only to validate it and log the pages count
        pagebuf.swap( _pagesData );
        _pagesData.setPos( 0 );
        LVRendPageList pages;
        pages.deserialize(_pagesData);
        if ( _pagesData.error() ) {
            CRLog::error("Page data deserialization is failed");
            return false;
        }
        CRLog::info("%d pages read from cache file", pages.length());
        //_pagesData.setPos( 0 );

        // --- Embedded fonts
        if (progressCallback) progressCallback->OnLoadFileProgress(20);
        CRLog::trace("ldomDocument::loadCacheFileContent() - embedded font data");
        {
            SerialBuf buf(0, true);
            if ( !_cacheFile->read(CBT_FONT_DATA, buf)) {
                CRLog::error("Error while reading font data");
                return false;
            }
            if (!_fontList.deserialize(buf)) {
                CRLog::error("Error while parsing font data");
                return false;
            }
            registerEmbeddedFonts();
        }

        // --- Render header (deserialized into a local copy first, so _hdr
        // is only updated on success)
        if (progressCallback) progressCallback->OnLoadFileProgress(25);
        DocFileHeader h = {};
        SerialBuf hdrbuf(0,true);
        if ( !_cacheFile->read( CBT_REND_PARAMS, hdrbuf ) ) {
            CRLog::error("Error while reading header data");
            return false;
        } else if ( !h.deserialize(hdrbuf) ) {
            CRLog::error("Header data deserialization is failed");
            return false;
        }
        _hdr = h;
        CRLog::info("Loaded render properties: styleHash=%x, stylesheetHash=%x, docflags=%x, width=%x, height=%x, nodeDisplayStyleHash=%x",
                _hdr.render_style_hash, _hdr.stylesheet_hash, _hdr.render_docflags, _hdr.render_dx, _hdr.render_dy, _hdr.node_displaystyle_hash);
    }

    // --- Node data
    if (progressCallback) progressCallback->OnLoadFileProgress(30);
    CRLog::trace("ldomDocument::loadCacheFileContent() - node data");
    if ( !loadNodeData() ) {
        CRLog::error("Error while reading node instance data");
        return false;
    }

    // --- Storages: elements, text, rects, node styles
    if (progressCallback) progressCallback->OnLoadFileProgress(40);
    CRLog::trace("ldomDocument::loadCacheFileContent() - element storage");
    if ( !_elemStorage.load() ) {
        CRLog::error("Error while loading element data");
        return false;
    }
    if (progressCallback) progressCallback->OnLoadFileProgress(50);
    CRLog::trace("ldomDocument::loadCacheFileContent() - text storage");
    if ( !_textStorage.load() ) {
        CRLog::error("Error while loading text data");
        return false;
    }
    if (progressCallback) progressCallback->OnLoadFileProgress(60);
    CRLog::trace("ldomDocument::loadCacheFileContent() - rect storage");
    if ( !_rectStorage.load() ) {
        CRLog::error("Error while loading rect data");
        return false;
    }
    if (progressCallback) progressCallback->OnLoadFileProgress(70);
    CRLog::trace("ldomDocument::loadCacheFileContent() - node style storage");
    if ( !_styleStorage.load() ) {
        CRLog::error("Error while loading node style data");
        return false;
    }

    // --- TOC
    if (progressCallback) progressCallback->OnLoadFileProgress(80);
    CRLog::trace("ldomDocument::loadCacheFileContent() - TOC");
    {
        SerialBuf tocbuf(0,true);
        if ( !_cacheFile->read( CBT_TOC_DATA, tocbuf ) ) {
            CRLog::error("Error while reading TOC data");
            return false;
        } else if ( !m_toc.deserialize(this, tocbuf) ) {
            CRLog::error("TOC data deserialization is failed");
            return false;
        }
    }
    // --- PageMap
    if (progressCallback) progressCallback->OnLoadFileProgress(85);
    CRLog::trace("ldomDocument::loadCacheFileContent() - PageMap");
    {
        SerialBuf pagemapbuf(0,true);
        if ( !_cacheFile->read( CBT_PAGEMAP_DATA, pagemapbuf ) ) {
            CRLog::error("Error while reading PageMap data");
            return false;
        } else if ( !m_pagemap.deserialize(this, pagemapbuf) ) {
            CRLog::error("PageMap data deserialization is failed");
            return false;
        }
    }


    // --- Styles data: a failure here is not fatal
    if (progressCallback) progressCallback->OnLoadFileProgress(90);
    if ( loadStylesData() ) {
        CRLog::trace("ldomDocument::loadCacheFileContent() - using loaded styles");
        updateLoadedStyles( true );
//        lUInt32 styleHash = calcStyleHash();
//        styleHash = styleHash * 31 + calcGlobalSettingsHash();
//        CRLog::debug("Loaded style hash: %x", styleHash);
//        lUInt32 styleHash = calcStyleHash();
//        CRLog::info("Loaded style hash = %08x", styleHash);
    } else {
        CRLog::trace("ldomDocument::loadCacheFileContent() - style loading failed: will reinit ");
        updateLoadedStyles( false );
    }

    CRLog::trace("ldomDocument::loadCacheFileContent() - completed successfully");
    if (progressCallback) progressCallback->OnLoadFileProgress(95);

    return true;
}
14538
/// Magic signature string (not used in the nearby code: presumably by the
/// code saving and loading styles data in the cache file - to be confirmed)
static const char * styles_magic = "CRSTYLES";
14540
/// Makes the enclosing function return CR_TIMEOUT (after logging
/// "timer expired while " followed by s) when the timer has expired.
/// Requirements on the call site:
///  - a variable named maxTime, having an expired() method, must be in scope;
///  - s must be a string literal (it is concatenated to the log message);
///  - this expands to a bare `if` statement: don't use it as the body of an
///    unbraced if/else.
#define CHECK_EXPIRATION(s) \
    if ( maxTime.expired() ) { CRLog::info("timer expired while " s); return CR_TIMEOUT; }
14543
14544 /// saves changes to cache file, limited by time interval (can be called again to continue after TIMEOUT)
saveChanges(CRTimerUtil & maxTime,LVDocViewCallback * progressCallback)14545 ContinuousOperationResult ldomDocument::saveChanges( CRTimerUtil & maxTime, LVDocViewCallback * progressCallback )
14546 {
14547 if ( !_cacheFile )
14548 return CR_DONE;
14549
14550 if (progressCallback) progressCallback->OnSaveCacheFileStart();
14551
14552 if (maxTime.infinite()) {
14553 _mapSavingStage = 0; // all stages from the beginning
14554 _cacheFile->setAutoSyncSize(0);
14555 } else {
14556 //CRLog::trace("setting autosync");
14557 _cacheFile->setAutoSyncSize(STREAM_AUTO_SYNC_SIZE);
14558 //CRLog::trace("setting autosync - done");
14559 }
14560
14561 CRLog::trace("ldomDocument::saveChanges(timeout=%d stage=%d)", maxTime.interval(), _mapSavingStage);
14562 setCacheFileStale(true);
14563
14564 switch (_mapSavingStage) {
14565 default:
14566 case 0:
14567
14568 if (!maxTime.infinite())
14569 _cacheFile->flush(false, maxTime);
14570 CHECK_EXPIRATION("flushing of stream")
14571
14572 persist( maxTime );
14573 CHECK_EXPIRATION("persisting of node data")
14574 if (progressCallback) progressCallback->OnSaveCacheFileProgress(0);
14575
14576 // fall through
14577 case 1:
14578 _mapSavingStage = 1;
14579 CRLog::trace("ldomDocument::saveChanges() - element storage");
14580
14581 if ( !_elemStorage.save(maxTime) ) {
14582 CRLog::error("Error while saving element data");
14583 return CR_ERROR;
14584 }
14585 CHECK_EXPIRATION("saving element storate")
14586 if (progressCallback) progressCallback->OnSaveCacheFileProgress(10);
14587 // fall through
14588 case 2:
14589 _mapSavingStage = 2;
14590 CRLog::trace("ldomDocument::saveChanges() - text storage");
14591 if ( !_textStorage.save(maxTime) ) {
14592 CRLog::error("Error while saving text data");
14593 return CR_ERROR;
14594 }
14595 CHECK_EXPIRATION("saving text storate")
14596 if (progressCallback) progressCallback->OnSaveCacheFileProgress(20);
14597 // fall through
14598 case 3:
14599 _mapSavingStage = 3;
14600 CRLog::trace("ldomDocument::saveChanges() - rect storage");
14601
14602 if ( !_rectStorage.save(maxTime) ) {
14603 CRLog::error("Error while saving rect data");
14604 return CR_ERROR;
14605 }
14606 CHECK_EXPIRATION("saving rect storate")
14607 if (progressCallback) progressCallback->OnSaveCacheFileProgress(30);
14608 // fall through
14609 case 41:
14610 _mapSavingStage = 41;
14611 CRLog::trace("ldomDocument::saveChanges() - blob storage data");
14612
14613 if ( _blobCache.saveToCache(maxTime) == CR_ERROR ) {
14614 CRLog::error("Error while saving blob storage data");
14615 return CR_ERROR;
14616 }
14617 if (!maxTime.infinite())
14618 _cacheFile->flush(false, maxTime); // intermediate flush
14619 CHECK_EXPIRATION("saving blob storage data")
14620 if (progressCallback) progressCallback->OnSaveCacheFileProgress(35);
14621 // fall through
14622 case 4:
14623 _mapSavingStage = 4;
14624 CRLog::trace("ldomDocument::saveChanges() - node style storage");
14625
14626 if ( !_styleStorage.save(maxTime) ) {
14627 CRLog::error("Error while saving node style data");
14628 return CR_ERROR;
14629 }
14630 if (!maxTime.infinite())
14631 _cacheFile->flush(false, maxTime); // intermediate flush
14632 CHECK_EXPIRATION("saving node style storage")
14633 if (progressCallback) progressCallback->OnSaveCacheFileProgress(40);
14634 // fall through
14635 case 5:
14636 _mapSavingStage = 5;
14637 CRLog::trace("ldomDocument::saveChanges() - misc data");
14638 {
14639 SerialBuf propsbuf(4096);
14640 getProps()->serialize( propsbuf );
14641 if ( !_cacheFile->write( CBT_PROP_DATA, propsbuf, COMPRESS_MISC_DATA ) ) {
14642 CRLog::error("Error while saving props data");
14643 return CR_ERROR;
14644 }
14645 }
14646 if (!maxTime.infinite())
14647 _cacheFile->flush(false, maxTime); // intermediate flush
14648 CHECK_EXPIRATION("saving props data")
14649 if (progressCallback) progressCallback->OnSaveCacheFileProgress(45);
14650 // fall through
14651 case 6:
14652 _mapSavingStage = 6;
14653 CRLog::trace("ldomDocument::saveChanges() - ID data");
14654 {
14655 SerialBuf idbuf(4096);
14656 serializeMaps( idbuf );
14657 if ( !_cacheFile->write( CBT_MAPS_DATA, idbuf, COMPRESS_MISC_DATA ) ) {
14658 CRLog::error("Error while saving Id data");
14659 return CR_ERROR;
14660 }
14661 }
14662 if (!maxTime.infinite())
14663 _cacheFile->flush(false, maxTime); // intermediate flush
14664 CHECK_EXPIRATION("saving ID data")
14665 if (progressCallback) progressCallback->OnSaveCacheFileProgress(50);
14666 // fall through
14667 case 7:
14668 _mapSavingStage = 7;
14669 if ( _pagesData.pos() ) {
14670 CRLog::trace("ldomDocument::saveChanges() - page data (%d bytes)", _pagesData.pos());
14671 if ( !_cacheFile->write( CBT_PAGE_DATA, _pagesData, COMPRESS_PAGES_DATA ) ) {
14672 CRLog::error("Error while saving pages data");
14673 return CR_ERROR;
14674 }
14675 } else {
14676 CRLog::trace("ldomDocument::saveChanges() - no page data");
14677 }
14678 if (!maxTime.infinite())
14679 _cacheFile->flush(false, maxTime); // intermediate flush
14680 CHECK_EXPIRATION("saving page data")
14681 if (progressCallback) progressCallback->OnSaveCacheFileProgress(60);
14682 // fall through
14683 case 8:
14684 _mapSavingStage = 8;
14685
14686 CRLog::trace("ldomDocument::saveChanges() - node data");
14687 if ( !saveNodeData() ) {
14688 CRLog::error("Error while node instance data");
14689 return CR_ERROR;
14690 }
14691 if (!maxTime.infinite())
14692 _cacheFile->flush(false, maxTime); // intermediate flush
14693 CHECK_EXPIRATION("saving node data")
14694 if (progressCallback) progressCallback->OnSaveCacheFileProgress(70);
14695 // fall through
14696 case 9:
14697 _mapSavingStage = 9;
14698 CRLog::trace("ldomDocument::saveChanges() - render info");
14699 {
14700 SerialBuf hdrbuf(0,true);
14701 if ( !_hdr.serialize(hdrbuf) ) {
14702 CRLog::error("Header data serialization is failed");
14703 return CR_ERROR;
14704 } else if ( !_cacheFile->write( CBT_REND_PARAMS, hdrbuf, false ) ) {
14705 CRLog::error("Error while writing header data");
14706 return CR_ERROR;
14707 }
14708 }
14709 CRLog::info("Saving render properties: styleHash=%x, stylesheetHash=%x, docflags=%x, width=%x, height=%x, nodeDisplayStyleHash=%x",
14710 _hdr.render_style_hash, _hdr.stylesheet_hash, _hdr.render_docflags, _hdr.render_dx, _hdr.render_dy, _hdr.node_displaystyle_hash);
14711 if (progressCallback) progressCallback->OnSaveCacheFileProgress(73);
14712
14713 CRLog::trace("ldomDocument::saveChanges() - TOC");
14714 {
14715 SerialBuf tocbuf(0,true);
14716 if ( !m_toc.serialize(tocbuf) ) {
14717 CRLog::error("TOC data serialization is failed");
14718 return CR_ERROR;
14719 } else if ( !_cacheFile->write( CBT_TOC_DATA, tocbuf, COMPRESS_TOC_DATA ) ) {
14720 CRLog::error("Error while writing TOC data");
14721 return CR_ERROR;
14722 }
14723 }
14724 if (progressCallback) progressCallback->OnSaveCacheFileProgress(76);
14725
14726 CRLog::trace("ldomDocument::saveChanges() - PageMap");
14727 {
14728 SerialBuf pagemapbuf(0,true);
14729 if ( !m_pagemap.serialize(pagemapbuf) ) {
14730 CRLog::error("PageMap data serialization is failed");
14731 return CR_ERROR;
14732 } else if ( !_cacheFile->write( CBT_PAGEMAP_DATA, pagemapbuf, COMPRESS_PAGEMAP_DATA ) ) {
14733 CRLog::error("Error while writing PageMap data");
14734 return CR_ERROR;
14735 }
14736 }
14737 if (!maxTime.infinite())
14738 _cacheFile->flush(false, maxTime); // intermediate flush
14739 CHECK_EXPIRATION("saving TOC data")
14740 if (progressCallback) progressCallback->OnSaveCacheFileProgress(80);
14741 // fall through
14742 case 10:
14743 _mapSavingStage = 10;
14744
14745 if ( !saveStylesData() ) {
14746 CRLog::error("Error while writing style data");
14747 return CR_ERROR;
14748 }
14749 if (progressCallback) progressCallback->OnSaveCacheFileProgress(90);
14750 // fall through
14751 case 11:
14752 _mapSavingStage = 11;
14753 CRLog::trace("ldomDocument::saveChanges() - embedded fonts");
14754 {
14755 SerialBuf buf(4096);
14756 _fontList.serialize(buf);
14757 if (!_cacheFile->write(CBT_FONT_DATA, buf, COMPRESS_MISC_DATA) ) {
14758 CRLog::error("Error while saving embedded font data");
14759 return CR_ERROR;
14760 }
14761 CHECK_EXPIRATION("saving embedded fonts")
14762 }
14763 if (progressCallback) progressCallback->OnSaveCacheFileProgress(95);
14764 // fall through
14765 case 12:
14766 _mapSavingStage = 12;
14767 CRLog::trace("ldomDocument::saveChanges() - flush");
14768 {
14769 CRTimerUtil infinite;
14770 if ( !_cacheFile->flush(true, infinite) ) {
14771 CRLog::error("Error while updating index of cache file");
14772 return CR_ERROR;
14773 }
14774 CHECK_EXPIRATION("flushing")
14775 }
14776 if (progressCallback) progressCallback->OnSaveCacheFileProgress(100);
14777 // fall through
14778 case 13:
14779 _mapSavingStage = 13;
14780 setCacheFileStale(false);
14781 }
14782 CRLog::trace("ldomDocument::saveChanges() - done");
14783 if (progressCallback) progressCallback->OnSaveCacheFileEnd();
14784 return CR_DONE;
14785 }
14786
14787 /// save changes to cache file, @see loadCacheFileContent()
saveChanges()14788 bool ldomDocument::saveChanges()
14789 {
14790 if ( !_cacheFile )
14791 return true;
14792 CRLog::debug("ldomDocument::saveChanges() - infinite");
14793 CRTimerUtil timerNoLimit;
14794 ContinuousOperationResult res = saveChanges(timerNoLimit);
14795 return res!=CR_ERROR;
14796 }
14797
saveStylesData()14798 bool tinyNodeCollection::saveStylesData()
14799 {
14800 SerialBuf stylebuf(0, true);
14801 lUInt32 stHash = _stylesheet.getHash();
14802 LVArray<css_style_ref_t> * list = _styles.getIndex();
14803 stylebuf.putMagic(styles_magic);
14804 stylebuf << stHash;
14805 stylebuf << (lUInt32)list->length(); // index
14806 for ( int i=0; i<list->length(); i++ ) {
14807 css_style_ref_t rec = list->get(i);
14808 if ( !rec.isNull() ) {
14809 stylebuf << (lUInt32)i; // index
14810 rec->serialize( stylebuf ); // style
14811 }
14812 }
14813 stylebuf << (lUInt32)0; // index=0 is end list mark
14814 stylebuf.putMagic(styles_magic);
14815 delete list;
14816 if ( stylebuf.error() )
14817 return false;
14818 CRLog::trace("Writing style data: %d bytes", stylebuf.pos());
14819 if ( !_cacheFile->write( CBT_STYLE_DATA, stylebuf, COMPRESS_STYLE_DATA) ) {
14820 return false;
14821 }
14822 return !stylebuf.error();
14823 }
14824
loadStylesData()14825 bool tinyNodeCollection::loadStylesData()
14826 {
14827 SerialBuf stylebuf(0, true);
14828 if ( !_cacheFile->read( CBT_STYLE_DATA, stylebuf ) ) {
14829 CRLog::error("Error while reading style data");
14830 return false;
14831 }
14832 lUInt32 stHash = 0;
14833 lInt32 len = 0;
14834
14835 // lUInt32 myHash = _stylesheet.getHash();
14836 // When loading from cache, this stylesheet was built with the
14837 // initial element name ids, which may have been replaced by
14838 // the one restored from the cache. So, its hash may be different
14839 // from the one we're going to load from cache.
14840 // This is not a failure, but a sign the stylesheet will have
14841 // to be regenerated (later, no need for it currently as we're
14842 // loading previously applied style data): this will be checked
14843 // in checkRenderContext() when comparing a combo hash
14844 // against _hdr.stylesheet_hash fetched from the cache.
14845
14846 //LVArray<css_style_ref_t> * list = _styles.getIndex();
14847 stylebuf.checkMagic(styles_magic);
14848 stylebuf >> stHash;
14849 // Don't check for this:
14850 // if ( stHash != myHash ) {
14851 // CRLog::info("tinyNodeCollection::loadStylesData() - stylesheet hash is changed: skip loading styles");
14852 // return false;
14853 // }
14854 stylebuf >> len; // index
14855 if ( stylebuf.error() )
14856 return false;
14857 LVArray<css_style_ref_t> list(len, css_style_ref_t());
14858 for ( int i=0; i<list.length(); i++ ) {
14859 lUInt32 index = 0;
14860 stylebuf >> index; // index
14861 if ( index<=0 || (int)index>=len || stylebuf.error() )
14862 break;
14863 css_style_ref_t rec( new css_style_rec_t() );
14864 if ( !rec->deserialize(stylebuf) )
14865 break;
14866 list.set( index, rec );
14867 }
14868 stylebuf.checkMagic(styles_magic);
14869 if ( stylebuf.error() )
14870 return false;
14871
14872 CRLog::trace("Setting style data: %d bytes", stylebuf.size());
14873 _styles.setIndex( list );
14874
14875 return !stylebuf.error();
14876 }
14877
calcStyleHash(bool already_rendered)14878 lUInt32 tinyNodeCollection::calcStyleHash(bool already_rendered)
14879 {
14880 CRLog::debug("calcStyleHash start");
14881 // int maxlog = 20;
14882 lUInt32 res = 0; //_elemCount;
14883 lUInt32 globalHash = calcGlobalSettingsHash(getFontContextDocIndex(), already_rendered);
14884 lUInt32 docFlags = getDocFlags();
14885 //CRLog::info("Calculating style hash... elemCount=%d, globalHash=%08x, docFlags=%08x", _elemCount, globalHash, docFlags);
14886 if (_nodeStyleHash) {
14887 // Re-use saved _nodeStyleHash if it has not been invalidated,
14888 // as the following loop can be expensive
14889 res = _nodeStyleHash;
14890 CRLog::debug(" using saved _nodeStyleHash %x", res);
14891 }
14892 else {
14893 // We also compute _nodeDisplayStyleHash from each node style->display. It
14894 // may not change as often as _nodeStyleHash, but if it does, it means
14895 // some nodes switched between 'block' and 'inline', and that some autoBoxing
14896 // that may have been added should no more be in the DOM for a correct
14897 // rendering: in that case, the user will have to reload the document, and
14898 // we should invalidate the cache so a new correct DOM is build on load.
14899 _nodeDisplayStyleHash = 0;
14900
14901 int count = ((_elemCount+TNC_PART_LEN-1) >> TNC_PART_SHIFT);
14902 for ( int i=0; i<count; i++ ) {
14903 int offs = i*TNC_PART_LEN;
14904 int sz = TNC_PART_LEN;
14905 if ( offs + sz > _elemCount+1 ) {
14906 sz = _elemCount+1 - offs;
14907 }
14908 ldomNode * buf = _elemList[i];
14909 if ( !buf ) continue; // avoid clang-tidy warning
14910 for ( int j=0; j<sz; j++ ) {
14911 if ( buf[j].isElement() ) {
14912 css_style_ref_t style = buf[j].getStyle();
14913 lUInt32 sh = calcHash( style );
14914 res = res * 31 + sh;
14915 if (!style.isNull()) {
14916 _nodeDisplayStyleHash = _nodeDisplayStyleHash * 31 + style.get()->display;
14917 // Also account in this hash if this node is "white_space: pre" or alike.
14918 // If white_space changes from/to "pre"-like to/from "normal"-like,
14919 // the document will need to be reloaded so that the HTML text parts
14920 // are parsed according the the PRE/not-PRE rules
14921 if (style.get()->white_space >= css_ws_pre_line)
14922 _nodeDisplayStyleHash += 29;
14923 // Also account for style->float_, as it should create/remove new floatBox
14924 // elements wrapping floats when toggling BLOCK_RENDERING_ENHANCED
14925 if (style.get()->float_ > css_f_none)
14926 _nodeDisplayStyleHash += 123;
14927 }
14928 //printf("element %d %d style hash: %x\n", i, j, sh);
14929 LVFontRef font = buf[j].getFont();
14930 lUInt32 fh = calcHash( font );
14931 res = res * 31 + fh;
14932 //printf("element %d %d font hash: %x\n", i, j, fh);
14933 // if ( maxlog>0 && sh==0 ) {
14934 // style = buf[j].getStyle();
14935 // CRLog::trace("[%06d] : s=%08x f=%08x res=%08x", offs+j, sh, fh, res);
14936 // maxlog--;
14937 // }
14938 }
14939 }
14940 }
14941
14942 CRLog::debug(" COMPUTED _nodeStyleHash %x", res);
14943 _nodeStyleHash = res;
14944 CRLog::debug(" COMPUTED _nodeDisplayStyleHash %x (initial: %x)", _nodeDisplayStyleHash, _nodeDisplayStyleHashInitial);
14945 }
14946 CRLog::info("Calculating style hash... elemCount=%d, globalHash=%08x, docFlags=%08x, nodeStyleHash=%08x", _elemCount, globalHash, docFlags, res);
14947 res = res * 31 + _imgScalingOptions.getHash();
14948 res = res * 31 + _spaceWidthScalePercent;
14949 res = res * 31 + _minSpaceCondensingPercent;
14950 res = res * 31 + _unusedSpaceThresholdPercent;
14951
14952 // _maxAddedLetterSpacingPercent does not need to be accounted, as, working
14953 // only on a laid out line, it does not need a re-rendering, but just
14954 // a _renderedBlockCache.clear() to reformat paragraphs and have the
14955 // word re-positioned (the paragraphs width & height do not change)
14956
14957 // Hanging punctuation does not need to trigger a re-render, as
14958 // it's now ensured by alignLine() and won't change paragraphs height.
14959 // We just need to _renderedBlockCache.clear() when it changes.
14960 // if ( _hangingPunctuationEnabled )
14961 // res = res * 75 + 1761;
14962
14963 res = res * 31 + _renderBlockRenderingFlags;
14964 res = res * 31 + _interlineScaleFactor;
14965
14966 res = (res * 31 + globalHash) * 31 + docFlags;
14967 // CRLog::info("Calculated style hash = %08x", res);
14968 CRLog::debug("calcStyleHash done");
14969 return res;
14970 }
14971
validateChild(ldomNode * node)14972 static void validateChild( ldomNode * node )
14973 {
14974 // DEBUG TEST
14975 if ( !node->isRoot() && node->getParentNode()->getChildIndex( node->getDataIndex() )<0 ) {
14976 CRLog::error("Invalid parent->child relation for nodes %d->%d", node->getParentNode()->getDataIndex(), node->getParentNode()->getDataIndex() );
14977 }
14978 }
14979
14980 /// called on document loading end
validateDocument()14981 bool tinyNodeCollection::validateDocument()
14982 {
14983 ((ldomDocument*)this)->getRootNode()->recurseElements(validateChild);
14984 int count = ((_elemCount+TNC_PART_LEN-1) >> TNC_PART_SHIFT);
14985 bool res = true;
14986 for ( int i=0; i<count; i++ ) {
14987 int offs = i*TNC_PART_LEN;
14988 int sz = TNC_PART_LEN;
14989 if ( offs + sz > _elemCount+1 ) {
14990 sz = _elemCount+1 - offs;
14991 }
14992 ldomNode * buf = _elemList[i];
14993 for ( int j=0; j<sz; j++ ) {
14994 buf[j].setDocumentIndex( _docIndex );
14995 if ( buf[j].isElement() ) {
14996 lUInt16 style = getNodeStyleIndex( buf[j]._handle._dataIndex );
14997 lUInt16 font = getNodeFontIndex( buf[j]._handle._dataIndex );;
14998 if ( !style ) {
14999 if ( !buf[j].isRoot() ) {
15000 CRLog::error("styleId=0 for node <%s> %d", LCSTR(buf[j].getNodeName()), buf[j].getDataIndex());
15001 res = false;
15002 }
15003 } else if ( _styles.get(style).isNull() ) {
15004 CRLog::error("styleId!=0, but absent in cache for node <%s> %d", LCSTR(buf[j].getNodeName()), buf[j].getDataIndex());
15005 res = false;
15006 }
15007 if ( !font ) {
15008 if ( !buf[j].isRoot() ) {
15009 CRLog::error("fontId=0 for node <%s>", LCSTR(buf[j].getNodeName()));
15010 res = false;
15011 }
15012 } else if ( _fonts.get(font).isNull() ) {
15013 CRLog::error("fontId!=0, but absent in cache for node <%s>", LCSTR(buf[j].getNodeName()));
15014 res = false;
15015 }
15016 }
15017 }
15018 }
15019 return res;
15020 }
15021
/// Re-binds fonts to all element nodes from their stored style indexes.
/// When `enabled` is true, each element with a non-zero style index gets
/// the font index matching its style (looked up in, or added to, the
/// _fonts cache). When `enabled` is false, or the style index is 0, both
/// the style and font indexes of the element are reset to 0.
/// Returns false if a style index was not found in the style collection
/// or if a font could not be cached; processing continues in both cases.
/// Always resets _nodeStyleHash to 0 before returning.
bool tinyNodeCollection::updateLoadedStyles( bool enabled )
{
    int count = ((_elemCount+TNC_PART_LEN-1) >> TNC_PART_SHIFT);
    bool res = true;
    // Index obtained from _styles; deleted at the end of this function
    LVArray<css_style_ref_t> * list = _styles.getIndex();

    _fontMap.clear(); // style index to font index

    for ( int i=0; i<count; i++ ) {
        int offs = i*TNC_PART_LEN;
        int sz = TNC_PART_LEN;
        if ( offs + sz > _elemCount+1 ) {
            sz = _elemCount+1 - offs;
        }
        ldomNode * buf = _elemList[i];
        for ( int j=0; j<sz; j++ ) {
            buf[j].setDocumentIndex( _docIndex );
            if ( buf[j].isElement() ) {
                lUInt16 style = getNodeStyleIndex( buf[j]._handle._dataIndex );
                if ( enabled && style!=0 ) {
                    css_style_ref_t s = list->get( style );
                    if ( !s.isNull() ) {
                        // First node seen with this style: resolve and cache
                        // its font. Later nodes with the same style only
                        // call addIndexRef() on the already cached font index.
                        lUInt16 fntIndex = _fontMap.get( style );
                        if ( fntIndex==0 ) {
                            LVFontRef fnt = getFont(s.get(), getFontContextDocIndex());
                            fntIndex = (lUInt16)_fonts.cache( fnt );
                            if ( fnt.isNull() ) {
                                CRLog::error("font not found for style!");
                            } else {
                                _fontMap.set(style, fntIndex);
                            }
                        } else {
                            _fonts.addIndexRef( fntIndex );
                        }
                        // fntIndex is unsigned, so this tests for 0 (no usable font index)
                        if ( fntIndex<=0 ) {
                            CRLog::error("font caching failed for style!");
                            res = false;
                        } else {
                            setNodeFontIndex( buf[j]._handle._dataIndex, fntIndex );
                            //buf[j]._data._pelem._fontIndex = fntIndex;
                        }
                    } else {
                        // The node refers to a style missing from the
                        // collection: detach both style and font from it
                        CRLog::error("Loaded style index %d not found in style collection", (int)style);
                        setNodeFontIndex( buf[j]._handle._dataIndex, 0 );
                        setNodeStyleIndex( buf[j]._handle._dataIndex, 0 );
//                        buf[j]._data._pelem._styleIndex = 0;
//                        buf[j]._data._pelem._fontIndex = 0;
                        res = false;
                    }
                } else {
                    // Styles disabled, or node without style: reset both indexes
                    setNodeFontIndex( buf[j]._handle._dataIndex, 0 );
                    setNodeStyleIndex( buf[j]._handle._dataIndex, 0 );
//                    buf[j]._data._pelem._styleIndex = 0;
//                    buf[j]._data._pelem._fontIndex = 0;
                }
            }
        }
    }
#ifdef TODO_INVESTIGATE
    // Unfinished reference-counter correction, only compiled when
    // TODO_INVESTIGATE is defined; the loop body currently does nothing.
    if ( enabled && res) {
        //_styles.setIndex( *list );
        // correct list reference counters

        for ( int i=0; i<list->length(); i++ ) {
            if ( !list->get(i).isNull() ) {
                // decrease reference counter
                // TODO:
                //_styles.release( list->get(i) );
            }
        }
    }
#endif
    delete list;
//    getRootNode()->setFont( _def_font );
//    getRootNode()->setStyle( _def_style );
    // Zero makes calcStyleHash() recompute the per-node hash on its next call
    _nodeStyleHash = 0;
    return res;
}
15100
15101 /// swaps to cache file or saves changes, limited by time interval
swapToCache(CRTimerUtil & maxTime)15102 ContinuousOperationResult ldomDocument::swapToCache( CRTimerUtil & maxTime )
15103 {
15104 CRLog::trace("ldomDocument::swapToCache entered");
15105 if ( _maperror )
15106 return CR_ERROR;
15107 if ( !_mapped ) {
15108 CRLog::trace("ldomDocument::swapToCache creating cache file");
15109 if ( !createCacheFile() ) {
15110 CRLog::error("ldomDocument::swapToCache: failed: cannot create cache file");
15111 _maperror = true;
15112 return CR_ERROR;
15113 }
15114 }
15115 _mapped = true;
15116 if (!maxTime.infinite()) {
15117 CRLog::info("Cache file is created, but document saving is postponed");
15118 return CR_TIMEOUT;
15119 }
15120 ContinuousOperationResult res = saveChanges(maxTime);
15121 if ( res==CR_ERROR )
15122 {
15123 CRLog::error("Error while saving changes to cache file");
15124 _maperror = true;
15125 return CR_ERROR;
15126 }
15127 CRLog::info("Successfully saved document to cache file: %dK", _cacheFile->getSize()/1024 );
15128 return res;
15129 }
15130
15131 /// saves recent changes to mapped file
updateMap(CRTimerUtil & maxTime,LVDocViewCallback * progressCallback)15132 ContinuousOperationResult ldomDocument::updateMap(CRTimerUtil & maxTime, LVDocViewCallback * progressCallback)
15133 {
15134 if ( !_cacheFile || !_mapped )
15135 return CR_DONE;
15136
15137 if ( _cacheFileLeaveAsDirty ) {
15138 CRLog::info("requested to set cache file as dirty without any update");
15139 _cacheFile->setDirtyFlag(true);
15140 return CR_DONE;
15141 }
15142
15143 if ( !_cacheFileStale) {
15144 CRLog::info("No change, cache file update not needed");
15145 return CR_DONE;
15146 }
15147 CRLog::info("Updating cache file");
15148
15149 ContinuousOperationResult res = saveChanges(maxTime, progressCallback); // NOLINT: Call to virtual function during destruction
15150 if ( res==CR_ERROR )
15151 {
15152 CRLog::error("Error while saving changes to cache file");
15153 return CR_ERROR;
15154 }
15155
15156 if ( res==CR_DONE ) {
15157 CRLog::info("Cache file updated successfully");
15158 dumpStatistics();
15159 }
15160 return res;
15161 }
15162
15163 #endif
15164
/// Magic header written at the start of the cache directory index file by
/// ldomDocCacheImpl::writeIndex() and checked by ldomDocCacheImpl::readIndex().
static const char * doccache_magic = "CoolReader3 Document Cache Directory Index\nV1.00\n";
15166
15167 /// document cache
15168 class ldomDocCacheImpl : public ldomDocCache
15169 {
15170 lString32 _cacheDir;
15171 lvsize_t _maxSize;
15172 lUInt32 _oldStreamSize;
15173 lUInt32 _oldStreamCRC;
15174
15175 struct FileItem {
15176 lString32 filename;
15177 lUInt32 size;
15178 };
15179 LVPtrVector<FileItem> _files;
15180 public:
ldomDocCacheImpl(lString32 cacheDir,lvsize_t maxSize)15181 ldomDocCacheImpl( lString32 cacheDir, lvsize_t maxSize )
15182 : _cacheDir( cacheDir ), _maxSize( maxSize ), _oldStreamSize(0), _oldStreamCRC(0)
15183 {
15184 LVAppendPathDelimiter( _cacheDir );
15185 CRLog::trace("ldomDocCacheImpl(%s maxSize=%d)", LCSTR(_cacheDir), (int)maxSize);
15186 }
15187
writeIndex()15188 bool writeIndex()
15189 {
15190 lString32 filename = _cacheDir + "cr3cache.inx";
15191 if (_oldStreamSize == 0)
15192 {
15193 LVStreamRef oldStream = LVOpenFileStream(filename.c_str(), LVOM_READ);
15194 if (!oldStream.isNull()) {
15195 _oldStreamSize = (lUInt32)oldStream->GetSize();
15196 _oldStreamCRC = (lUInt32)oldStream->getcrc32();
15197 }
15198 }
15199
15200 // fill buffer
15201 SerialBuf buf( 16384, true );
15202 buf.putMagic( doccache_magic );
15203 lUInt32 start = buf.pos();
15204 int count = _files.length();
15205 buf << (lUInt32)count;
15206 for ( int i=0; i<count && !buf.error(); i++ ) {
15207 FileItem * item = _files[i];
15208 buf << item->filename;
15209 buf << item->size;
15210 CRLog::trace("cache item: %s %d", LCSTR(item->filename), (int)item->size);
15211 }
15212 buf.putCRC( buf.pos() - start );
15213 if ( buf.error() )
15214 return false;
15215 lUInt32 newCRC = buf.getCRC();
15216 lUInt32 newSize = buf.pos();
15217
15218 // check to avoid rewritting of identical file
15219 if (newCRC != _oldStreamCRC || newSize != _oldStreamSize) {
15220 // changed: need to write
15221 CRLog::trace("Writing cache index");
15222 LVStreamRef stream = LVOpenFileStream(filename.c_str(), LVOM_WRITE);
15223 if ( !stream )
15224 return false;
15225 if ( stream->Write( buf.buf(), buf.pos(), NULL )!=LVERR_OK )
15226 return false;
15227 _oldStreamCRC = newCRC;
15228 _oldStreamSize = newSize;
15229 }
15230 return true;
15231 }
15232
readIndex()15233 bool readIndex( )
15234 {
15235 lString32 filename = _cacheDir + "cr3cache.inx";
15236 // read index
15237 lUInt32 totalSize = 0;
15238 LVStreamRef instream = LVOpenFileStream( filename.c_str(), LVOM_READ );
15239 if ( !instream.isNull() ) {
15240 LVStreamBufferRef sb = instream->GetReadBuffer(0, instream->GetSize() );
15241 if ( !sb )
15242 return false;
15243 SerialBuf buf( sb->getReadOnly(), sb->getSize() );
15244 if ( !buf.checkMagic( doccache_magic ) ) {
15245 CRLog::error("wrong cache index file format");
15246 return false;
15247 }
15248
15249 lUInt32 start = buf.pos();
15250 lUInt32 count;
15251 buf >> count;
15252 for (lUInt32 i=0; i < count && !buf.error(); i++) {
15253 FileItem * item = new FileItem();
15254 _files.add( item );
15255 buf >> item->filename;
15256 buf >> item->size;
15257 CRLog::trace("cache %d: %s [%d]", i, UnicodeToUtf8(item->filename).c_str(), (int)item->size );
15258 totalSize += item->size;
15259 }
15260 if ( !buf.checkCRC( buf.pos() - start ) ) {
15261 CRLog::error("CRC32 doesn't match in cache index file");
15262 return false;
15263 }
15264
15265 if ( buf.error() )
15266 return false;
15267
15268 CRLog::info( "Document cache index file read ok, %d files in cache, %d bytes", _files.length(), totalSize );
15269 return true;
15270 } else {
15271 CRLog::error( "Document cache index file cannot be read" );
15272 return false;
15273 }
15274 }
15275
15276 /// remove all .cr3 files which are not listed in index
removeExtraFiles()15277 bool removeExtraFiles( )
15278 {
15279 LVContainerRef container;
15280 container = LVOpenDirectory( _cacheDir.c_str(), U"*.cr3" );
15281 if ( container.isNull() ) {
15282 if ( !LVCreateDirectory( _cacheDir ) ) {
15283 CRLog::error("Cannot create directory %s", UnicodeToUtf8(_cacheDir).c_str() );
15284 return false;
15285 }
15286 container = LVOpenDirectory( _cacheDir.c_str(), U"*.cr3" );
15287 if ( container.isNull() ) {
15288 CRLog::error("Cannot open directory %s", UnicodeToUtf8(_cacheDir).c_str() );
15289 return false;
15290 }
15291 }
15292 for ( int i=0; i<container->GetObjectCount(); i++ ) {
15293 const LVContainerItemInfo * item = container->GetObjectInfo( i );
15294 if ( !item->IsContainer() ) {
15295 lString32 fn = item->GetName();
15296 if ( !fn.endsWith(".cr3") )
15297 continue;
15298 if ( findFileIndex(fn)<0 ) {
15299 // delete file
15300 CRLog::info("Removing cache file not specified in index: %s", UnicodeToUtf8(fn).c_str() );
15301 if ( !LVDeleteFile( _cacheDir + fn ) ) {
15302 CRLog::error("Error while removing cache file not specified in index: %s", UnicodeToUtf8(fn).c_str() );
15303 }
15304 }
15305 }
15306 }
15307 return true;
15308 }
15309
15310 // remove all extra files to add new one of specified size
reserve(lvsize_t allocSize)15311 bool reserve( lvsize_t allocSize )
15312 {
15313 bool res = true;
15314 // remove extra files specified in list
15315 lvsize_t dirsize = allocSize;
15316 for ( int i=0; i<_files.length(); ) {
15317 if ( LVFileExists( _cacheDir + _files[i]->filename ) ) {
15318 if ( (i>0 || allocSize>0) && dirsize+_files[i]->size > _maxSize ) {
15319 if ( LVDeleteFile( _cacheDir + _files[i]->filename ) ) {
15320 _files.erase(i, 1);
15321 } else {
15322 CRLog::error("Cannot delete cache file %s", UnicodeToUtf8(_files[i]->filename).c_str() );
15323 dirsize += _files[i]->size;
15324 res = false;
15325 i++;
15326 }
15327 } else {
15328 dirsize += _files[i]->size;
15329 i++;
15330 }
15331 } else {
15332 CRLog::error("File %s is found in cache index, but does not exist", UnicodeToUtf8(_files[i]->filename).c_str() );
15333 _files.erase(i, 1);
15334 }
15335 }
15336 return res;
15337 }
15338
findFileIndex(lString32 filename)15339 int findFileIndex( lString32 filename )
15340 {
15341 for ( int i=0; i<_files.length(); i++ ) {
15342 if ( _files[i]->filename == filename )
15343 return i;
15344 }
15345 return -1;
15346 }
15347
moveFileToTop(lString32 filename,lUInt32 size)15348 bool moveFileToTop( lString32 filename, lUInt32 size )
15349 {
15350 int index = findFileIndex( filename );
15351 if ( index<0 ) {
15352 FileItem * item = new FileItem();
15353 item->filename = filename;
15354 item->size = size;
15355 _files.insert( 0, item );
15356 } else {
15357 _files.move( 0, index );
15358 _files[0]->size = size;
15359 }
15360 return writeIndex();
15361 }
15362
init()15363 bool init()
15364 {
15365 CRLog::info("Initialize document cache in directory %s", UnicodeToUtf8(_cacheDir).c_str() );
15366 // read index
15367 if ( readIndex( ) ) {
15368 // read successfully
15369 // remove files not specified in list
15370 removeExtraFiles( );
15371 } else {
15372 if ( !LVCreateDirectory( _cacheDir ) ) {
15373 CRLog::error("Document Cache: cannot create cache directory %s, disabling cache", UnicodeToUtf8(_cacheDir).c_str() );
15374 return false;
15375 }
15376 _files.clear();
15377
15378 }
15379 reserve(0);
15380 if ( !writeIndex() )
15381 return false; // cannot write index: read only?
15382 return true;
15383 }
15384
15385 /// remove all files
clear()15386 bool clear()
15387 {
15388 for ( int i=0; i<_files.length(); i++ )
15389 LVDeleteFile( _files[i]->filename );
15390 _files.clear();
15391 return writeIndex();
15392 }
15393
15394 // dir/filename.{crc32}.cr3
makeFileName(lString32 filename,lUInt32 crc,lUInt32 docFlags)15395 lString32 makeFileName( lString32 filename, lUInt32 crc, lUInt32 docFlags )
15396 {
15397 lString32 fn;
15398 lString8 filename8 = UnicodeToTranslit(filename);
15399 bool lastUnderscore = false;
15400 int goodCount = 0;
15401 int badCount = 0;
15402 for (int i = 0; i < filename8.length(); i++) {
15403 lChar32 ch = filename8[i];
15404
15405 if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') || ch == '.' || ch == '-') {
15406 fn << ch;
15407 lastUnderscore = false;
15408 goodCount++;
15409 } else {
15410 if (!lastUnderscore) {
15411 fn << U"_";
15412 lastUnderscore = true;
15413 }
15414 badCount++;
15415 }
15416 }
15417 if (goodCount < 2 || badCount > goodCount * 2)
15418 fn << "_noname";
15419 if (fn.length() > 25)
15420 fn = fn.substr(0, 12) + "-" + fn.substr(fn.length()-12, 12);
15421 char s[16];
15422 sprintf(s, ".%08x.%d.cr3", (unsigned)crc, (int)docFlags);
15423 return fn + lString32( s ); //_cacheDir +
15424 }
15425
15426 /// open existing cache file stream
openExisting(lString32 filename,lUInt32 crc,lUInt32 docFlags,lString32 & cachePath)15427 LVStreamRef openExisting( lString32 filename, lUInt32 crc, lUInt32 docFlags, lString32 &cachePath )
15428 {
15429 lString32 fn = makeFileName( filename, crc, docFlags );
15430 CRLog::debug("ldomDocCache::openExisting(%s)", LCSTR(fn));
15431 // Try filename with ".keep" extension (that a user can manually add
15432 // to a .cr3 cache file, for it to no more be maintained by crengine
15433 // in its index, thus not subject to _maxSize enforcement, so sure
15434 // to not be deleted by crengine)
15435 lString32 fn_keep = _cacheDir + fn + ".keep";
15436 if ( LVFileExists(fn_keep) ) {
15437 LVStreamRef stream = LVOpenFileStream( fn_keep.c_str(), LVOM_APPEND|LVOM_FLAG_SYNC );
15438 if ( !stream.isNull() ) {
15439 CRLog::info( "ldomDocCache::openExisting - opening user renamed cache file %s", UnicodeToUtf8(fn_keep).c_str() );
15440 cachePath = fn_keep;
15441 #if ENABLED_BLOCK_WRITE_CACHE
15442 stream = LVCreateBlockWriteStream( stream, WRITE_CACHE_BLOCK_SIZE, WRITE_CACHE_BLOCK_COUNT );
15443 #endif
15444 return stream;
15445 }
15446 }
15447 LVStreamRef res;
15448 if ( findFileIndex( fn ) < 0 ) {
15449 CRLog::error( "ldomDocCache::openExisting - File %s is not found in cache index", UnicodeToUtf8(fn).c_str() );
15450 return res;
15451 }
15452 lString32 pathname = _cacheDir + fn;
15453 res = LVOpenFileStream( pathname.c_str(), LVOM_APPEND|LVOM_FLAG_SYNC );
15454 if ( !res ) {
15455 CRLog::error( "ldomDocCache::openExisting - File %s is listed in cache index, but cannot be opened", UnicodeToUtf8(fn).c_str() );
15456 return res;
15457 }
15458 cachePath = pathname;
15459
15460 #if ENABLED_BLOCK_WRITE_CACHE
15461 res = LVCreateBlockWriteStream( res, WRITE_CACHE_BLOCK_SIZE, WRITE_CACHE_BLOCK_COUNT );
15462 #if TEST_BLOCK_STREAM
15463
15464 LVStreamRef stream2 = LVOpenFileStream( (_cacheDir + fn + "_c").c_str(), LVOM_APPEND );
15465 if ( !stream2 ) {
15466 CRLog::error( "ldomDocCache::openExisting - file %s is cannot be created", UnicodeToUtf8(fn).c_str() );
15467 return stream2;
15468 }
15469 res = LVCreateCompareTestStream(res, stream2);
15470 #endif
15471 #endif
15472
15473 lUInt32 fileSize = (lUInt32) res->GetSize();
15474 moveFileToTop( fn, fileSize );
15475 return res;
15476 }
15477
15478 /// create new cache file
createNew(lString32 filename,lUInt32 crc,lUInt32 docFlags,lUInt32 fileSize,lString32 & cachePath)15479 LVStreamRef createNew( lString32 filename, lUInt32 crc, lUInt32 docFlags, lUInt32 fileSize, lString32 &cachePath )
15480 {
15481 lString32 fn = makeFileName( filename, crc, docFlags );
15482 LVStreamRef res;
15483 lString32 pathname = _cacheDir + fn;
15484 // If this cache filename exists with a ".keep" extension (manually
15485 // added by the user), and we were going to create a new one (because
15486 // this .keep is invalid, or cache file format version has changed),
15487 // remove it and create the new one with this same .keep extension,
15488 // so it stays (as wished by the user) not maintained by crengine.
15489 lString32 fn_keep = pathname + ".keep";
15490 if ( LVFileExists( fn_keep ) ) {
15491 LVDeleteFile( pathname ); // delete .cr3 if any
15492 LVDeleteFile( fn_keep ); // delete invalid .cr3.keep
15493 LVStreamRef stream = LVOpenFileStream( fn_keep.c_str(), LVOM_APPEND|LVOM_FLAG_SYNC );
15494 if ( !stream.isNull() ) {
15495 CRLog::info( "ldomDocCache::createNew - re-creating user renamed cache file %s", UnicodeToUtf8(fn_keep).c_str() );
15496 cachePath = fn_keep;
15497 #if ENABLED_BLOCK_WRITE_CACHE
15498 stream = LVCreateBlockWriteStream( stream, WRITE_CACHE_BLOCK_SIZE, WRITE_CACHE_BLOCK_COUNT );
15499 #endif
15500 return stream;
15501 }
15502 }
15503 if ( findFileIndex( pathname ) >= 0 )
15504 LVDeleteFile( pathname );
15505 reserve( fileSize/10 );
15506 //res = LVMapFileStream( (_cacheDir+fn).c_str(), LVOM_APPEND, fileSize );
15507 LVDeleteFile( pathname ); // try to delete, ignore errors
15508 res = LVOpenFileStream( pathname.c_str(), LVOM_APPEND|LVOM_FLAG_SYNC );
15509 if ( !res ) {
15510 CRLog::error( "ldomDocCache::createNew - file %s is cannot be created", UnicodeToUtf8(fn).c_str() );
15511 return res;
15512 }
15513 cachePath = pathname;
15514 #if ENABLED_BLOCK_WRITE_CACHE
15515 res = LVCreateBlockWriteStream( res, WRITE_CACHE_BLOCK_SIZE, WRITE_CACHE_BLOCK_COUNT );
15516 #if TEST_BLOCK_STREAM
15517 LVStreamRef stream2 = LVOpenFileStream( (pathname+U"_c").c_str(), LVOM_APPEND );
15518 if ( !stream2 ) {
15519 CRLog::error( "ldomDocCache::createNew - file %s is cannot be created", UnicodeToUtf8(fn).c_str() );
15520 return stream2;
15521 }
15522 res = LVCreateCompareTestStream(res, stream2);
15523 #endif
15524 #endif
15525 moveFileToTop( fn, fileSize );
15526 return res;
15527 }
15528
~ldomDocCacheImpl()15529 virtual ~ldomDocCacheImpl()
15530 {
15531 }
15532 };
15533
/// The document cache instance used by the static ldomDocCache functions:
/// created by ldomDocCache::init(), deleted by ldomDocCache::close(),
/// NULL while the cache is not enabled.
static ldomDocCacheImpl * _cacheInstance = NULL;
15535
init(lString32 cacheDir,lvsize_t maxSize)15536 bool ldomDocCache::init( lString32 cacheDir, lvsize_t maxSize )
15537 {
15538 if ( _cacheInstance )
15539 delete _cacheInstance;
15540 CRLog::info("Initialize document cache at %s (max size = %d)", UnicodeToUtf8(cacheDir).c_str(), (int)maxSize );
15541 _cacheInstance = new ldomDocCacheImpl( cacheDir, maxSize );
15542 if ( !_cacheInstance->init() ) {
15543 delete _cacheInstance;
15544 _cacheInstance = NULL;
15545 return false;
15546 }
15547 return true;
15548 }
15549
close()15550 bool ldomDocCache::close()
15551 {
15552 if ( !_cacheInstance )
15553 return false;
15554 delete _cacheInstance;
15555 _cacheInstance = NULL;
15556 return true;
15557 }
15558
15559 /// open existing cache file stream
openExisting(lString32 filename,lUInt32 crc,lUInt32 docFlags,lString32 & cachePath)15560 LVStreamRef ldomDocCache::openExisting( lString32 filename, lUInt32 crc, lUInt32 docFlags, lString32 &cachePath )
15561 {
15562 if ( !_cacheInstance )
15563 return LVStreamRef();
15564 return _cacheInstance->openExisting( filename, crc, docFlags, cachePath );
15565 }
15566
15567 /// create new cache file
createNew(lString32 filename,lUInt32 crc,lUInt32 docFlags,lUInt32 fileSize,lString32 & cachePath)15568 LVStreamRef ldomDocCache::createNew( lString32 filename, lUInt32 crc, lUInt32 docFlags, lUInt32 fileSize, lString32 &cachePath )
15569 {
15570 if ( !_cacheInstance )
15571 return LVStreamRef();
15572 return _cacheInstance->createNew( filename, crc, docFlags, fileSize, cachePath );
15573 }
15574
15575 /// delete all cache files
clear()15576 bool ldomDocCache::clear()
15577 {
15578 if ( !_cacheInstance )
15579 return false;
15580 return _cacheInstance->clear();
15581 }
15582
15583 /// returns true if cache is enabled (successfully initialized)
enabled()15584 bool ldomDocCache::enabled()
15585 {
15586 return _cacheInstance!=NULL;
15587 }
15588
15589 //void calcStyleHash( ldomNode * node, lUInt32 & value )
15590 //{
15591 // if ( !node )
15592 // return;
15593 //
15594 // if ( node->isText() || node->getRendMethod()==erm_invisible ) {
15595 // value = value * 75 + 1673251;
15596 // return; // don't go through invisible nodes
15597 // }
15598 //
15599 // css_style_ref_t style = node->getStyle();
15600 // font_ref_t font = node->getFont();
15601 // lUInt32 styleHash = (!style) ? 4324324 : calcHash( style );
15602 // lUInt32 fontHash = (!font) ? 256371 : calcHash( font );
15603 // value = (value*75 + styleHash) * 75 + fontHash;
15604 //
15605 // int cnt = node->getChildCount();
15606 // for ( int i=0; i<cnt; i++ ) {
15607 // calcStyleHash( node->getChildNode(i), value );
15608 // }
15609 //}
15610
15611
15612 #if BUILD_LITE!=1
15613
15614 /// save document formatting parameters after render
updateRenderContext()15615 void ldomDocument::updateRenderContext()
15616 {
15617 int dx = _page_width;
15618 int dy = _page_height;
15619 _nodeStyleHash = 0; // force recalculation by calcStyleHash()
15620 lUInt32 styleHash = calcStyleHash(_rendered);
15621 lUInt32 stylesheetHash = (((_stylesheet.getHash() * 31) + calcHash(_def_style))*31 + calcHash(_def_font));
15622 //calcStyleHash( getRootNode(), styleHash );
15623 _hdr.render_style_hash = styleHash;
15624 _hdr.stylesheet_hash = stylesheetHash;
15625 _hdr.render_dx = dx;
15626 _hdr.render_dy = dy;
15627 _hdr.render_docflags = _docFlags;
15628 _hdr.node_displaystyle_hash = _nodeDisplayStyleHashInitial; // we keep using the initial one
15629 CRLog::info("Updating render properties: styleHash=%x, stylesheetHash=%x, docflags=%x, width=%x, height=%x, nodeDisplayStyleHash=%x",
15630 _hdr.render_style_hash, _hdr.stylesheet_hash, _hdr.render_docflags, _hdr.render_dx, _hdr.render_dy, _hdr.node_displaystyle_hash);
15631 }
15632
15633 /// check document formatting parameters before render - whether we need to reformat; returns false if render is necessary
checkRenderContext()15634 bool ldomDocument::checkRenderContext()
15635 {
15636 bool res = true;
15637 ldomNode * node = getRootNode();
15638 if (node != NULL && node->getFont().isNull()) {
15639 // This may happen when epubfmt.cpp has called forceReinitStyles()
15640 // because the EPUB contains embedded fonts: a full nodes styles
15641 // re-init is needed to use the new fonts (only available at end
15642 // of loading)
15643 CRLog::info("checkRenderContext: style is not set for root node");
15644 res = false;
15645 }
15646 int dx = _page_width;
15647 int dy = _page_height;
15648 lUInt32 styleHash = calcStyleHash(_rendered);
15649 lUInt32 stylesheetHash = (((_stylesheet.getHash() * 31) + calcHash(_def_style))*31 + calcHash(_def_font));
15650 //calcStyleHash( getRootNode(), styleHash );
15651 if ( styleHash != _hdr.render_style_hash ) {
15652 CRLog::info("checkRenderContext: Style hash doesn't match %x!=%x", styleHash, _hdr.render_style_hash);
15653 res = false;
15654 if (_just_rendered_from_cache)
15655 printf("CRE WARNING: cached rendering is invalid (style hash mismatch): doing full rendering\n");
15656 } else if ( stylesheetHash != _hdr.stylesheet_hash ) {
15657 CRLog::info("checkRenderContext: Stylesheet hash doesn't match %x!=%x", stylesheetHash, _hdr.stylesheet_hash);
15658 res = false;
15659 if (_just_rendered_from_cache)
15660 printf("CRE WARNING: cached rendering is invalid (stylesheet hash mismatch): doing full rendering\n");
15661 } else if ( _docFlags != _hdr.render_docflags ) {
15662 CRLog::info("checkRenderContext: Doc flags don't match %x!=%x", _docFlags, _hdr.render_docflags);
15663 res = false;
15664 if (_just_rendered_from_cache)
15665 printf("CRE WARNING: cached rendering is invalid (doc flags mismatch): doing full rendering\n");
15666 } else if ( dx != (int)_hdr.render_dx ) {
15667 CRLog::info("checkRenderContext: Width doesn't match %x!=%x", dx, (int)_hdr.render_dx);
15668 res = false;
15669 if (_just_rendered_from_cache)
15670 printf("CRE WARNING: cached rendering is invalid (page width mismatch): doing full rendering\n");
15671 } else if ( dy != (int)_hdr.render_dy ) {
15672 CRLog::info("checkRenderContext: Page height doesn't match %x!=%x", dy, (int)_hdr.render_dy);
15673 res = false;
15674 if (_just_rendered_from_cache)
15675 printf("CRE WARNING: cached rendering is invalid (page height mismatch): doing full rendering\n");
15676 }
15677 // no need to check for _nodeDisplayStyleHash != _hdr.node_displaystyle_hash:
15678 // this is implicitely done by styleHash != _hdr.render_style_hash (whose _nodeDisplayStyleHash is a subset)
15679 _just_rendered_from_cache = false;
15680 if ( res ) {
15681
15682 //if ( pages->length()==0 ) {
15683 // _pagesData.reset();
15684 // pages->deserialize( _pagesData );
15685 //}
15686
15687 return true;
15688 }
15689 // _hdr.render_style_hash = styleHash;
15690 // _hdr.stylesheet_hash = stylesheetHash;
15691 // _hdr.render_dx = dx;
15692 // _hdr.render_dy = dy;
15693 // _hdr.render_docflags = _docFlags;
15694 // CRLog::info("New render properties: styleHash=%x, stylesheetHash=%x, docflags=%04x, width=%d, height=%d",
15695 // _hdr.render_style_hash, _hdr.stylesheet_hash, _hdr.render_docflags, _hdr.render_dx, _hdr.render_dy);
15696 return false;
15697 }
15698
15699 #endif
15700
setStyleSheet(const char * css,bool replace)15701 void lxmlDocBase::setStyleSheet( const char * css, bool replace )
15702 {
15703 lString8 s(css);
15704
15705 //CRLog::trace("lxmlDocBase::setStyleSheet(length:%d replace:%s css text hash: %x)", strlen(css), replace ? "yes" : "no", s.getHash());
15706 lUInt32 oldHash = _stylesheet.getHash();
15707 if ( replace ) {
15708 //CRLog::debug("cleaning stylesheet contents");
15709 _stylesheet.clear();
15710 }
15711 if ( css && *css ) {
15712 //CRLog::debug("appending stylesheet contents: \n%s", css);
15713 _stylesheet.parse( css, true );
15714 // We use override_important=true: we are the only code
15715 // that sets the main CSS (including style tweaks). We allow
15716 // any !important to override any previous !important.
15717 // Other calls to _stylesheet.parse() elsewhere are used to
15718 // include document embedded or inline CSS, with the default
15719 // of override_important=false, so they won't override
15720 // the ones we set here.
15721 }
15722 lUInt32 newHash = _stylesheet.getHash();
15723 if (oldHash != newHash) {
15724 CRLog::debug("New stylesheet hash: %08x", newHash);
15725 }
15726 }
15727
15728
15729
15730
15731
15732
15733 //=====================================================
15734 // ldomElement declaration placed here to hide DOM implementation
15735 // use ldomNode rich interface instead
15736 class tinyElement
15737 {
15738 friend struct ldomNode;
15739 private:
15740 ldomDocument * _document;
15741 ldomNode * _parentNode;
15742 lUInt16 _id;
15743 lUInt16 _nsid;
15744 LVArray < lInt32 > _children;
15745 ldomAttributeCollection _attrs;
15746 lvdom_element_render_method _rendMethod;
15747 public:
tinyElement(ldomDocument * document,ldomNode * parentNode,lUInt16 nsid,lUInt16 id)15748 tinyElement( ldomDocument * document, ldomNode * parentNode, lUInt16 nsid, lUInt16 id )
15749 : _document(document), _parentNode(parentNode), _id(id), _nsid(nsid), _rendMethod(erm_invisible)
15750 { _document->_tinyElementCount++; }
15751 /// destructor
~tinyElement()15752 ~tinyElement() { _document->_tinyElementCount--; }
15753 };
15754
15755
// Shorthands for ldomNode payload members.
// NPELEM: the tinyElement pointer of a non-persistent element node.
// NOTE(review): NPTEXT applies '.' to _data._text_ptr, while the code below
// uses _data._text_ptr as a pointer (delete / ->); NPTEXT looks stale and
// unused in this part of the file - confirm before relying on it.
#define NPELEM _data._elem_ptr
#define NPTEXT _data._text_ptr._str
15758
15759 //=====================================================
15760
15761 /// minimize memory consumption
compact()15762 void tinyNodeCollection::compact()
15763 {
15764 _textStorage.compact(0xFFFFFF);
15765 _elemStorage.compact(0xFFFFFF);
15766 _rectStorage.compact(0xFFFFFF);
15767 _styleStorage.compact(0xFFFFFF);
15768 }
15769
15770 /// allocate new tinyElement
allocTinyElement(ldomNode * parent,lUInt16 nsid,lUInt16 id)15771 ldomNode * tinyNodeCollection::allocTinyElement( ldomNode * parent, lUInt16 nsid, lUInt16 id )
15772 {
15773 ldomNode * node = allocTinyNode( ldomNode::NT_ELEMENT );
15774 tinyElement * elem = new tinyElement( (ldomDocument*)this, parent, nsid, id );
15775 node->NPELEM = elem;
15776 return node;
15777 }
15778
readOnlyError()15779 static void readOnlyError()
15780 {
15781 crFatalError( 125, "Text node is persistent (read-only)! Call modify() to get r/w instance." );
15782 }
15783
15784 //=====================================================
15785
// shortcut for dynamic element accessor
// ASSERT_NODE_NOT_NULL: when _DEBUG is defined, calls
// crFatalError( 1313, "Access to null node" ) if isNull() is true;
// otherwise it expands to nothing.
// The debug expansion is a bare 'if' statement without its terminating
// semicolon, so always use the macro as a full statement followed by ';'.
#ifdef _DEBUG
#define ASSERT_NODE_NOT_NULL \
    if ( isNull() ) \
        crFatalError( 1313, "Access to null node" )
#else
#define ASSERT_NODE_NOT_NULL
#endif
15794
15795 /// returns node level, 0 is root node
getNodeLevel() const15796 lUInt8 ldomNode::getNodeLevel() const
15797 {
15798 const ldomNode * node = this;
15799 int level = 0;
15800 for ( ; node; node = node->getParentNode() )
15801 level++;
15802 return (lUInt8)level;
15803 }
15804
onCollectionDestroy()15805 void ldomNode::onCollectionDestroy()
15806 {
15807 if ( isNull() )
15808 return;
15809 //CRLog::trace("ldomNode::onCollectionDestroy(%d) type=%d", this->_handle._dataIndex, TNTYPE);
15810 switch ( TNTYPE ) {
15811 case NT_TEXT:
15812 delete _data._text_ptr;
15813 _data._text_ptr = NULL;
15814 break;
15815 case NT_ELEMENT:
15816 // ???
15817 #if BUILD_LITE!=1
15818 getDocument()->clearNodeStyle( _handle._dataIndex );
15819 #endif
15820 delete NPELEM;
15821 NPELEM = NULL;
15822 break;
15823 #if BUILD_LITE!=1
15824 case NT_PTEXT: // immutable (persistent) text node
15825 // do nothing
15826 break;
15827 case NT_PELEMENT: // immutable (persistent) element node
15828 // do nothing
15829 break;
15830 #endif
15831 }
15832 }
15833
destroy()15834 void ldomNode::destroy()
15835 {
15836 if ( isNull() )
15837 return;
15838 //CRLog::trace("ldomNode::destroy(%d) type=%d", this->_handle._dataIndex, TNTYPE);
15839 switch ( TNTYPE ) {
15840 case NT_TEXT:
15841 delete _data._text_ptr;
15842 break;
15843 case NT_ELEMENT:
15844 {
15845 #if BUILD_LITE!=1
15846 getDocument()->clearNodeStyle(_handle._dataIndex);
15847 #endif
15848 tinyElement * me = NPELEM;
15849 // delete children
15850 for ( int i=0; i<me->_children.length(); i++ ) {
15851 ldomNode * child = getDocument()->getTinyNode(me->_children[i]);
15852 if ( child )
15853 child->destroy();
15854 }
15855 delete me;
15856 NPELEM = NULL;
15857 }
15858 delete NPELEM;
15859 break;
15860 #if BUILD_LITE!=1
15861 case NT_PTEXT:
15862 // disable removing from storage: to minimize modifications
15863 //_document->_textStorage.freeNode( _data._ptext_addr._addr );
15864 break;
15865 case NT_PELEMENT: // immutable (persistent) element node
15866 {
15867 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
15868 for ( int i=0; i<me->childCount; i++ )
15869 getDocument()->getTinyNode( me->children[i] )->destroy();
15870 getDocument()->clearNodeStyle( _handle._dataIndex );
15871 // getDocument()->_styles.release( _data._pelem._styleIndex );
15872 // getDocument()->_fonts.release( _data._pelem._fontIndex );
15873 // _data._pelem._styleIndex = 0;
15874 // _data._pelem._fontIndex = 0;
15875 getDocument()->_elemStorage.freeNode( _data._pelem_addr );
15876 }
15877 break;
15878 #endif
15879 }
15880 getDocument()->recycleTinyNode( _handle._dataIndex );
15881 }
15882
15883 /// returns index of child node by dataIndex
getChildIndex(lUInt32 dataIndex) const15884 int ldomNode::getChildIndex( lUInt32 dataIndex ) const
15885 {
15886 // was here and twice below: dataIndex &= 0xFFFFFFF0;
15887 // The lowest bits of a dataIndex carry properties about the node:
15888 // bit 0: 0 = text node / 1 = element node
15889 // bit 1: 0 = mutable node / 1 = immutable (persistent, cached)
15890 // (So, all Text nodes have an even dataIndex, and Element nodes
15891 // all have a odd dataIndex.)
15892 // This '& 0xFFFFFFF0' was to clear these properties so a same
15893 // node can be found if these properties change (mostly useful
15894 // with mutable<>persistent).
15895 // But text nodes and Element nodes use different independant counters
15896 // (see tinyNodeCollection::allocTinyNode(): _elemCount++, _textCount++)
15897 // and we may have a text node with dataIndex 8528, and an element
15898 // node with dataIndex 8529, that would be confused with each other
15899 // if we use 0xFFFFFFF0.
15900 // This could cause finding the wrong node, and strange side effects.
15901 // With '& 0xFFFFFFF1' keep the lowest bit.
15902 dataIndex &= 0xFFFFFFF1;
15903 ASSERT_NODE_NOT_NULL;
15904 int parentIndex = -1;
15905 switch ( TNTYPE ) {
15906 case NT_ELEMENT:
15907 {
15908 tinyElement * me = NPELEM;
15909 for ( int i=0; i<me->_children.length(); i++ ) {
15910 if ( (me->_children[i] & 0xFFFFFFF1) == dataIndex ) {
15911 // found
15912 parentIndex = i;
15913 break;
15914 }
15915 }
15916 }
15917 break;
15918 #if BUILD_LITE!=1
15919 case NT_PELEMENT:
15920 {
15921 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
15922 for ( int i=0; i<me->childCount; i++ ) {
15923 if ( (me->children[i] & 0xFFFFFFF1) == dataIndex ) {
15924 // found
15925 parentIndex = i;
15926 break;
15927 }
15928 }
15929 }
15930 break;
15931 case NT_PTEXT: // immutable (persistent) text node
15932 #endif
15933 case NT_TEXT:
15934 break;
15935 }
15936 return parentIndex;
15937 }
15938
15939 /// returns index of node inside parent's child collection
getNodeIndex() const15940 int ldomNode::getNodeIndex() const
15941 {
15942 ASSERT_NODE_NOT_NULL;
15943 ldomNode * parent = getParentNode();
15944 if ( parent )
15945 return parent->getChildIndex( getDataIndex() );
15946 return 0;
15947 }
15948
15949 /// returns true if node is document's root
isRoot() const15950 bool ldomNode::isRoot() const
15951 {
15952 ASSERT_NODE_NOT_NULL;
15953 switch ( TNTYPE ) {
15954 case NT_ELEMENT:
15955 return !NPELEM->_parentNode;
15956 #if BUILD_LITE!=1
15957 case NT_PELEMENT: // immutable (persistent) element node
15958 {
15959 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
15960 return me->parentIndex==0;
15961 }
15962 break;
15963 case NT_PTEXT: // immutable (persistent) text node
15964 {
15965 return getDocument()->_textStorage.getParent( _data._ptext_addr )==0;
15966 }
15967 #endif
15968 case NT_TEXT:
15969 return _data._text_ptr->getParentIndex()==0;
15970 }
15971 return false;
15972 }
15973
15974 /// call to invalidate cache if persistent node content is modified
modified()15975 void ldomNode::modified()
15976 {
15977 #if BUILD_LITE!=1
15978 if ( isPersistent() ) {
15979 if ( isElement() )
15980 getDocument()->_elemStorage.modified( _data._pelem_addr );
15981 else
15982 getDocument()->_textStorage.modified( _data._ptext_addr );
15983 }
15984 #endif
15985 }
15986
15987 /// changes parent of item
setParentNode(ldomNode * parent)15988 void ldomNode::setParentNode( ldomNode * parent )
15989 {
15990 ASSERT_NODE_NOT_NULL;
15991 #ifdef TRACE_AUTOBOX
15992 if ( getParentNode()!=NULL && parent != NULL )
15993 CRLog::trace("Changing parent of %d from %d to %d", getDataIndex(), getParentNode()->getDataIndex(), parent->getDataIndex());
15994 #endif
15995 switch ( TNTYPE ) {
15996 case NT_ELEMENT:
15997 NPELEM->_parentNode = parent;
15998 break;
15999 #if BUILD_LITE!=1
16000 case NT_PELEMENT: // immutable (persistent) element node
16001 {
16002 lUInt32 parentIndex = parent->_handle._dataIndex;
16003 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16004 if ( me->parentIndex != (int)parentIndex ) {
16005 me->parentIndex = parentIndex;
16006 modified();
16007 }
16008 }
16009 break;
16010 case NT_PTEXT: // immutable (persistent) text node
16011 {
16012 lUInt32 parentIndex = parent->_handle._dataIndex;
16013 getDocument()->_textStorage.setParent(_data._ptext_addr, parentIndex);
16014 //_data._ptext_addr._parentIndex = parentIndex;
16015 //_document->_textStorage.setTextParent( _data._ptext_addr._addr, parentIndex );
16016 }
16017 break;
16018 #endif
16019 case NT_TEXT:
16020 {
16021 lUInt32 parentIndex = parent->_handle._dataIndex;
16022 _data._text_ptr->setParentIndex( parentIndex );
16023 }
16024 break;
16025 }
16026 }
16027
16028 /// returns dataIndex of node's parent, 0 if no parent
getParentIndex() const16029 int ldomNode::getParentIndex() const
16030 {
16031 ASSERT_NODE_NOT_NULL;
16032
16033 switch ( TNTYPE ) {
16034 case NT_ELEMENT:
16035 return NPELEM->_parentNode ? NPELEM->_parentNode->getDataIndex() : 0;
16036 #if BUILD_LITE!=1
16037 case NT_PELEMENT: // immutable (persistent) element node
16038 {
16039 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16040 return me->parentIndex;
16041 }
16042 break;
16043 case NT_PTEXT: // immutable (persistent) text node
16044 return getDocument()->_textStorage.getParent(_data._ptext_addr);
16045 #endif
16046 case NT_TEXT:
16047 return _data._text_ptr->getParentIndex();
16048 }
16049 return 0;
16050 }
16051
16052 /// returns pointer to parent node, NULL if node has no parent
getParentNode() const16053 ldomNode * ldomNode::getParentNode() const
16054 {
16055 ASSERT_NODE_NOT_NULL;
16056 int parentIndex = 0;
16057 switch ( TNTYPE ) {
16058 case NT_ELEMENT:
16059 return NPELEM->_parentNode;
16060 #if BUILD_LITE!=1
16061 case NT_PELEMENT: // immutable (persistent) element node
16062 {
16063 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16064 parentIndex = me->parentIndex;
16065 }
16066 break;
16067 case NT_PTEXT: // immutable (persistent) text node
16068 parentIndex = getDocument()->_textStorage.getParent(_data._ptext_addr);
16069 break;
16070 #endif
16071 case NT_TEXT:
16072 parentIndex = _data._text_ptr->getParentIndex();
16073 break;
16074 }
16075 return parentIndex ? getTinyNode(parentIndex) : NULL;
16076 }
16077
16078 /// returns true child node is element
isChildNodeElement(lUInt32 index) const16079 bool ldomNode::isChildNodeElement( lUInt32 index ) const
16080 {
16081 ASSERT_NODE_NOT_NULL;
16082 #if BUILD_LITE!=1
16083 if ( !isPersistent() ) {
16084 #endif
16085 // element
16086 tinyElement * me = NPELEM;
16087 int n = me->_children[index];
16088 return ( (n & 1)==1 );
16089 #if BUILD_LITE!=1
16090 } else {
16091 // persistent element
16092 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16093 int n = me->children[index];
16094 return ( (n & 1)==1 );
16095 }
16096 #endif
16097 }
16098
16099 /// returns true child node is text
isChildNodeText(lUInt32 index) const16100 bool ldomNode::isChildNodeText( lUInt32 index ) const
16101 {
16102 ASSERT_NODE_NOT_NULL;
16103 #if BUILD_LITE!=1
16104 if ( !isPersistent() ) {
16105 #endif
16106 // element
16107 tinyElement * me = NPELEM;
16108 int n = me->_children[index];
16109 return ( (n & 1)==0 );
16110 #if BUILD_LITE!=1
16111 } else {
16112 // persistent element
16113 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16114 int n = me->children[index];
16115 return ( (n & 1)==0 );
16116 }
16117 #endif
16118 }
16119
16120 /// returns child node by index, NULL if node with this index is not element or nodeTag!=0 and element node name!=nodeTag
getChildElementNode(lUInt32 index,const lChar32 * nodeTag) const16121 ldomNode * ldomNode::getChildElementNode( lUInt32 index, const lChar32 * nodeTag ) const
16122 {
16123 lUInt16 nodeId = getDocument()->getElementNameIndex(nodeTag);
16124 return getChildElementNode( index, nodeId );
16125 }
16126
16127 /// returns child node by index, NULL if node with this index is not element or nodeId!=0 and element node id!=nodeId
getChildElementNode(lUInt32 index,lUInt16 nodeId) const16128 ldomNode * ldomNode::getChildElementNode( lUInt32 index, lUInt16 nodeId ) const
16129 {
16130 ASSERT_NODE_NOT_NULL;
16131 ldomNode * res = NULL;
16132 #if BUILD_LITE!=1
16133 if ( !isPersistent() ) {
16134 #endif
16135 // element
16136 tinyElement * me = NPELEM;
16137 int n = me->_children[index];
16138 if ( (n & 1)==0 ) // not element
16139 return NULL;
16140 res = getTinyNode( n );
16141 #if BUILD_LITE!=1
16142 } else {
16143 // persistent element
16144 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16145 int n = me->children[index];
16146 if ( (n & 1)==0 ) // not element
16147 return NULL;
16148 res = getTinyNode( n );
16149 }
16150 #endif
16151 if ( res && nodeId!=0 && res->getNodeId()!=nodeId )
16152 res = NULL;
16153 return res;
16154 }
16155
16156 /// returns child node by index
getChildNode(lUInt32 index) const16157 ldomNode * ldomNode::getChildNode( lUInt32 index ) const
16158 {
16159 ASSERT_NODE_NOT_NULL;
16160 #if BUILD_LITE!=1
16161 if ( !isPersistent() ) {
16162 #endif
16163 // element
16164 tinyElement * me = NPELEM;
16165 return getTinyNode( me->_children[index] );
16166 #if BUILD_LITE!=1
16167 } else {
16168 // persistent element
16169 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16170 return getTinyNode( me->children[index] );
16171 }
16172 #endif
16173 }
16174
16175 /// returns element child count
getChildCount() const16176 int ldomNode::getChildCount() const
16177 {
16178 ASSERT_NODE_NOT_NULL;
16179 if ( !isElement() )
16180 return 0;
16181 #if BUILD_LITE!=1
16182 if ( !isPersistent() ) {
16183 #endif
16184 // element
16185 tinyElement * me = NPELEM;
16186 return me->_children.length();
16187 #if BUILD_LITE!=1
16188 } else {
16189 // persistent element
16190 // persistent element
16191 {
16192 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16193 // if ( me==NULL ) { // DEBUG
16194 // me = _document->_elemStorage.getElem( _data._pelem_addr );
16195 // }
16196 return me->childCount;
16197 }
16198 }
16199 #endif
16200 }
16201
16202 /// returns element attribute count
getAttrCount() const16203 int ldomNode::getAttrCount() const
16204 {
16205 ASSERT_NODE_NOT_NULL;
16206 if ( !isElement() )
16207 return 0;
16208 #if BUILD_LITE!=1
16209 if ( !isPersistent() ) {
16210 #endif
16211 // element
16212 tinyElement * me = NPELEM;
16213 return me->_attrs.length();
16214 #if BUILD_LITE!=1
16215 } else {
16216 // persistent element
16217 {
16218 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16219 return me->attrCount;
16220 }
16221 }
16222 #endif
16223 }
16224
16225 /// returns attribute value by attribute name id and namespace id
getAttributeValue(lUInt16 nsid,lUInt16 id) const16226 const lString32 & ldomNode::getAttributeValue( lUInt16 nsid, lUInt16 id ) const
16227 {
16228 ASSERT_NODE_NOT_NULL;
16229 if ( !isElement() )
16230 return lString32::empty_str;
16231 #if BUILD_LITE!=1
16232 if ( !isPersistent() ) {
16233 #endif
16234 // element
16235 tinyElement * me = NPELEM;
16236 lUInt32 valueId = me->_attrs.get( nsid, id );
16237 if ( valueId==LXML_ATTR_VALUE_NONE )
16238 return lString32::empty_str;
16239 return getDocument()->getAttrValue(valueId);
16240 #if BUILD_LITE!=1
16241 } else {
16242 // persistent element
16243 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16244 lUInt32 valueId = me->getAttrValueId( nsid, id );
16245 if ( valueId==LXML_ATTR_VALUE_NONE )
16246 return lString32::empty_str;
16247 return getDocument()->getAttrValue(valueId);
16248 }
16249 #endif
16250 }
16251
16252 /// returns attribute value by attribute name and namespace
getAttributeValue(const lChar32 * nsName,const lChar32 * attrName) const16253 const lString32 & ldomNode::getAttributeValue( const lChar32 * nsName, const lChar32 * attrName ) const
16254 {
16255 ASSERT_NODE_NOT_NULL;
16256 lUInt16 nsId = (nsName && nsName[0]) ? getDocument()->getNsNameIndex( nsName ) : LXML_NS_ANY;
16257 lUInt16 attrId = getDocument()->getAttrNameIndex( attrName );
16258 return getAttributeValue( nsId, attrId );
16259 }
16260
16261 /// returns attribute value by attribute name and namespace
getAttributeValue(const lChar8 * nsName,const lChar8 * attrName) const16262 const lString32 & ldomNode::getAttributeValue( const lChar8 * nsName, const lChar8 * attrName ) const
16263 {
16264 ASSERT_NODE_NOT_NULL;
16265 lUInt16 nsId = (nsName && nsName[0]) ? getDocument()->getNsNameIndex( nsName ) : LXML_NS_ANY;
16266 lUInt16 attrId = getDocument()->getAttrNameIndex( attrName );
16267 return getAttributeValue( nsId, attrId );
16268 }
16269
16270 /// returns attribute by index
getAttribute(lUInt32 index) const16271 const lxmlAttribute * ldomNode::getAttribute( lUInt32 index ) const
16272 {
16273 ASSERT_NODE_NOT_NULL;
16274 if ( !isElement() )
16275 return NULL;
16276 #if BUILD_LITE!=1
16277 if ( !isPersistent() ) {
16278 #endif
16279 // element
16280 tinyElement * me = NPELEM;
16281 return me->_attrs[index];
16282 #if BUILD_LITE!=1
16283 } else {
16284 // persistent element
16285 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16286 return me->attr( index );
16287 }
16288 #endif
16289 }
16290
16291 /// returns true if element node has attribute with specified name id and namespace id
hasAttribute(lUInt16 nsid,lUInt16 id) const16292 bool ldomNode::hasAttribute( lUInt16 nsid, lUInt16 id ) const
16293 {
16294 ASSERT_NODE_NOT_NULL;
16295 if ( !isElement() )
16296 return false;
16297 #if BUILD_LITE!=1
16298 if ( !isPersistent() ) {
16299 #endif
16300 // element
16301 tinyElement * me = NPELEM;
16302 lUInt32 valueId = me->_attrs.get( nsid, id );
16303 return ( valueId!=LXML_ATTR_VALUE_NONE );
16304 #if BUILD_LITE!=1
16305 } else {
16306 // persistent element
16307 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16308 return (me->findAttr( nsid, id ) != NULL);
16309 }
16310 #endif
16311 }
16312
16313 /// returns attribute name by index
getAttributeName(lUInt32 index) const16314 const lString32 & ldomNode::getAttributeName( lUInt32 index ) const
16315 {
16316 ASSERT_NODE_NOT_NULL;
16317 const lxmlAttribute * attr = getAttribute( index );
16318 if ( attr )
16319 return getDocument()->getAttrName( attr->id );
16320 return lString32::empty_str;
16321 }
16322
16323 /// sets attribute value
setAttributeValue(lUInt16 nsid,lUInt16 id,const lChar32 * value)16324 void ldomNode::setAttributeValue( lUInt16 nsid, lUInt16 id, const lChar32 * value )
16325 {
16326 ASSERT_NODE_NOT_NULL;
16327 if ( !isElement() )
16328 return;
16329 lUInt32 valueIndex = getDocument()->getAttrValueIndex(value);
16330 #if BUILD_LITE!=1
16331 if ( isPersistent() ) {
16332 // persistent element
16333 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16334 lxmlAttribute * attr = me->findAttr( nsid, id );
16335 if ( attr ) {
16336 attr->index = valueIndex;
16337 modified();
16338 return;
16339 }
16340 // else: convert to modifable and continue as non-persistent
16341 modify();
16342 }
16343 #endif
16344 // element
16345 tinyElement * me = NPELEM;
16346 me->_attrs.set(nsid, id, valueIndex);
16347 if (nsid == LXML_NS_NONE)
16348 getDocument()->onAttributeSet( id, valueIndex, this );
16349 }
16350
16351 /// returns attribute value by attribute name id, looking at children if needed
getFirstInnerAttributeValue(lUInt16 nsid,lUInt16 id) const16352 const lString32 & ldomNode::getFirstInnerAttributeValue( lUInt16 nsid, lUInt16 id ) const
16353 {
16354 ASSERT_NODE_NOT_NULL;
16355 if (hasAttribute(nsid, id))
16356 return getAttributeValue(nsid, id);
16357 ldomNode * n = (ldomNode *) this;
16358 if (n->isElement() && n->getChildCount() > 0) {
16359 int nextChildIndex = 0;
16360 n = n->getChildNode(nextChildIndex);
16361 while (true) {
16362 // Check only the first time we met a node (nextChildIndex == 0)
16363 // and not when we get back to it from a child to process next sibling
16364 if (nextChildIndex == 0) {
16365 if (n->isElement() && n->hasAttribute(nsid, id))
16366 return n->getAttributeValue(nsid, id);
16367 }
16368 // Process next child
16369 if (n->isElement() && nextChildIndex < n->getChildCount()) {
16370 n = n->getChildNode(nextChildIndex);
16371 nextChildIndex = 0;
16372 continue;
16373 }
16374 // No more child, get back to parent and have it process our sibling
16375 nextChildIndex = n->getNodeIndex() + 1;
16376 n = n->getParentNode();
16377 if (!n) // back to root node
16378 break;
16379 if (n == this && nextChildIndex >= n->getChildCount())
16380 // back to this node, and done with its children
16381 break;
16382 }
16383 }
16384 return lString32::empty_str;
16385 }
16386
16387 /// returns element type structure pointer if it was set in document for this element name
getElementTypePtr()16388 const css_elem_def_props_t * ldomNode::getElementTypePtr()
16389 {
16390 ASSERT_NODE_NOT_NULL;
16391 if ( !isElement() )
16392 return NULL;
16393 #if BUILD_LITE!=1
16394 if ( !isPersistent() ) {
16395 #endif
16396 // element
16397 const css_elem_def_props_t * res = getDocument()->getElementTypePtr(NPELEM->_id);
16398 // if ( res && res->is_object ) {
16399 // CRLog::trace("Object found");
16400 // }
16401 return res;
16402 #if BUILD_LITE!=1
16403 } else {
16404 // persistent element
16405 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16406 const css_elem_def_props_t * res = getDocument()->getElementTypePtr(me->id);
16407 // if ( res && res->is_object ) {
16408 // CRLog::trace("Object found");
16409 // }
16410 return res;
16411 }
16412 #endif
16413 }
16414
16415 /// returns element name id
getNodeId() const16416 lUInt16 ldomNode::getNodeId() const
16417 {
16418 ASSERT_NODE_NOT_NULL;
16419 if ( !isElement() )
16420 return 0;
16421 #if BUILD_LITE!=1
16422 if ( !isPersistent() ) {
16423 // element
16424 #endif
16425 return NPELEM->_id;
16426 #if BUILD_LITE!=1
16427 } else {
16428 // persistent element
16429 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16430 return me->id;
16431 }
16432 #endif
16433 }
16434
16435 /// returns element namespace id
getNodeNsId() const16436 lUInt16 ldomNode::getNodeNsId() const
16437 {
16438 ASSERT_NODE_NOT_NULL;
16439 if ( !isElement() )
16440 return 0;
16441 #if BUILD_LITE!=1
16442 if ( !isPersistent() ) {
16443 // element
16444 #endif
16445 return NPELEM->_nsid;
16446 #if BUILD_LITE!=1
16447 } else {
16448 // persistent element
16449 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16450 return me->nsid;
16451 }
16452 #endif
16453 }
16454
16455 /// replace element name id with another value
setNodeId(lUInt16 id)16456 void ldomNode::setNodeId( lUInt16 id )
16457 {
16458 ASSERT_NODE_NOT_NULL;
16459 if ( !isElement() )
16460 return;
16461 #if BUILD_LITE!=1
16462 if ( !isPersistent() ) {
16463 // element
16464 #endif
16465 NPELEM->_id = id;
16466 #if BUILD_LITE!=1
16467 } else {
16468 // persistent element
16469 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16470 me->id = id;
16471 modified();
16472 }
16473 #endif
16474 }
16475
16476 /// returns element name
getNodeName() const16477 const lString32 & ldomNode::getNodeName() const
16478 {
16479 ASSERT_NODE_NOT_NULL;
16480 if ( !isElement() )
16481 return lString32::empty_str;
16482 #if BUILD_LITE!=1
16483 if ( !isPersistent() ) {
16484 // element
16485 #endif
16486 return getDocument()->getElementName(NPELEM->_id);
16487 #if BUILD_LITE!=1
16488 } else {
16489 // persistent element
16490 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16491 return getDocument()->getElementName(me->id);
16492 }
16493 #endif
16494 }
16495
16496 /// returns element name
isNodeName(const char * s) const16497 bool ldomNode::isNodeName(const char * s) const
16498 {
16499 ASSERT_NODE_NOT_NULL;
16500 if ( !isElement() )
16501 return false;
16502 lUInt16 index = getDocument()->findElementNameIndex(s);
16503 if (!index)
16504 return false;
16505 #if BUILD_LITE!=1
16506 if ( !isPersistent() ) {
16507 // element
16508 #endif
16509 return index == NPELEM->_id;
16510 #if BUILD_LITE!=1
16511 } else {
16512 // persistent element
16513 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16514 return index == me->id;
16515 }
16516 #endif
16517 }
16518
16519 /// returns element namespace name
getNodeNsName() const16520 const lString32 & ldomNode::getNodeNsName() const
16521 {
16522 ASSERT_NODE_NOT_NULL;
16523 if ( !isElement() )
16524 return lString32::empty_str;
16525 #if BUILD_LITE!=1
16526 if ( !isPersistent() ) {
16527 #endif
16528 // element
16529 return getDocument()->getNsName(NPELEM->_nsid);
16530 #if BUILD_LITE!=1
16531 } else {
16532 // persistent element
16533 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
16534 return getDocument()->getNsName(me->nsid);
16535 }
16536 #endif
16537 }
16538
16539
16540
16541 /// returns text node text as wide string
getText(lChar32 blockDelimiter,int maxSize) const16542 lString32 ldomNode::getText( lChar32 blockDelimiter, int maxSize ) const
16543 {
16544 ASSERT_NODE_NOT_NULL;
16545 switch ( TNTYPE ) {
16546 #if BUILD_LITE!=1
16547 case NT_PELEMENT:
16548 #endif
16549 case NT_ELEMENT:
16550 {
16551 lString32 txt;
16552 unsigned cc = getChildCount();
16553 for ( unsigned i=0; i<cc; i++ ) {
16554 ldomNode * child = getChildNode(i);
16555 txt += child->getText(blockDelimiter, maxSize);
16556 if (maxSize != 0 && txt.length() > maxSize)
16557 break;
16558 if (i >= cc - 1)
16559 break;
16560 #if BUILD_LITE!=1
16561 if ( blockDelimiter && child->isElement() ) {
16562 if ( !child->getStyle().isNull() && child->getStyle()->display == css_d_block )
16563 txt << blockDelimiter;
16564 }
16565 #endif
16566 }
16567 return txt;
16568 }
16569 break;
16570 #if BUILD_LITE!=1
16571 case NT_PTEXT:
16572 return Utf8ToUnicode(getDocument()->_textStorage.getText( _data._ptext_addr ));
16573 #endif
16574 case NT_TEXT:
16575 return _data._text_ptr->getText32();
16576 }
16577 return lString32::empty_str;
16578 }
16579
16580 /// returns text node text as utf8 string
getText8(lChar8 blockDelimiter,int maxSize) const16581 lString8 ldomNode::getText8( lChar8 blockDelimiter, int maxSize ) const
16582 {
16583 ASSERT_NODE_NOT_NULL;
16584 switch ( TNTYPE ) {
16585 case NT_ELEMENT:
16586 #if BUILD_LITE!=1
16587 case NT_PELEMENT:
16588 {
16589 lString8 txt;
16590 int cc = getChildCount();
16591 for (int i = 0; i < cc; i++) {
16592 ldomNode * child = getChildNode(i);
16593 txt += child->getText8(blockDelimiter, maxSize);
16594 if (maxSize != 0 && txt.length() > maxSize)
16595 break;
16596 if (i >= getChildCount() - 1)
16597 break;
16598 if ( blockDelimiter && child->isElement() ) {
16599 if ( child->getStyle()->display == css_d_block )
16600 txt << blockDelimiter;
16601 }
16602 }
16603 return txt;
16604 }
16605 break;
16606 case NT_PTEXT:
16607 return getDocument()->_textStorage.getText( _data._ptext_addr );
16608 #endif
16609 case NT_TEXT:
16610 return _data._text_ptr->getText();
16611 }
16612 return lString8::empty_str;
16613 }
16614
16615 /// sets text node text as wide string
setText(lString32 str)16616 void ldomNode::setText( lString32 str )
16617 {
16618 ASSERT_NODE_NOT_NULL;
16619 switch ( TNTYPE ) {
16620 case NT_ELEMENT:
16621 readOnlyError();
16622 break;
16623 #if BUILD_LITE!=1
16624 case NT_PELEMENT:
16625 readOnlyError();
16626 break;
16627 case NT_PTEXT:
16628 {
16629 // convert persistent text to mutable
16630 lUInt32 parentIndex = getDocument()->_textStorage.getParent(_data._ptext_addr);
16631 getDocument()->_textStorage.freeNode( _data._ptext_addr );
16632 _data._text_ptr = new ldomTextNode( parentIndex, UnicodeToUtf8(str) );
16633 // change type from PTEXT to TEXT
16634 _handle._dataIndex = (_handle._dataIndex & ~0xF) | NT_TEXT;
16635 }
16636 break;
16637 #endif
16638 case NT_TEXT:
16639 {
16640 _data._text_ptr->setText( str );
16641 }
16642 break;
16643 }
16644 }
16645
16646 /// sets text node text as utf8 string
setText8(lString8 utf8)16647 void ldomNode::setText8( lString8 utf8 )
16648 {
16649 ASSERT_NODE_NOT_NULL;
16650 switch ( TNTYPE ) {
16651 case NT_ELEMENT:
16652 readOnlyError();
16653 break;
16654 #if BUILD_LITE!=1
16655 case NT_PELEMENT:
16656 readOnlyError();
16657 break;
16658 case NT_PTEXT:
16659 {
16660 // convert persistent text to mutable
16661 lUInt32 parentIndex = getDocument()->_textStorage.getParent(_data._ptext_addr);
16662 getDocument()->_textStorage.freeNode( _data._ptext_addr );
16663 _data._text_ptr = new ldomTextNode( parentIndex, utf8 );
16664 // change type from PTEXT to TEXT
16665 _handle._dataIndex = (_handle._dataIndex & ~0xF) | NT_TEXT;
16666 }
16667 break;
16668 #endif
16669 case NT_TEXT:
16670 {
16671 _data._text_ptr->setText( utf8 );
16672 }
16673 break;
16674 }
16675 }
16676
16677 #if BUILD_LITE!=1
16678 /// returns node absolute rectangle
getAbsRect(lvRect & rect,bool inner)16679 void ldomNode::getAbsRect( lvRect & rect, bool inner )
16680 {
16681 ASSERT_NODE_NOT_NULL;
16682 ldomNode * node = this;
16683 RenderRectAccessor fmt( node );
16684 rect.left = fmt.getX();
16685 rect.top = fmt.getY();
16686 rect.right = fmt.getWidth();
16687 rect.bottom = fmt.getHeight();
16688 if ( inner && RENDER_RECT_HAS_FLAG(fmt, INNER_FIELDS_SET) ) {
16689 // This flag is set only when in enhanced rendering mode, and
16690 // only on erm_final nodes.
16691 rect.left += fmt.getInnerX(); // add padding left
16692 rect.top += fmt.getInnerY(); // add padding top
16693 rect.right = fmt.getInnerWidth(); // replace by inner width
16694 }
16695 node = node->getParentNode();
16696 for (; node; node = node->getParentNode())
16697 {
16698 RenderRectAccessor fmt( node );
16699 rect.left += fmt.getX();
16700 rect.top += fmt.getY();
16701 if ( RENDER_RECT_HAS_FLAG(fmt, INNER_FIELDS_SET) ) {
16702 // getAbsRect() is mostly used on erm_final nodes. So,
16703 // if we meet another erm_final node in our parent, we are
16704 // probably an embedded floatBox or inlineBox. Embedded
16705 // floatBoxes or inlineBoxes are positioned according
16706 // to the inner LFormattedText, so we need to account
16707 // for these padding shifts.
16708 rect.left += fmt.getInnerX(); // add padding left
16709 rect.top += fmt.getInnerY(); // add padding top
16710 }
16711 }
16712 rect.bottom += rect.top;
16713 rect.right += rect.left;
16714 }
16715
16716 /// returns render data structure
getRenderData(lvdomElementFormatRec & dst)16717 void ldomNode::getRenderData( lvdomElementFormatRec & dst)
16718 {
16719 ASSERT_NODE_NOT_NULL;
16720 if ( !isElement() ) {
16721 dst.clear();
16722 return;
16723 }
16724 getDocument()->_rectStorage.getRendRectData(_handle._dataIndex, &dst);
16725 }
16726
16727 /// sets new value for render data structure
setRenderData(lvdomElementFormatRec & newData)16728 void ldomNode::setRenderData( lvdomElementFormatRec & newData)
16729 {
16730 ASSERT_NODE_NOT_NULL;
16731 if ( !isElement() )
16732 return;
16733 getDocument()->_rectStorage.setRendRectData(_handle._dataIndex, &newData);
16734 }
16735
16736 /// sets node rendering structure pointer
clearRenderData()16737 void ldomNode::clearRenderData()
16738 {
16739 ASSERT_NODE_NOT_NULL;
16740 if ( !isElement() )
16741 return;
16742 lvdomElementFormatRec rec;
16743 getDocument()->_rectStorage.setRendRectData(_handle._dataIndex, &rec);
16744 }
16745 /// reset node rendering structure pointer for sub-tree
clearRenderDataRecursive()16746 void ldomNode::clearRenderDataRecursive()
16747 {
16748 ASSERT_NODE_NOT_NULL;
16749 if ( !isElement() )
16750 return;
16751 lvdomElementFormatRec rec;
16752 getDocument()->_rectStorage.setRendRectData(_handle._dataIndex, &rec);
16753 int cnt = getChildCount();
16754 for (int i=0; i<cnt; i++) {
16755 ldomNode * child = getChildNode( i );
16756 if ( child->isElement() ) {
16757 child->clearRenderDataRecursive();
16758 }
16759 }
16760 }
16761 #endif
16762
16763 /// calls specified function recursively for all elements of DOM tree, children before parent
recurseElementsDeepFirst(void (* pFun)(ldomNode * node))16764 void ldomNode::recurseElementsDeepFirst( void (*pFun)( ldomNode * node ) )
16765 {
16766 ASSERT_NODE_NOT_NULL;
16767 if ( !isElement() )
16768 return;
16769 int cnt = getChildCount();
16770 for (int i=0; i<cnt; i++)
16771 {
16772 ldomNode * child = getChildNode( i );
16773 if ( child && child->isElement() )
16774 {
16775 child->recurseElementsDeepFirst( pFun );
16776 }
16777 }
16778 pFun( this );
16779 }
16780
16781 #if BUILD_LITE!=1
updateRendMethod(ldomNode * node)16782 static void updateRendMethod( ldomNode * node )
16783 {
16784 node->initNodeRendMethod();
16785 // Also clean up node previous positionings (they were set while in
16786 // a previous page drawing phase), that could otherwise have negative
16787 // impact on the coming rendering (noticeable with table elements).
16788 RenderRectAccessor fmt( node );
16789 fmt.clear();
16790 fmt.push();
16791 }
16792
16793 /// init render method for the whole subtree
initNodeRendMethodRecursive()16794 void ldomNode::initNodeRendMethodRecursive()
16795 {
16796 recurseElementsDeepFirst( updateRendMethod );
16797 }
16798 #endif
16799
// Disabled code (compiled out by #if 0), kept for reference: per-node
// callback that applies the node stylesheet on DocFragment elements and
// then initializes the node style. Its only visible use is the
// commented-out recurseElements( updateStyleData ) call in
// ldomNode::initNodeStyleRecursive().
#if 0
static void updateStyleData( ldomNode * node )
{
    if ( node->getNodeId()==el_DocFragment )
        node->applyNodeStylesheet();
    node->initNodeStyle();
}
#endif
16808
16809 #if BUILD_LITE!=1
updateStyleDataRecursive(ldomNode * node,LVDocViewCallback * progressCallback,int & lastProgressPercent)16810 static void updateStyleDataRecursive( ldomNode * node, LVDocViewCallback * progressCallback, int & lastProgressPercent )
16811 {
16812 if ( !node->isElement() )
16813 return;
16814 bool styleSheetChanged = false;
16815
16816 // DocFragment (for epub) and body (for html) may hold some stylesheet
16817 // as first child or a link to stylesheet file in attribute
16818 if ( node->getNodeId()==el_DocFragment || node->getNodeId()==el_body ) {
16819 styleSheetChanged = node->applyNodeStylesheet();
16820 // We don't have access to much metric to show the progress of
16821 // this recursive phase. Do that anyway as we progress among
16822 // the collection of DocFragments.
16823 if ( progressCallback && node->getNodeId()==el_DocFragment ) {
16824 int nbDocFragments = node->getParentNode()->getChildCount();
16825 if (nbDocFragments == 0) // should not happen (but avoid clang-tidy warning)
16826 nbDocFragments = 1;
16827 int percent = 100 * node->getNodeIndex() / nbDocFragments;
16828 if ( percent != lastProgressPercent ) {
16829 progressCallback->OnNodeStylesUpdateProgress( percent );
16830 lastProgressPercent = percent;
16831 }
16832 }
16833 }
16834
16835 node->initNodeStyle();
16836 int n = node->getChildCount();
16837 for ( int i=0; i<n; i++ ) {
16838 ldomNode * child = node->getChildNode(i);
16839 if ( child && child->isElement() )
16840 updateStyleDataRecursive( child, progressCallback, lastProgressPercent );
16841 }
16842 if ( styleSheetChanged )
16843 node->getDocument()->getStyleSheet()->pop();
16844 }
16845
16846 /// init render method for the whole subtree
initNodeStyleRecursive(LVDocViewCallback * progressCallback)16847 void ldomNode::initNodeStyleRecursive( LVDocViewCallback * progressCallback )
16848 {
16849 if (progressCallback)
16850 progressCallback->OnNodeStylesUpdateStart();
16851 getDocument()->_fontMap.clear();
16852 int lastProgressPercent = -1;
16853 updateStyleDataRecursive( this, progressCallback, lastProgressPercent );
16854 //recurseElements( updateStyleData );
16855 if (progressCallback)
16856 progressCallback->OnNodeStylesUpdateEnd();
16857 }
16858 #endif
16859
16860 /// calls specified function recursively for all elements of DOM tree
recurseElements(void (* pFun)(ldomNode * node))16861 void ldomNode::recurseElements( void (*pFun)( ldomNode * node ) )
16862 {
16863 ASSERT_NODE_NOT_NULL;
16864 if ( !isElement() )
16865 return;
16866 pFun( this );
16867 int cnt = getChildCount();
16868 for (int i=0; i<cnt; i++)
16869 {
16870 ldomNode * child = getChildNode( i );
16871 if ( child->isElement() )
16872 {
16873 child->recurseElements( pFun );
16874 }
16875 }
16876 }
16877
16878 /// calls specified function recursively for all elements of DOM tree
recurseMatchingElements(void (* pFun)(ldomNode * node),bool (* matchFun)(ldomNode * node))16879 void ldomNode::recurseMatchingElements( void (*pFun)( ldomNode * node ), bool (*matchFun)( ldomNode * node ) )
16880 {
16881 ASSERT_NODE_NOT_NULL;
16882 if ( !isElement() )
16883 return;
16884 if ( !matchFun( this ) ) {
16885 return;
16886 }
16887 pFun( this );
16888 int cnt = getChildCount();
16889 for (int i=0; i<cnt; i++)
16890 {
16891 ldomNode * child = getChildNode( i );
16892 if ( child->isElement() )
16893 {
16894 child->recurseMatchingElements( pFun, matchFun );
16895 }
16896 }
16897 }
16898
16899 /// calls specified function recursively for all nodes of DOM tree
recurseNodes(void (* pFun)(ldomNode * node))16900 void ldomNode::recurseNodes( void (*pFun)( ldomNode * node ) )
16901 {
16902 ASSERT_NODE_NOT_NULL;
16903 pFun( this );
16904 if ( isElement() )
16905 {
16906 int cnt = getChildCount();
16907 for (int i=0; i<cnt; i++)
16908 {
16909 ldomNode * child = getChildNode( i );
16910 child->recurseNodes( pFun );
16911 }
16912 }
16913 }
16914
16915 /// returns first text child element
getFirstTextChild(bool skipEmpty)16916 ldomNode * ldomNode::getFirstTextChild(bool skipEmpty)
16917 {
16918 ASSERT_NODE_NOT_NULL;
16919 if ( isText() ) {
16920 if ( !skipEmpty )
16921 return this;
16922 lString32 txt = getText();
16923 bool nonSpaceFound = false;
16924 for ( int i=0; i<txt.length(); i++ ) {
16925 lChar32 ch = txt[i];
16926 if ( ch!=' ' && ch!='\t' && ch!='\r' && ch!='\n' ) {
16927 nonSpaceFound = true;
16928 break;
16929 }
16930 }
16931 if ( nonSpaceFound )
16932 return this;
16933 return NULL;
16934 }
16935 for ( int i=0; i<(int)getChildCount(); i++ ) {
16936 ldomNode * p = getChildNode(i)->getFirstTextChild(skipEmpty);
16937 if (p)
16938 return p;
16939 }
16940 return NULL;
16941 }
16942
16943 /// returns last text child element
getLastTextChild()16944 ldomNode * ldomNode::getLastTextChild()
16945 {
16946 ASSERT_NODE_NOT_NULL;
16947 if ( isText() )
16948 return this;
16949 else {
16950 for ( int i=(int)getChildCount()-1; i>=0; i-- ) {
16951 ldomNode * p = getChildNode(i)->getLastTextChild();
16952 if (p)
16953 return p;
16954 }
16955 }
16956 return NULL;
16957 }
16958
16959
16960 #if BUILD_LITE!=1
/// find node by coordinates of point in formatted document
///
/// Looks, in this element's sub-tree, for the element rendered at pt.
/// pt is compared against this node's own render rect x/y (which the
/// recursive calls below treat as relative to the parent: children are
/// called with pt shifted by this node's x/y).
///
/// @param pt point to look for
/// @param direction PT_DIR_EXACT to also check pt.x against the boxes;
///        PT_DIR_SCAN_FORWARD* / PT_DIR_SCAN_BACKWARD* to only check pt.y,
///        visiting children in ascending / descending order
/// @param strict_bounds_checking when true (enhanced rendering mode only),
///        a box is also rejected when pt.y is on its "other" side; used
///        for the children of nodes flagged CHILDREN_RENDERING_REORDERED
/// @return the element found (an erm_final node when one is met, or
///         otherwise this block container itself when none of its children
///         is a candidate), or NULL when this node is not an element, is
///         erm_invisible, or is not a candidate for pt
ldomNode * ldomNode::elementFromPoint( lvPoint pt, int direction, bool strict_bounds_checking )
{
    ASSERT_NODE_NOT_NULL;
    if ( !isElement() )
        return NULL;
    ldomNode * enode = this;
    lvdom_element_render_method rm = enode->getRendMethod();
    if ( rm == erm_invisible ) {
        return NULL;
    }

    if ( rm == erm_inline ) {
        // We shouldn't meet erm_inline here, as our purpose is to return
        // a final node (so, the container of inlines), and not look further
        // (it's ldomDocument::createXPointer(pt) job to look at this final
        // node rendered content to find the exact text node and char at pt).
        // Except in the "pt.y is inside the box bottom overflow" case below,
        // and that box is erm_final (see there for more comments).
        // We should navigate all the erm_inline nodes, looking for
        // non-erm_inline ones that may be in that overflow and contain pt.
        // erm_inline nodes don't have a RenderRectAccessor(), so their x/y
        // shifts are 0, and any inner block node had its RenderRectAccessor
        // x/y offsets positioned related to the final block. So, no need
        // to shift pt: just recursively call elementFromPoint() as-is,
        // and we'll be recursively navigating inline nodes here.
        int count = getChildCount();
        for ( int i=0; i<count; i++ ) {
            ldomNode * p = getChildNode( i );
            ldomNode * e = p->elementFromPoint( pt, direction );
            if ( e ) // found it!
                return e;
        }
        return NULL; // nothing found
    }

    RenderRectAccessor fmt( this );

    if ( BLOCK_RENDERING_N(this, ENHANCED) ) {
        // In enhanced rendering mode, because of collapsing of vertical margins
        // and the fact that we did not update style margins to their computed
        // values, a children box with margins can overlap its parent box, if
        // the child bigger margin collapsed with the parent smaller margin.
        // So, if we ignore the margins, there can be holes along the vertical
        // axis (these holes are the collapsed margins). But the content boxes
        // (without margins) don't overlap.
        if ( direction >= PT_DIR_EXACT ) { // PT_DIR_EXACT or PT_DIR_SCAN_FORWARD*
            // We get the parent node's children in ascending order
            // It could just be:
            //   if ( pt.y >= fmt.getY() + fmt.getHeight() )
            //       // Box fully before pt.y: not a candidate, next one may be
            //       return NULL;
            // but, because of possible floats overflowing their container element,
            // and we want to check if pt is inside any of them, we directly
            // check with bottom overflow included (just to avoid 2 tests
            // in the most common case when there is no overflow).
            if ( pt.y >= fmt.getY() + fmt.getHeight() + fmt.getBottomOverflow() ) {
                // Box (with overflow) fully before pt.y: not a candidate, next one may be
                return NULL;
            }
            if ( pt.y >= fmt.getY() + fmt.getHeight() ) { // pt.y is inside the box bottom overflow
                // Get back absolute coordinates of pt
                lvRect rc;
                getParentNode()->getAbsRect( rc );
                lvPoint pt0 = lvPoint(rc.left+pt.x, rc.top+pt.y );
                // Check each of this element's children if pt is inside it (so, we'll
                // go by here for each of them that has some overflow too, and that
                // contributed to making this element's overflow.)
                // Note that if this node is erm_final, its bottom overflow must have
                // been set by some inner embedded float. But this final block's children
                // are erm_inline, and the float might be deep among inlines' children.
                // erm_inline nodes don't have their RenderRectAccessor set, so the
                // bottom overflow is not propagated thru them, and we would be in
                // the above case ("Box (with overflow) fully before pt.y"), not
                // looking at inlines' children. We handle this case above (at the
                // start of this function) by looking at erm_inline's children for
                // non-erm_inline nodes before checking any x/y or bottom overflow.
                int count = getChildCount();
                for ( int i=0; i<count; i++ ) {
                    ldomNode * p = getChildNode( i );
                    // Find an inner erm_final element that has pt in it: for now, it can
                    // only be a float. Use PT_DIR_EXACT to really check for x boundaries.
                    ldomNode * e = p->elementFromPoint( lvPoint(pt.x-fmt.getX(), pt.y-fmt.getY()), PT_DIR_EXACT );
                    if ( e ) {
                        // Just to be sure, as elementFromPoint() may be a bit fuzzy in its
                        // checks, double check that pt is really inside that e rect.
                        lvRect erc;
                        e->getAbsRect( erc );
                        if ( erc.isPointInside(pt0) ) {
                            return e; // return this inner erm_final
                        }
                    }
                }
                return NULL; // Nothing found in the overflow
            }
            // There is one special case to skip: floats that may have been
            // positioned after their normal y (because of clear:, or because
            // of not enough width). Their following non-float siblings (after
            // in the HTML/DOM tree) may have a lower fmt.getY().
            if ( isFloatingBox() && pt.y < fmt.getY() ) {
                // Float starts after pt.y: next non-float siblings may contain pt.y
                return NULL;
            }
            // When children of the parent node have been re-ordered, we can't
            // trust the ordering, and if pt.y is before fmt.getY(), we might
            // still find it in a next node that has been re-ordered before
            // this one for rendering.
            // Note: for now, happens only with re-ordered table rows, so
            // we're only ensuring it here for y. This check might have to
            // also be done elsewhere in this function when we use it for
            // other things.
            if ( strict_bounds_checking && pt.y < fmt.getY() ) {
                // Box fully after pt.y: not a candidate, next one
                // (if reordered) may be
                return NULL;
            }
            // pt.y is inside the box (without overflows), go on with it.
            // Note: we don't check for next elements which may have a top
            // overflow and have pt.y inside it, because it would be a bit
            // more twisted here, and it's less common that floats overflow
            // their container's top (they need to have negative margins).
        }
        else { // PT_DIR_SCAN_BACKWARD*
            // We get the parent node's children in descending order
            if ( pt.y < fmt.getY() ) {
                // Box fully after pt.y: not a candidate, next one may be
                return NULL;
            }
            if ( strict_bounds_checking && pt.y >= fmt.getY() + fmt.getHeight() ) {
                // Box fully before pt.y: not a candidate, next one
                // (if reordered) may be
                return NULL;
            }
        }
    }
    else {
        // In legacy rendering mode, all boxes (with their margins added) touch
        // each other, and the boxes of children are fully contained (with
        // their margins added) in their parent box.

        // Styles margins set on <TR>, <THEAD> and the like are ignored
        // by table layout algorithm (as per CSS specs)
        // (erm_table_row_group, erm_table_header_group, erm_table_footer_group, erm_table_row)
        bool ignore_margins = rm >= erm_table_row_group && rm <= erm_table_row;

        int top_margin = ignore_margins ? 0 : lengthToPx(enode->getStyle()->margin[2], fmt.getWidth(), enode->getFont()->getSize());
        if ( pt.y < fmt.getY() - top_margin) {
            // pt is above this box: when scanning forward, a final node
            // is accepted as the next candidate
            if ( direction >= PT_DIR_SCAN_FORWARD && rm == erm_final )
                return this;
            return NULL;
        }
        int bottom_margin = ignore_margins ? 0 : lengthToPx(enode->getStyle()->margin[3], fmt.getWidth(), enode->getFont()->getSize());
        if ( pt.y >= fmt.getY() + fmt.getHeight() + bottom_margin ) {
            // pt is below this box: when scanning backward, a final node
            // is accepted as the next candidate
            if ( direction <= PT_DIR_SCAN_BACKWARD && rm == erm_final )
                return this;
            return NULL;
        }
    }

    if ( direction == PT_DIR_EXACT ) {
        // (We shouldn't check for pt.x when we are given PT_DIR_SCAN_*.
        // In full text search, we might not find any and get locked
        // on some page.)
        if ( pt.x >= fmt.getX() + fmt.getWidth() ) {
            return NULL;
        }
        if ( pt.x < fmt.getX() ) {
            return NULL;
        }
        // We now do this above check in all cases.
        // Previously:
        //
        //   We also don't need to do it if pt.x=0, which is often used
        //   to get current page top or range xpointers.
        //   We are given a x>0 when tap/hold to highlight text or find
        //   a link, and checking x vs fmt.x and width allows for doing
        //   that correctly in 2nd+ table cells.
        //
        //   No need to check if ( pt.x < fmt.getX() ): we probably
        //   meet the multiple elements that can be formatted on a same
        //   line in the order they appear as children of their parent,
        //   we can simply just ignore those who end before our pt.x.
        //   But check x if we happen to be on a floating node (which,
        //   with float:right, can appear first in the DOM but be
        //   displayed at a higher x)
        //      if ( pt.x < fmt.getX() && enode->isFloatingBox() ) {
        //          return NULL;
        //      }
        // This is no more true, now that we support RTL tables and
        // we can meet cells in the reverse of their logical order.
        // We could add more conditions (like parentNode->getRendMethod()>=erm_table),
        // but let's just check this in all cases when direction=0.
    }
    if ( rm == erm_final ) {
        // Final node, that's what we looked for
        return this;
    }
    // Not a final node, but a block container node that must contain
    // the final node we look for: check its children.
    int count = getChildCount();
    // Children get strict checking only if this node had them re-ordered
    // for rendering (the value we were given applied to ourself only)
    strict_bounds_checking = RENDER_RECT_HAS_FLAG(fmt, CHILDREN_RENDERING_REORDERED);
    if ( direction >= PT_DIR_EXACT ) { // PT_DIR_EXACT or PT_DIR_SCAN_FORWARD*
        for ( int i=0; i<count; i++ ) {
            ldomNode * p = getChildNode( i );
            ldomNode * e = p->elementFromPoint( lvPoint(pt.x-fmt.getX(), pt.y-fmt.getY()), direction, strict_bounds_checking );
            if ( e )
                return e;
        }
    } else {
        for ( int i=count-1; i>=0; i-- ) {
            ldomNode * p = getChildNode( i );
            ldomNode * e = p->elementFromPoint( lvPoint(pt.x-fmt.getX(), pt.y-fmt.getY()), direction, strict_bounds_checking );
            if ( e )
                return e;
        }
    }
    // No child is a candidate: return this container itself
    return this;
}
17179
17180 /// find final node by coordinates of point in formatted document
finalBlockFromPoint(lvPoint pt)17181 ldomNode * ldomNode::finalBlockFromPoint( lvPoint pt )
17182 {
17183 ASSERT_NODE_NOT_NULL;
17184 ldomNode * elem = elementFromPoint( pt, PT_DIR_EXACT );
17185 if ( elem && elem->getRendMethod() == erm_final )
17186 return elem;
17187 return NULL;
17188 }
17189 #endif
17190
17191 /// returns rendering method
getRendMethod()17192 lvdom_element_render_method ldomNode::getRendMethod()
17193 {
17194 ASSERT_NODE_NOT_NULL;
17195 if ( isElement() ) {
17196 #if BUILD_LITE!=1
17197 if ( !isPersistent() ) {
17198 #endif
17199 return NPELEM->_rendMethod;
17200 #if BUILD_LITE!=1
17201 } else {
17202 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
17203 return (lvdom_element_render_method)me->rendMethod;
17204 }
17205 #endif
17206 }
17207 return erm_invisible;
17208 }
17209
17210 /// sets rendering method
setRendMethod(lvdom_element_render_method method)17211 void ldomNode::setRendMethod( lvdom_element_render_method method )
17212 {
17213 ASSERT_NODE_NOT_NULL;
17214 if ( isElement() ) {
17215 #if BUILD_LITE!=1
17216 if ( !isPersistent() ) {
17217 #endif
17218 NPELEM->_rendMethod = method;
17219 #if BUILD_LITE!=1
17220 } else {
17221 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
17222 if ( me->rendMethod != method ) {
17223 me->rendMethod = (lUInt8)method;
17224 modified();
17225 }
17226 }
17227 #endif
17228 }
17229 }
17230
17231 #if BUILD_LITE!=1
17232 /// returns element style record
getStyle() const17233 css_style_ref_t ldomNode::getStyle() const
17234 {
17235 ASSERT_NODE_NOT_NULL;
17236 if ( !isElement() )
17237 return css_style_ref_t();
17238 css_style_ref_t res = getDocument()->getNodeStyle( _handle._dataIndex );
17239 return res;
17240 }
17241
17242 /// returns element font
getFont()17243 font_ref_t ldomNode::getFont()
17244 {
17245 ASSERT_NODE_NOT_NULL;
17246 if ( !isElement() )
17247 return font_ref_t();
17248 return getDocument()->getNodeFont( _handle._dataIndex );
17249 }
17250
17251 /// sets element font
setFont(font_ref_t font)17252 void ldomNode::setFont( font_ref_t font )
17253 {
17254 ASSERT_NODE_NOT_NULL;
17255 if ( isElement() ) {
17256 getDocument()->setNodeFont( _handle._dataIndex, font );
17257 }
17258 }
17259
17260 /// sets element style record
setStyle(css_style_ref_t & style)17261 void ldomNode::setStyle( css_style_ref_t & style )
17262 {
17263 ASSERT_NODE_NOT_NULL;
17264 if ( isElement() ) {
17265 getDocument()->setNodeStyle( _handle._dataIndex, style );
17266 }
17267 }
17268
initNodeFont()17269 bool ldomNode::initNodeFont()
17270 {
17271 if ( !isElement() )
17272 return false;
17273 lUInt16 style = getDocument()->getNodeStyleIndex( _handle._dataIndex );
17274 lUInt16 font = getDocument()->getNodeFontIndex( _handle._dataIndex );
17275 lUInt16 fntIndex = getDocument()->_fontMap.get( style );
17276 if ( fntIndex==0 ) {
17277 css_style_ref_t s = getDocument()->_styles.get( style );
17278 if ( s.isNull() ) {
17279 CRLog::error("style not found for index %d", style);
17280 s = getDocument()->_styles.get( style );
17281 }
17282 LVFontRef fnt = ::getFont(s.get(), getDocument()->getFontContextDocIndex());
17283 fntIndex = (lUInt16)getDocument()->_fonts.cache( fnt );
17284 if ( fnt.isNull() ) {
17285 CRLog::error("font not found for style!");
17286 return false;
17287 } else {
17288 getDocument()->_fontMap.set(style, fntIndex);
17289 }
17290 if ( font != 0 ) {
17291 if ( font!=fntIndex ) // ???
17292 getDocument()->_fonts.release(font);
17293 }
17294 getDocument()->setNodeFontIndex( _handle._dataIndex, fntIndex);
17295 return true;
17296 } else {
17297 if ( font!=fntIndex )
17298 getDocument()->_fonts.addIndexRef( fntIndex );
17299 }
17300 if ( fntIndex<=0 ) {
17301 CRLog::error("font caching failed for style!");
17302 return false;;
17303 } else {
17304 getDocument()->setNodeFontIndex( _handle._dataIndex, fntIndex);
17305 }
17306 return true;
17307 }
17308
initNodeStyle()17309 void ldomNode::initNodeStyle()
17310 {
17311 // assume all parent styles already initialized
17312 if ( !getDocument()->isDefStyleSet() )
17313 return;
17314 if ( isElement() ) {
17315 if ( isRoot() || getParentNode()->isRoot() )
17316 {
17317 setNodeStyle( this,
17318 getDocument()->getDefaultStyle(),
17319 getDocument()->getDefaultFont()
17320 );
17321 }
17322 else
17323 {
17324 ldomNode * parent = getParentNode();
17325
17326 // DEBUG TEST
17327 if ( parent->getChildIndex( getDataIndex() )<0 ) {
17328 CRLog::error("Invalid parent->child relation for nodes %d->%d", parent->getDataIndex(), getDataIndex() );
17329 }
17330
17331
17332 //lvdomElementFormatRec * parent_fmt = node->getParentNode()->getRenderData();
17333 css_style_ref_t style = parent->getStyle();
17334 LVFontRef font = parent->getFont();
17335 #if DEBUG_DOM_STORAGE==1
17336 if ( style.isNull() ) {
17337 // for debugging
17338 CRLog::error("NULL style is returned for node <%s> %d level=%d "
17339 "parent <%s> %d level=%d children %d childIndex=%d",
17340 LCSTR(getNodeName()), getDataIndex(), getNodeLevel(),
17341 LCSTR(parent->getNodeName()), parent->getDataIndex(),
17342 parent->getNodeLevel(), parent->getChildCount(), parent->getChildIndex(getDataIndex()) );
17343
17344 style = parent->getStyle();
17345 }
17346 #endif
17347 setNodeStyle( this,
17348 style,
17349 font
17350 );
17351 #if DEBUG_DOM_STORAGE==1
17352 if ( this->getStyle().isNull() ) {
17353 CRLog::error("NULL style is set for <%s>", LCSTR(getNodeName()) );
17354 style = this->getStyle();
17355 }
17356 #endif
17357 }
17358 }
17359 }
17360 #endif
17361
isBoxingNode(bool orPseudoElem) const17362 bool ldomNode::isBoxingNode( bool orPseudoElem ) const
17363 {
17364 if( isElement() ) {
17365 lUInt16 id = getNodeId();
17366 if( id >= el_autoBoxing && id <= el_inlineBox ) {
17367 return true;
17368 }
17369 if ( orPseudoElem && id == el_pseudoElem ) {
17370 return true;
17371 }
17372 }
17373 return false;
17374 }
17375
getUnboxedParent() const17376 ldomNode * ldomNode::getUnboxedParent() const
17377 {
17378 ldomNode * parent = getParentNode();
17379 while ( parent && parent->isBoxingNode() )
17380 parent = parent->getParentNode();
17381 return parent;
17382 }
17383
17384 // The following 4 methods are mostly used when checking CSS siblings/child
17385 // rules and counting list items siblings: we have them skip pseudoElems by
17386 // using isBoxingNode(orPseudoElem=true).
getUnboxedFirstChild(bool skip_text_nodes) const17387 ldomNode * ldomNode::getUnboxedFirstChild( bool skip_text_nodes ) const
17388 {
17389 for ( int i=0; i<getChildCount(); i++ ) {
17390 ldomNode * child = getChildNode(i);
17391 if ( child && child->isBoxingNode(true) ) {
17392 child = child->getUnboxedFirstChild( skip_text_nodes );
17393 // (child will then be NULL if it was a pseudoElem)
17394 }
17395 if ( child && (!skip_text_nodes || !child->isText()) )
17396 return child;
17397 }
17398 return NULL;
17399 }
17400
getUnboxedLastChild(bool skip_text_nodes) const17401 ldomNode * ldomNode::getUnboxedLastChild( bool skip_text_nodes ) const
17402 {
17403 for ( int i=getChildCount()-1; i>=0; i-- ) {
17404 ldomNode * child = getChildNode(i);
17405 if ( child && child->isBoxingNode(true) ) {
17406 child = child->getUnboxedLastChild( skip_text_nodes );
17407 }
17408 if ( child && (!skip_text_nodes || !child->isText()) )
17409 return child;
17410 }
17411 return NULL;
17412 }
17413
17414 /* For reference, a non-recursive node subtree walker:
17415 ldomNode * n = topNode;
17416 if ( n && n->getChildCount() > 0 ) {
17417 int index = 0;
17418 n = n->getChildNode(index);
17419 while ( true ) {
17420 // Check the node only the first time we meet it (index == 0) and
17421 // not when we get back to it from a child to process next sibling
17422 if ( index == 0 ) {
17423 // Check n, process it, return it...
17424 }
17425 // Process next child
17426 if ( index < n->getChildCount() ) {
17427 n = n->getChildNode(index);
17428 index = 0;
17429 continue;
17430 }
17431 // No more child, get back to parent and have it process our sibling
17432 index = n->getNodeIndex() + 1;
17433 n = n->getParentNode();
17434 if ( n == topNode && index >= n->getChildCount() )
17435 break; // back to top node and all its children visited
17436 }
17437 }
17438 */
17439
/// Returns the node following this one among the children of its unboxed
/// parent, looking through boxing nodes: boxing nodes are walked into and
/// out of, but never returned, and pseudoElems are never returned either.
/// When skip_text_nodes is true, text nodes are not returned.
/// Returns NULL when this node has no unboxed parent, or when the walk gets
/// back to the unboxed parent with no more children to visit.
ldomNode * ldomNode::getUnboxedNextSibling( bool skip_text_nodes ) const
{
    // We use a variation of the above non-recursive node subtree walker,
    // but with an arbitrary starting node (this) inside the unboxed_parent
    // tree, and checks to not walk down non-boxing nodes - but still
    // walking up any node (which ought to be a boxing node).
    ldomNode * unboxed_parent = getUnboxedParent(); // don't walk outside of it
    if ( !unboxed_parent )
        return NULL;
    ldomNode * n = (ldomNode *) this;
    int index = 0;
    bool node_entered = true; // bootstrap loop
    // We may meet a same node as 'n' multiple times:
    // - once with node_entered=false and index being its real position inside
    //   its parent children collection, and we'll be "entering" it
    // - once with node_entered=true and index=0, meaning we have "entered" it to
    //   check if it's a candidate, and to possibly go on checking its own children.
    // - once when back from its children, with node_entered=false and index
    //   being that previous child index + 1, to go process its next sibling
    //   (or parent if no more sibling)
    while ( true ) {
        // printf(" %s\n", LCSTR(ldomXPointer(n,0).toStringV1()));
        if ( node_entered && n != this ) { // Don't check the starting node
            // Check if this node is a candidate
            if ( n->isText() ) { // Text nodes are not boxing nodes
                if ( !skip_text_nodes )
                    return n;
            }
            else if ( !n->isBoxingNode(true) ) // Not a boxing node nor pseudoElem
                return n;
            // Otherwise, this node is a boxing node (or a text node or a pseudoElem
            // with no child, and we'll get back to its parent)
        }
        // Enter next node, and re-loop to have it checked
        // - if !node_entered : n is the parent and index points to the next child
        //   we want to check
        // - if n->isBoxingNode() (and node_entered=true, and index=0): enter the first
        //   child of this boxingNode (not if pseudoElem, that doesn't box anything)
        if ( (!node_entered || n->isBoxingNode()) && index < n->getChildCount() ) {
            n = n->getChildNode(index);
            index = 0;
            node_entered = true;
            continue;
        }
        // No more sibling/child to check, get back to parent and have it
        // process n's next sibling
        index = n->getNodeIndex() + 1;
        n = n->getParentNode();
        node_entered = false;
        if ( n == unboxed_parent && index >= n->getChildCount() ) {
            // back to real parent node and no more child to check
            break;
        }
    }
    // Walked the rest of the unboxed parent's subtree without finding a candidate
    return NULL;
}
17496
getUnboxedPrevSibling(bool skip_text_nodes) const17497 ldomNode * ldomNode::getUnboxedPrevSibling( bool skip_text_nodes ) const
17498 {
17499 // Similar to getUnboxedNextSibling(), but walking backward
17500 ldomNode * unboxed_parent = getUnboxedParent();
17501 if ( !unboxed_parent )
17502 return NULL;
17503 ldomNode * n = (ldomNode *) this;
17504 int index = 0;
17505 bool node_entered = true; // bootstrap loop
17506 while ( true ) {
17507 // printf(" %s\n", LCSTR(ldomXPointer(n,0).toStringV1()));
17508 if ( node_entered && n != this ) {
17509 if ( n->isText() ) {
17510 if ( !skip_text_nodes )
17511 return n;
17512 }
17513 else if ( !n->isBoxingNode(true) )
17514 return n;
17515 }
17516 if ( (!node_entered || n->isBoxingNode()) && index >= 0 && index < n->getChildCount() ) {
17517 n = n->getChildNode(index);
17518 index = n->getChildCount() - 1;
17519 node_entered = true;
17520 continue;
17521 }
17522 index = n->getNodeIndex() - 1;
17523 n = n->getParentNode();
17524 node_entered = false;
17525 if ( n == unboxed_parent && index < 0 ) {
17526 break;
17527 }
17528 }
17529 return NULL;
17530 }
17531
17532 /// for display:list-item node, get marker
getNodeListMarker(int & counterValue,lString32 & marker,int & markerWidth)17533 bool ldomNode::getNodeListMarker( int & counterValue, lString32 & marker, int & markerWidth )
17534 {
17535 #if BUILD_LITE!=1
17536 css_style_ref_t s = getStyle();
17537 marker.clear();
17538 markerWidth = 0;
17539 if ( s.isNull() )
17540 return false;
17541 css_list_style_type_t st = s->list_style_type;
17542 switch ( st ) {
17543 default:
17544 // treat default as disc
17545 case css_lst_disc:
17546 marker = U"\x2022"; // U"\x25CF" U"\x26AB" (medium circle) U"\x2981" (spot) U"\x2022" (bullet, small)
17547 break;
17548 case css_lst_circle:
17549 marker = U"\x25E6"; // U"\x25CB" U"\x26AA (medium) U"\25E6" (bullet) U"\x26AC (medium small)
17550 break;
17551 case css_lst_square:
17552 marker = U"\x25AA"; // U"\x25A0" U"\x25FE" (medium small) U"\x25AA" (small)
17553 break;
17554 case css_lst_none:
17555 // When css_lsp_inside, no space is used by the invisible marker
17556 if ( s->list_style_position != css_lsp_inside ) {
17557 marker = U"\x0020";
17558 }
17559 break;
17560 case css_lst_decimal:
17561 case css_lst_lower_roman:
17562 case css_lst_upper_roman:
17563 case css_lst_lower_alpha:
17564 case css_lst_upper_alpha:
17565 do {
17566 // If this element has a valid value then use it avoiding a walk.
17567 lString32 el_value = getAttributeValue(attr_value);
17568 if ( !el_value.empty() ) {
17569 int el_ivalue;
17570 if ( el_value.atoi(el_ivalue) ) {
17571 counterValue = el_ivalue;
17572 break;
17573 }
17574 }
17575
17576 // The UL > LI parent-child chain may have had some of our Boxing elements inserted
17577 ldomNode * parent = getUnboxedParent();
17578
17579 // See if parent has a 'reversed' attribute.
17580 int increment = parent->hasAttribute(attr_reversed) ? -1 : +1;
17581
17582 // If the caller passes in a non-zero counter then it is assumed
17583 // have been already calculated and have the value of the prior
17584 // element of a walk. There may be a redundant recalculation in
17585 // the case of zero.
17586 if ( counterValue != 0 ) {
17587 counterValue += increment;
17588 break;
17589 }
17590
17591 // See if parent has a valid 'start' attribute.
17592 // https://www.w3.org/TR/html5/grouping-content.html#the-ol-element
17593 // "The start attribute, if present, must be a valid integer giving the ordinal value of the first list item."
17594 lString32 start_value = parent->getAttributeValue(attr_start);
17595 int istart;
17596 if ( !start_value.empty() && start_value.atoi(istart) )
17597 counterValue = istart;
17598 else if ( increment > 0 )
17599 counterValue = 1;
17600 else {
17601 // For a reversed ordering the default start is equal to the
17602 // number of child elements.
17603 counterValue = 0;
17604
17605 ldomNode * sibling = parent->getUnboxedFirstChild(true);
17606 while ( sibling ) {
17607 css_style_ref_t cs = sibling->getStyle();
17608 if ( cs.isNull() ) { // Should not happen, but let's be sure
17609 if ( sibling == this )
17610 break;
17611 sibling = sibling->getUnboxedNextSibling(true);
17612 continue;
17613 }
17614 if ( cs->display != css_d_list_item_block && cs->display != css_d_list_item_legacy) {
17615 // Alien element among list item nodes, skip it to not mess numbering
17616 if ( sibling == this ) // Should not happen, but let's be sure
17617 break;
17618 sibling = sibling->getUnboxedNextSibling(true);
17619 continue;
17620 }
17621 counterValue++;
17622 sibling = sibling->getUnboxedNextSibling(true); // skip text nodes
17623 }
17624 }
17625
17626 // iterate parent's real children from start up to this node
17627 counterValue -= increment;
17628 ldomNode * sibling = parent->getUnboxedFirstChild(true);
17629 while ( sibling ) {
17630 css_style_ref_t cs = sibling->getStyle();
17631 if ( cs.isNull() ) { // Should not happen, but let's be sure
17632 if ( sibling == this )
17633 break;
17634 sibling = sibling->getUnboxedNextSibling(true);
17635 continue;
17636 }
17637 if ( cs->display != css_d_list_item_block && cs->display != css_d_list_item_legacy) {
17638 // Alien element among list item nodes, skip it to not mess numbering
17639 if ( sibling == this ) // Should not happen, but let's be sure
17640 break;
17641 sibling = sibling->getUnboxedNextSibling(true);
17642 continue;
17643 }
17644
17645 // Count advances irrespective of the list style.
17646 counterValue += increment;
17647
17648 // See if it has a 'value' attribute that overrides the incremented value
17649 // https://www.w3.org/TR/html5/grouping-content.html#the-li-element
17650 // "The value attribute, if present, must be a valid integer giving the ordinal value of the list item."
17651 lString32 value = sibling->getAttributeValue(attr_value);
17652 if ( !value.empty() ) {
17653 int ivalue;
17654 if ( value.atoi(ivalue) )
17655 counterValue = ivalue;
17656 }
17657 if ( sibling == this )
17658 break;
17659 sibling = sibling->getUnboxedNextSibling(true); // skip text nodes
17660 }
17661 } while (0);
17662
17663 static const char * lower_roman[] = {"i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix",
17664 "x", "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix",
17665 "xx", "xxi", "xxii", "xxiii"};
17666 switch (st) {
17667 case css_lst_decimal:
17668 marker = lString32::itoa(counterValue);
17669 marker << '.';
17670 break;
17671 case css_lst_lower_roman:
17672 if (counterValue > 0 &&
17673 counterValue - 1 < (int)(sizeof(lower_roman) / sizeof(lower_roman[0])))
17674 marker = lString32(lower_roman[counterValue-1]);
17675 else
17676 marker = lString32::itoa(counterValue); // fallback to simple counter
17677 marker << '.';
17678 break;
17679 case css_lst_upper_roman:
17680 if (counterValue > 0 &&
17681 counterValue - 1 < (int)(sizeof(lower_roman) / sizeof(lower_roman[0])))
17682 marker = lString32(lower_roman[counterValue-1]);
17683 else
17684 marker = lString32::itoa(counterValue); // fallback to simple digital counter
17685 marker.uppercase();
17686 marker << '.';
17687 break;
17688 case css_lst_lower_alpha:
17689 if ( counterValue > 0 && counterValue<=26 )
17690 marker.append(1, (lChar32)('a' + counterValue - 1));
17691 else
17692 marker = lString32::itoa(counterValue); // fallback to simple digital counter
17693 marker << '.';
17694 break;
17695 case css_lst_upper_alpha:
17696 if ( counterValue > 0 && counterValue<=26 )
17697 marker.append(1, (lChar32)('A' + counterValue - 1));
17698 else
17699 marker = lString32::itoa(counterValue); // fallback to simple digital counter
17700 marker << '.';
17701 break;
17702 case css_lst_disc:
17703 case css_lst_circle:
17704 case css_lst_square:
17705 case css_lst_none:
17706 case css_lst_inherit:
17707 // do nothing
17708 break;
17709 }
17710 break;
17711 }
17712 bool res = false;
17713 if ( !marker.empty() ) {
17714 LVFontRef font = getFont();
17715 if ( !font.isNull() ) {
17716 TextLangCfg * lang_cfg = TextLangMan::getTextLangCfg( this );
17717 markerWidth = font->getTextWidth((marker + " ").c_str(), marker.length()+2, lang_cfg) + font->getSize()/8;
17718 res = true;
17719 } else {
17720 marker.clear();
17721 }
17722 }
17723 return res;
17724 #else
17725 marker = cs32("*");
17726 return true;
17727 #endif
17728 }
17729
17730
17731 /// returns first child node
getFirstChild() const17732 ldomNode * ldomNode::getFirstChild() const
17733 {
17734 ASSERT_NODE_NOT_NULL;
17735 if ( isElement() ) {
17736 #if BUILD_LITE!=1
17737 if ( !isPersistent() ) {
17738 #endif
17739 tinyElement * me = NPELEM;
17740 if ( me->_children.length() )
17741 return getDocument()->getTinyNode(me->_children[0]);
17742 #if BUILD_LITE!=1
17743 } else {
17744 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
17745 if ( me->childCount )
17746 return getDocument()->getTinyNode(me->children[0]);
17747 }
17748 #endif
17749 }
17750 return NULL;
17751 }
17752
17753 /// returns last child node
getLastChild() const17754 ldomNode * ldomNode::getLastChild() const
17755 {
17756 ASSERT_NODE_NOT_NULL;
17757 if ( isElement() ) {
17758 #if BUILD_LITE!=1
17759 if ( !isPersistent() ) {
17760 #endif
17761 tinyElement * me = NPELEM;
17762 if ( me->_children.length() )
17763 return getDocument()->getTinyNode(me->_children[me->_children.length()-1]);
17764 #if BUILD_LITE!=1
17765 } else {
17766 ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
17767 if ( me->childCount )
17768 return getDocument()->getTinyNode(me->children[me->childCount-1]);
17769 }
17770 #endif
17771 }
17772 return NULL;
17773 }
17774
17775 /// removes and deletes last child element
removeLastChild()17776 void ldomNode::removeLastChild()
17777 {
17778 ASSERT_NODE_NOT_NULL;
17779 if ( hasChildren() ) {
17780 ldomNode * lastChild = removeChild( getChildCount() - 1 );
17781 lastChild->destroy();
17782 }
17783 }
17784
17785 /// add child
addChild(lInt32 childNodeIndex)17786 void ldomNode::addChild( lInt32 childNodeIndex )
17787 {
17788 ASSERT_NODE_NOT_NULL;
17789 if ( !isElement() )
17790 return;
17791 if ( isPersistent() )
17792 modify(); // convert to mutable element
17793 tinyElement * me = NPELEM;
17794 me->_children.add( childNodeIndex );
17795 }
17796
17797 /// move range of children startChildIndex to endChildIndex inclusively to specified element
moveItemsTo(ldomNode * destination,int startChildIndex,int endChildIndex)17798 void ldomNode::moveItemsTo( ldomNode * destination, int startChildIndex, int endChildIndex )
17799 {
17800 ASSERT_NODE_NOT_NULL;
17801 if ( !isElement() )
17802 return;
17803 if ( isPersistent() )
17804 modify();
17805
17806 #ifdef TRACE_AUTOBOX
17807 CRLog::debug( "moveItemsTo() invoked from %d to %d", getDataIndex(), destination->getDataIndex() );
17808 #endif
17809 //if ( getDataIndex()==INDEX2 || getDataIndex()==INDEX1) {
17810 // CRLog::trace("nodes from element %d are being moved", getDataIndex());
17811 //}
17812 /*#ifdef _DEBUG
17813 if ( !_document->checkConsistency( false ) )
17814 CRLog::error("before moveItemsTo");
17815 #endif*/
17816 int len = endChildIndex - startChildIndex + 1;
17817 tinyElement * me = NPELEM;
17818 for ( int i=0; i<len; i++ ) {
17819 ldomNode * item = getChildNode( startChildIndex );
17820 //if ( item->getDataIndex()==INDEX2 || item->getDataIndex()==INDEX1 ) {
17821 // CRLog::trace("node %d is being moved", item->getDataIndex() );
17822 //}
17823 me->_children.remove( startChildIndex ); // + i
17824 item->setParentNode(destination);
17825 destination->addChild( item->getDataIndex() );
17826 }
17827 // TODO: renumber rest of children in necessary
17828 /*#ifdef _DEBUG
17829 if ( !_document->checkConsistency( false ) )
17830 CRLog::error("after moveItemsTo");
17831 #endif*/
17832
17833 }
17834
17835 /// find child element by tag id
findChildElement(lUInt16 nsid,lUInt16 id,int index)17836 ldomNode * ldomNode::findChildElement( lUInt16 nsid, lUInt16 id, int index )
17837 {
17838 ASSERT_NODE_NOT_NULL;
17839 if ( !isElement() )
17840 return NULL;
17841 ldomNode * res = NULL;
17842 int k = 0;
17843 int childCount = getChildCount();
17844 for ( int i=0; i<childCount; i++ )
17845 {
17846 ldomNode * p = getChildNode( i );
17847 if ( !p->isElement() )
17848 continue;
17849 if ( p->getNodeId() == id && ( (p->getNodeNsId() == nsid) || (nsid==LXML_NS_ANY) ) )
17850 {
17851 if ( k==index || index==-1 ) {
17852 res = p;
17853 break;
17854 }
17855 k++;
17856 }
17857 }
17858 if (!res) // || (index==-1 && k>1) // DON'T CHECK WHETHER OTHER ELEMENTS EXIST
17859 return NULL;
17860 return res;
17861 }
17862
17863 /// find child element by id path
findChildElement(lUInt16 idPath[])17864 ldomNode * ldomNode::findChildElement( lUInt16 idPath[] )
17865 {
17866 ASSERT_NODE_NOT_NULL;
17867 if ( !isElement() )
17868 return NULL;
17869 ldomNode * elem = this;
17870 for ( int i=0; idPath[i]; i++ ) {
17871 elem = elem->findChildElement( LXML_NS_ANY, idPath[i], -1 );
17872 if ( !elem )
17873 return NULL;
17874 }
17875 return elem;
17876 }
17877
17878 /// inserts child element
insertChildElement(lUInt32 index,lUInt16 nsid,lUInt16 id)17879 ldomNode * ldomNode::insertChildElement( lUInt32 index, lUInt16 nsid, lUInt16 id )
17880 {
17881 ASSERT_NODE_NOT_NULL;
17882 if ( isElement() ) {
17883 if ( isPersistent() )
17884 modify();
17885 tinyElement * me = NPELEM;
17886 if (index>(lUInt32)me->_children.length())
17887 index = me->_children.length();
17888 ldomNode * node = getDocument()->allocTinyElement( this, nsid, id );
17889 me->_children.insert( index, node->getDataIndex() );
17890 return node;
17891 }
17892 readOnlyError();
17893 return NULL;
17894 }
17895
17896 /// inserts child element
insertChildElement(lUInt16 id)17897 ldomNode * ldomNode::insertChildElement( lUInt16 id )
17898 {
17899 ASSERT_NODE_NOT_NULL;
17900 if ( isElement() ) {
17901 if ( isPersistent() )
17902 modify();
17903 ldomNode * node = getDocument()->allocTinyElement( this, LXML_NS_NONE, id );
17904 NPELEM->_children.insert( NPELEM->_children.length(), node->getDataIndex() );
17905 return node;
17906 }
17907 readOnlyError();
17908 return NULL;
17909 }
17910
17911 /// inserts child text
insertChildText(lUInt32 index,const lString32 & value)17912 ldomNode * ldomNode::insertChildText( lUInt32 index, const lString32 & value )
17913 {
17914 ASSERT_NODE_NOT_NULL;
17915 if ( isElement() ) {
17916 if ( isPersistent() )
17917 modify();
17918 tinyElement * me = NPELEM;
17919 if (index>(lUInt32)me->_children.length())
17920 index = me->_children.length();
17921 #if !defined(USE_PERSISTENT_TEXT) || BUILD_LITE==1
17922 ldomNode * node = getDocument()->allocTinyNode( NT_TEXT );
17923 lString8 s8 = UnicodeToUtf8(value);
17924 node->_data._text_ptr = new ldomTextNode(_handle._dataIndex, s8);
17925 #else
17926 ldomNode * node = getDocument()->allocTinyNode( NT_PTEXT );
17927 //node->_data._ptext_addr._parentIndex = _handle._dataIndex;
17928 lString8 s8 = UnicodeToUtf8(value);
17929 node->_data._ptext_addr = getDocument()->_textStorage.allocText( node->_handle._dataIndex, _handle._dataIndex, s8 );
17930 #endif
17931 me->_children.insert( index, node->getDataIndex() );
17932 return node;
17933 }
17934 readOnlyError();
17935 return NULL;
17936 }
17937
17938 /// inserts child text
insertChildText(const lString32 & value)17939 ldomNode * ldomNode::insertChildText( const lString32 & value )
17940 {
17941 ASSERT_NODE_NOT_NULL;
17942 if ( isElement() ) {
17943 if ( isPersistent() )
17944 modify();
17945 tinyElement * me = NPELEM;
17946 #if !defined(USE_PERSISTENT_TEXT) || BUILD_LITE==1
17947 ldomNode * node = getDocument()->allocTinyNode( NT_TEXT );
17948 lString8 s8 = UnicodeToUtf8(value);
17949 node->_data._text_ptr = new ldomTextNode(_handle._dataIndex, s8);
17950 #else
17951 ldomNode * node = getDocument()->allocTinyNode( NT_PTEXT );
17952 lString8 s8 = UnicodeToUtf8(value);
17953 node->_data._ptext_addr = getDocument()->_textStorage.allocText( node->_handle._dataIndex, _handle._dataIndex, s8 );
17954 #endif
17955 me->_children.insert( me->_children.length(), node->getDataIndex() );
17956 return node;
17957 }
17958 readOnlyError();
17959 return NULL;
17960 }
17961
17962 /// inserts child text
insertChildText(const lString8 & s8,bool before_last_child)17963 ldomNode * ldomNode::insertChildText(const lString8 & s8, bool before_last_child)
17964 {
17965 ASSERT_NODE_NOT_NULL;
17966 if ( isElement() ) {
17967 if ( isPersistent() )
17968 modify();
17969 tinyElement * me = NPELEM;
17970 #if !defined(USE_PERSISTENT_TEXT) || BUILD_LITE==1
17971 ldomNode * node = getDocument()->allocTinyNode( NT_TEXT );
17972 node->_data._text_ptr = new ldomTextNode(_handle._dataIndex, s8);
17973 #else
17974 ldomNode * node = getDocument()->allocTinyNode( NT_PTEXT );
17975 node->_data._ptext_addr = getDocument()->_textStorage.allocText( node->_handle._dataIndex, _handle._dataIndex, s8 );
17976 #endif
17977 int index = me->_children.length();
17978 if ( before_last_child && index > 0 )
17979 index--;
17980 me->_children.insert( index, node->getDataIndex() );
17981 return node;
17982 }
17983 readOnlyError();
17984 return NULL;
17985 }
17986
17987 /// remove child
removeChild(lUInt32 index)17988 ldomNode * ldomNode::removeChild( lUInt32 index )
17989 {
17990 ASSERT_NODE_NOT_NULL;
17991 if ( isElement() ) {
17992 if ( isPersistent() )
17993 modify();
17994 lUInt32 removedIndex = NPELEM->_children.remove(index);
17995 ldomNode * node = getTinyNode( removedIndex );
17996 return node;
17997 }
17998 readOnlyError();
17999 return NULL;
18000 }
18001
18002 /// creates stream to read base64 encoded data from element
createBase64Stream()18003 LVStreamRef ldomNode::createBase64Stream()
18004 {
18005 ASSERT_NODE_NOT_NULL;
18006 if ( !isElement() )
18007 return LVStreamRef();
18008 #define DEBUG_BASE64_IMAGE 0
18009 #if DEBUG_BASE64_IMAGE==1
18010 lString32 fname = getAttributeValue( attr_id );
18011 lString8 fname8 = UnicodeToUtf8( fname );
18012 LVStreamRef ostream = LVOpenFileStream( fname.empty() ? U"image.png" : fname.c_str(), LVOM_WRITE );
18013 printf("createBase64Stream(%s)\n", fname8.c_str());
18014 #endif
18015 LVStream * stream = new LVBase64NodeStream( this );
18016 if ( stream->GetSize()==0 )
18017 {
18018 #if DEBUG_BASE64_IMAGE==1
18019 printf(" cannot create base64 decoder stream!!!\n");
18020 #endif
18021 delete stream;
18022 return LVStreamRef();
18023 }
18024 LVStreamRef istream( stream );
18025
18026 #if DEBUG_BASE64_IMAGE==1
18027 LVPumpStream( ostream, istream );
18028 istream->SetPos(0);
18029 #endif
18030
18031 return istream;
18032 }
18033
18034 #if BUILD_LITE!=1
18035
18036 class NodeImageProxy : public LVImageSource
18037 {
18038 ldomNode * _node;
18039 lString32 _refName;
18040 int _dx;
18041 int _dy;
18042 public:
NodeImageProxy(ldomNode * node,lString32 refName,int dx,int dy)18043 NodeImageProxy( ldomNode * node, lString32 refName, int dx, int dy )
18044 : _node(node), _refName(refName), _dx(dx), _dy(dy)
18045 {
18046
18047 }
18048
GetSourceNode()18049 virtual ldomNode * GetSourceNode()
18050 {
18051 return NULL;
18052 }
GetSourceStream()18053 virtual LVStream * GetSourceStream()
18054 {
18055 return NULL;
18056 }
18057
Compact()18058 virtual void Compact() { }
GetWidth()18059 virtual int GetWidth() { return _dx; }
GetHeight()18060 virtual int GetHeight() { return _dy; }
Decode(LVImageDecoderCallback * callback)18061 virtual bool Decode( LVImageDecoderCallback * callback )
18062 {
18063 LVImageSourceRef img = _node->getDocument()->getObjectImageSource(_refName);
18064 if ( img.isNull() )
18065 return false;
18066 return img->Decode(callback);
18067 }
~NodeImageProxy()18068 virtual ~NodeImageProxy()
18069 {
18070
18071 }
18072 };
18073
18074 /// returns object image ref name
getObjectImageRefName(bool percentDecode)18075 lString32 ldomNode::getObjectImageRefName(bool percentDecode)
18076 {
18077 if (!isElement())
18078 return lString32::empty_str;
18079 //printf("ldomElement::getObjectImageSource() ... ");
18080 const css_elem_def_props_t * et = getDocument()->getElementTypePtr(getNodeId());
18081 if (!et || !et->is_object)
18082 return lString32::empty_str;
18083 lUInt16 hrefId = getDocument()->getAttrNameIndex("href");
18084 lUInt16 srcId = getDocument()->getAttrNameIndex("src");
18085 lUInt16 recIndexId = getDocument()->getAttrNameIndex("recindex");
18086 lString32 refName = getAttributeValue( getDocument()->getNsNameIndex("xlink"),
18087 hrefId );
18088
18089 if ( refName.empty() )
18090 refName = getAttributeValue( getDocument()->getNsNameIndex("l"), hrefId );
18091 if ( refName.empty() )
18092 refName = getAttributeValue( LXML_NS_ANY, hrefId ); //LXML_NS_NONE
18093 if ( refName.empty() )
18094 refName = getAttributeValue( LXML_NS_ANY, srcId ); //LXML_NS_NONE
18095 if (refName.empty()) {
18096 lString32 recindex = getAttributeValue( LXML_NS_ANY, recIndexId );
18097 if (!recindex.empty()) {
18098 int n;
18099 if (recindex.atoi(n)) {
18100 refName = lString32(MOBI_IMAGE_NAME_PREFIX) + fmt::decimal(n);
18101 //CRLog::debug("get mobi image %s", LCSTR(refName));
18102 }
18103 }
18104 // else {
18105 // for (int k=0; k<getAttrCount(); k++) {
18106 // CRLog::debug("attr %s=%s", LCSTR(getAttributeName(k)), LCSTR(getAttributeValue(getAttributeName(k).c_str())));
18107 // }
18108 // }
18109 }
18110 if ( refName.length()<2 )
18111 return lString32::empty_str;
18112 if (percentDecode)
18113 refName = DecodeHTMLUrlString(refName);
18114 return refName;
18115 }
18116
18117
18118 /// returns object image stream
getObjectImageStream()18119 LVStreamRef ldomNode::getObjectImageStream()
18120 {
18121 lString32 refName = getObjectImageRefName();
18122 if ( refName.empty() )
18123 return LVStreamRef();
18124 return getDocument()->getObjectImageStream( refName );
18125 }
18126
18127
18128 /// returns object image source
getObjectImageSource()18129 LVImageSourceRef ldomNode::getObjectImageSource()
18130 {
18131 lString32 refName = getObjectImageRefName(true);
18132 LVImageSourceRef ref;
18133 if ( refName.empty() )
18134 return ref;
18135 ref = getDocument()->getObjectImageSource( refName );
18136 if (ref.isNull()) {
18137 // try again without percent decoding (for fb3)
18138 refName = getObjectImageRefName(false);
18139 if ( refName.empty() )
18140 return ref;
18141 ref = getDocument()->getObjectImageSource( refName );
18142 }
18143 if ( !ref.isNull() ) {
18144 int dx = ref->GetWidth();
18145 int dy = ref->GetHeight();
18146 ref = LVImageSourceRef( new NodeImageProxy(this, refName, dx, dy) );
18147 } else {
18148 CRLog::error("ObjectImageSource cannot be opened by name %s", LCSTR(refName));
18149 }
18150
18151 getDocument()->_urlImageMap.set( refName, ref );
18152 return ref;
18153 }
18154
/// register embedded document fonts in font manager, if any exist in document
///
/// For each entry of _fontList: URLs starting with "res://" or "file://" are
/// registered as external fonts, other URLs as document fonts. When a
/// document font cannot be registered, the font manager's face list is
/// searched for a face whose name contains the URL (both compared with
/// spaces removed), and that face is set as an alias for the entry's face.
void ldomDocument::registerEmbeddedFonts()
{
    if (_fontList.empty())
        return;
    int list = _fontList.length();
    // x accumulates, comma separated, the faces for which an alias was set
    lString8 x=lString8("");
    lString32Collection flist;
    fontMan->getFaceList(flist);
    int cnt = flist.length();
    for (int i = 0; i < list; i++) {
        LVEmbeddedFontDef *item = _fontList.get(i);
        lString32 url = item->getUrl();
        lString8 face = item->getFace();
        if (face.empty()) {
            // No face on this entry: borrow the first non-empty face found
            // among the following entries
            for (int a=i+1;a<list;a++){
                lString8 tmp=_fontList.get(a)->getFace();
                if (!tmp.empty()) {face=tmp;break;}
            }
        }
        // Skip entries without URL, and entries whose face is found (as a
        // substring) in the list of already aliased faces
        if ((!x.empty() && x.pos(face)!=-1) || url.empty()) {
            continue;
        }
        if (url.startsWithNoCase(lString32("res://")) || url.startsWithNoCase(lString32("file://"))) {
            // NOTE(review): this uses item->getFace(), not the 'face' possibly
            // borrowed from a following entry above - confirm this is intended.
            if (!fontMan->RegisterExternalFont(item->getUrl(), item->getFace(), item->getBold(), item->getItalic())) {
                //CRLog::error("Failed to register external font face: %s file: %s", item->getFace().c_str(), LCSTR(item->getUrl()));
            }
            continue;
        }
        else {
            if (!fontMan->RegisterDocumentFont(getDocIndex(), _container, item->getUrl(), item->getFace(), item->getBold(), item->getItalic())) {
                //CRLog::error("Failed to register document font face: %s file: %s", item->getFace().c_str(), LCSTR(item->getUrl()));
                // Registration failed: look for a known face matching the URL
                lString32 fontface = lString32("");
                for (int j = 0; j < cnt; j = j + 1) {
                    fontface = flist[j];
                    // Remove spaces from both strings before comparing.
                    // NOTE(review): lString32("\0") is presumably an empty
                    // string, making replace() a removal - confirm.
                    // Note that 'url' (a local copy) is modified in place, on
                    // each iteration.
                    do { (fontface.replace(lString32(" "), lString32("\0"))); }
                    while (fontface.pos(lString32(" ")) != -1);
                    do { (url.replace(lString32(" "), lString32("\0"))); }
                    while (url.pos(lString32(" ")) != -1);
                    // NOTE(review): lowercase() presumably converts the string
                    // it is called on and returns it - confirm.
                    if (fontface.lowercase().pos(url.lowercase()) != -1) {
                        if(fontMan->SetAlias(face, UnicodeToLocal(flist[j]), getDocIndex(),item->getBold(),item->getItalic())){
                            // Remember this face so later entries using it are skipped
                            x.append(face).append(lString8(","));
                            CRLog::debug("font-face %s matches local font %s",face.c_str(),LCSTR(flist[j]));
                            break;}
                    }
                }
            }
        }
    }
}
18205 /// unregister embedded document fonts in font manager, if any exist in document
unregisterEmbeddedFonts()18206 void ldomDocument::unregisterEmbeddedFonts()
18207 {
18208 fontMan->UnregisterDocumentFonts(_docIndex);
18209 }
18210
18211 /// returns object image stream
getObjectImageStream(lString32 refName)18212 LVStreamRef ldomDocument::getObjectImageStream( lString32 refName )
18213 {
18214 LVStreamRef ref;
18215 if ( refName.startsWith(lString32(BLOB_NAME_PREFIX)) ) {
18216 return _blobCache.getBlob(refName);
18217 }
18218 if ( refName.length() > 10 && refName[4] == ':' && refName.startsWith(lString32("data:image/")) ) {
18219 // <img src="data:image/png;base64,iVBORw0KG...>
18220 lString32 data = refName.substr(0, 50);
18221 int pos = data.pos(U";base64,");
18222 if ( pos > 0 ) {
18223 lString8 b64data = UnicodeToLocal(refName.substr(pos+8));
18224 ref = LVStreamRef(new LVBase64Stream(b64data));
18225 return ref;
18226 }
18227 }
18228 if ( refName[0]!='#' ) {
18229 if ( !getContainer().isNull() ) {
18230 lString32 name = refName;
18231 if ( !getCodeBase().empty() )
18232 name = getCodeBase() + refName;
18233 ref = getContainer()->OpenStream(name.c_str(), LVOM_READ);
18234 if ( ref.isNull() ) {
18235 lString32 fname = getProps()->getStringDef( DOC_PROP_FILE_NAME, "" );
18236 fname = LVExtractFilenameWithoutExtension(fname);
18237 if ( !fname.empty() ) {
18238 lString32 fn = fname + "_img";
18239 // if ( getContainer()->GetObjectInfo(fn) ) {
18240
18241 // }
18242 lString32 name = fn + "/" + refName;
18243 if ( !getCodeBase().empty() )
18244 name = getCodeBase() + name;
18245 ref = getContainer()->OpenStream(name.c_str(), LVOM_READ);
18246 }
18247 }
18248 if ( ref.isNull() )
18249 CRLog::error("Cannot open stream by name %s", LCSTR(name));
18250 }
18251 return ref;
18252 }
18253 lUInt32 refValueId = findAttrValueIndex( refName.c_str() + 1 );
18254 if ( refValueId == (lUInt32)-1 ) {
18255 return ref;
18256 }
18257 ldomNode * objnode = getNodeById( refValueId );
18258 if ( !objnode || !objnode->isElement())
18259 return ref;
18260 ref = objnode->createBase64Stream();
18261 return ref;
18262 }
18263
18264 /// returns object image source
getObjectImageSource(lString32 refName)18265 LVImageSourceRef ldomDocument::getObjectImageSource( lString32 refName )
18266 {
18267 LVStreamRef stream = getObjectImageStream( refName );
18268 if (stream.isNull())
18269 return LVImageSourceRef();
18270 return LVCreateStreamImageSource( stream );
18271 }
18272
resetNodeNumberingProps()18273 void ldomDocument::resetNodeNumberingProps()
18274 {
18275 lists.clear();
18276 }
18277
getNodeNumberingProps(lUInt32 nodeDataIndex)18278 ListNumberingPropsRef ldomDocument::getNodeNumberingProps( lUInt32 nodeDataIndex )
18279 {
18280 return lists.get(nodeDataIndex);
18281 }
18282
setNodeNumberingProps(lUInt32 nodeDataIndex,ListNumberingPropsRef v)18283 void ldomDocument::setNodeNumberingProps( lUInt32 nodeDataIndex, ListNumberingPropsRef v )
18284 {
18285 lists.set(nodeDataIndex, v);
18286 }
18287
18288 /// returns the sum of this node and its parents' top and bottom margins, borders and paddings
getSurroundingAddedHeight()18289 int ldomNode::getSurroundingAddedHeight()
18290 {
18291 int h = 0;
18292 ldomNode * n = this;
18293 while (true) {
18294 ldomNode * parent = n->getParentNode();
18295 lvdom_element_render_method rm = n->getRendMethod();
18296 if ( rm != erm_inline && rm != erm_invisible && rm != erm_killed) {
18297 // Add offset of border and padding
18298 int base_width = 0;
18299 if ( parent && !(parent->isNull()) ) {
18300 // margins and padding in % are scaled according to parent's width
18301 RenderRectAccessor fmt( parent );
18302 base_width = fmt.getWidth();
18303 }
18304 int em = n->getFont()->getSize();
18305 css_style_ref_t style = n->getStyle();
18306 h += lengthToPx( style->margin[2], base_width, em ); // top margin
18307 h += lengthToPx( style->margin[3], base_width, em ); // bottom margin
18308 h += lengthToPx( style->padding[2], base_width, em ); // top padding
18309 h += lengthToPx( style->padding[3], base_width, em ); // bottom padding
18310 h += measureBorder(n, 0); // top border
18311 h += measureBorder(n, 2); // bottom border
18312 }
18313 if ( !parent || parent->isNull() )
18314 break;
18315 n = parent;
18316 }
18317 return h;
18318 }
18319
/// formats final block
// Provides, via 'frmtext', the LFormattedText holding this node's formatted
// content (reused from the document's rendered block cache when it holds a
// reusable one, built and formatted otherwise), and returns the block height.
// Returns 0 when this node is not an element or when its render method is
// not erm_final.
// 'fmt' is the rect of the block node, and MUST have its width set
// (as ::renderFinalBlock( this, f.get(), fmt...) needs it to compute text-indent in %
// 'int width' is the available width for the inner content, and so
// caller must exclude block node padding from it.
// 'float_footprint', when provided, is stored into this node; when NULL,
// a footprint is restored from this node instead.
int ldomNode::renderFinalBlock( LFormattedTextRef & frmtext, RenderRectAccessor * fmt, int width, BlockFloatFootprint * float_footprint )
{
    ASSERT_NODE_NOT_NULL;
    if ( !isElement() )
        return 0;
    //CRLog::trace("renderFinalBlock()");
    CVRendBlockCache & cache = getDocument()->getRendBlockCache();
    LFormattedTextRef f;
    lvdom_element_render_method rm = getRendMethod();

    if ( cache.get( this, f ) ) {
        if ( f->isReusable() ) {
            // Cache hit: hand the cached object to the caller, and use the
            // height already known by the provided 'fmt'
            frmtext = f;
            if ( rm != erm_final )
                return 0;
            //RenderRectAccessor fmt( this );
            //CRLog::trace("Found existing formatted object for node #%08X", (lUInt32)this);
            return fmt->getHeight();
        }
        // Not reusable: remove it, just to be sure it's properly freed
        cache.remove( this );
    }
    f = getDocument()->createFormattedText();
    // Note: on this early return, 'frmtext' is left untouched
    if ( rm != erm_final )
        return 0;

    /// Render whole node content as single formatted object

    // Get some properties cached in this node's RenderRectAccessor
    // and set the initial flags and lang_cfg (for/from the final node
    // itself) for renderFinalBlock(),
    int direction = RENDER_RECT_PTR_GET_DIRECTION(fmt);
    lUInt32 flags = styleToTextFmtFlags( true, getStyle(), 0, direction );
    int lang_node_idx = fmt->getLangNodeIndex();
    // A lang node index <= 0 results in NULL being given to getTextLangCfg()
    TextLangCfg * lang_cfg = TextLangMan::getTextLangCfg(lang_node_idx>0 ? getDocument()->getTinyNode(lang_node_idx) : NULL);

    // Add this node's inner content (text and children nodes) as source text
    // and image fragments into the empty LFormattedText object
    ::renderFinalBlock( this, f.get(), fmt, flags, 0, -1, lang_cfg );
    // We need to store this LFormattedTextRef in the cache for it to
    // survive when leaving this function (some callers do use it).
    cache.set( this, f );

    // Gather some outer properties and context, so we can format (render)
    // the inner content in that context.
    // This page_h we provide to f->Format() is only used to enforce a max height to images
    int page_h = getDocument()->getPageHeight();
    // Save or restore outer floats footprint (it is only provided
    // when rendering the document - when this is called to draw the
    // node, or search for text and links, we need to get it from
    // the cached RenderRectAccessor).
    BlockFloatFootprint restored_float_footprint; // (needs to be available when we exit the else {})
    if (float_footprint) { // Save it in this node's RenderRectAccessor
        float_footprint->store( this );
    }
    else { // Restore it from this node's RenderRectAccessor
        float_footprint = &restored_float_footprint;
        float_footprint->restore( this, (lUInt16)width );
    }
    if ( !getDocument()->isRendered() ) {
        // Full rendering in progress: avoid some unneeded work that
        // is only needed when we'll be drawing the formatted text
        // (like line alignment): this will mark it as not reusable, and
        // one that is on a page to be drawn will be reformatted.
        f->requestLightFormatting();
    }
    int usable_left_overflow = fmt->getUsableLeftOverflow();
    int usable_right_overflow = fmt->getUsableRightOverflow();

    // Note: some properties are set into LFormattedText by lvrend.cpp's renderFinalBlock(),
    // while some others are only passed below as parameters to LFormattedText->Format().
    // The former should logically be source inner content properties (strut, text indent)
    // while the latter should be formatting and outer context properties (block width,
    // page height...).
    // There might be a few drifts from that logic, or duplicates ('direction' is
    // passed both ways), that could need a little rework.

    // Format/render inner content: this makes lines and words, which are
    // cached into the LFormattedText and ready to be used for drawing
    // and text selection.
    // (width and page_h are truncated to lUInt16 for Format())
    int h = f->Format((lUInt16)width, (lUInt16)page_h, direction, usable_left_overflow, usable_right_overflow,
                getDocument()->getHangingPunctiationEnabled(), float_footprint);
    frmtext = f;
    //CRLog::trace("Created new formatted object for node #%08X", (lUInt32)this);
    return h;
}
18411
18412 /// formats final block again after change, returns true if size of block is changed
18413 /// (not used anywhere, not updated to use RENDER_RECT_HAS_FLAG(fmt, INNER_FIELDS_SET)
refreshFinalBlock()18414 bool ldomNode::refreshFinalBlock()
18415 {
18416 ASSERT_NODE_NOT_NULL;
18417 if ( getRendMethod() != erm_final )
18418 return false;
18419 // TODO: implement reformatting of one node
18420 CVRendBlockCache & cache = getDocument()->getRendBlockCache();
18421 cache.remove( this );
18422 RenderRectAccessor fmt( this );
18423 lvRect oldRect, newRect;
18424 fmt.getRect( oldRect );
18425 LFormattedTextRef txtform;
18426 int width = fmt.getWidth();
18427 renderFinalBlock( txtform, &fmt, width-measureBorder(this,1)-measureBorder(this,3)
18428 -lengthToPx(this->getStyle()->padding[0],fmt.getWidth(),this->getFont()->getSize())
18429 -lengthToPx(this->getStyle()->padding[1],fmt.getWidth(),this->getFont()->getSize()));
18430 fmt.getRect( newRect );
18431 if ( oldRect == newRect )
18432 return false;
18433 // TODO: relocate other blocks
18434 return true;
18435 }
18436
18437 #endif
18438
/// replace node with r/o persistent implementation
// When this node is not yet persistent, its content is copied into the
// document's element or text storage, the mutable object (tinyElement or
// ldomTextNode) is deleted, and the node type stored in the low 4 bits of
// _handle._dataIndex is updated accordingly.
// Does nothing when BUILD_LITE==1 or when the node is already persistent.
// Always returns this.
ldomNode * ldomNode::persist()
{
    ASSERT_NODE_NOT_NULL;
#if BUILD_LITE!=1
    if ( !isPersistent() ) {
        if ( isElement() ) {
            // ELEM->PELEM
            // Grab the mutable element before _data gets overwritten below
            // (NOTE(review): NPELEM presumably aliases this node's element
            // pointer held in _data - confirm against the macro definition)
            tinyElement * elem = NPELEM;
            int attrCount = elem->_attrs.length();
            int childCount = elem->_children.length();
            // Replace the node type bits (low 4 bits) with NT_PELEMENT
            _handle._dataIndex = (_handle._dataIndex & ~0xF) | NT_PELEMENT;
            // Allocate storage for the element (parent index is 0 when there is no parent)
            _data._pelem_addr = getDocument()->_elemStorage.allocElem(_handle._dataIndex, elem->_parentNode ? elem->_parentNode->_handle._dataIndex : 0, elem->_children.length(), elem->_attrs.length() );
            ElementDataStorageItem * data = getDocument()->_elemStorage.getElem(_data._pelem_addr);
            data->nsid = elem->_nsid;
            data->id = elem->_id;
            // Each attribute takes 4 consecutive lUInt16 slots
            lUInt16 * attrs = data->attrs();
            int i;
            for ( i=0; i<attrCount; i++ ) {
                const lxmlAttribute * attr = elem->_attrs[i];
                attrs[i * 4] = attr->nsid;     // namespace
                attrs[i * 4 + 1] = attr->id;   // id
                attrs[i * 4 + 2] = (lUInt16)(attr->index & 0xFFFF);// value lower 2-bytes
                attrs[i * 4 + 3] = (lUInt16)(attr->index >> 16);// value higher 2-bytes
            }
            for ( i=0; i<childCount; i++ ) {
                data->children[i] = elem->_children[i];
            }
            data->rendMethod = (lUInt8)elem->_rendMethod;
            // Everything has been copied: the mutable element can go
            delete elem;
        } else {
            // TEXT->PTEXT
            // Copy text and parent index out of the mutable text node
            // before deleting it
            lString8 utf8 = _data._text_ptr->getText();
            lUInt32 parentIndex = _data._text_ptr->getParentIndex();
            delete _data._text_ptr;
            // change type: replace the node type bits (low 4 bits) with NT_PTEXT
            _handle._dataIndex = (_handle._dataIndex & ~0xF) | NT_PTEXT;
            _data._ptext_addr = getDocument()->_textStorage.allocText(_handle._dataIndex, parentIndex, utf8 );
        }
    }
#endif
    return this;
}
18482
/// replace node with r/w implementation
// When this node is persistent, its content is read back from the
// document's element or text storage into a newly allocated mutable object
// (tinyElement or ldomTextNode), the storage entry is freed, and the node
// type stored in the low 4 bits of _handle._dataIndex is updated.
// Does nothing when BUILD_LITE==1 or when the node is not persistent.
// Always returns this.
ldomNode * ldomNode::modify()
{
    ASSERT_NODE_NOT_NULL;
#if BUILD_LITE!=1
    if ( isPersistent() ) {
        if ( isElement() ) {
            // PELEM->ELEM
            ElementDataStorageItem * data = getDocument()->_elemStorage.getElem(_data._pelem_addr);
            tinyElement * elem = new tinyElement(getDocument(), getParentNode(), data->nsid, data->id );
            // Copy children and attributes out of the storage item
            for ( int i=0; i<data->childCount; i++ )
                elem->_children.add( data->children[i] );
            for ( int i=0; i<data->attrCount; i++ )
                elem->_attrs.add( data->attr(i) );
            // Replace the node type bits (low 4 bits) with NT_ELEMENT
            _handle._dataIndex = (_handle._dataIndex & ~0xF) | NT_ELEMENT;
            elem->_rendMethod = (lvdom_element_render_method)data->rendMethod;
            // Free the storage entry once everything has been read from it,
            // and only then make this node reference the mutable element
            getDocument()->_elemStorage.freeNode( _data._pelem_addr );
            NPELEM = elem;
        } else {
            // PTEXT->TEXT
            // convert persistent text to mutable
            // (text and parent index are read before the storage entry is freed)
            lString8 utf8 = getDocument()->_textStorage.getText(_data._ptext_addr);
            lUInt32 parentIndex = getDocument()->_textStorage.getParent(_data._ptext_addr);
            getDocument()->_textStorage.freeNode( _data._ptext_addr );
            _data._text_ptr = new ldomTextNode( parentIndex, utf8 );
            // change type: replace the node type bits (low 4 bits) with NT_TEXT
            _handle._dataIndex = (_handle._dataIndex & ~0xF) | NT_TEXT;
        }
    }
#endif
    return this;
}
18515
18516
18517 /// dumps memory usage statistics to debug log
dumpStatistics()18518 void tinyNodeCollection::dumpStatistics()
18519 {
18520 CRLog::info("*** Document memory usage: "
18521 "elements:%d, textNodes:%d, "
18522 "ptext=("
18523 "%d uncompressed), "
18524 "ptelems=("
18525 "%d uncompressed), "
18526 "rects=("
18527 "%d uncompressed), "
18528 "nodestyles=("
18529 "%d uncompressed), "
18530 "styles:%d, fonts:%d, renderedNodes:%d, "
18531 "totalNodes:%d(%dKb), mutableElements:%d(~%dKb)",
18532 _elemCount, _textCount,
18533 _textStorage.getUncompressedSize(),
18534 _elemStorage.getUncompressedSize(),
18535 _rectStorage.getUncompressedSize(),
18536 _styleStorage.getUncompressedSize(),
18537 _styles.length(), _fonts.length(),
18538 #if BUILD_LITE!=1
18539 ((ldomDocument*)this)->_renderedBlockCache.length(),
18540 #else
18541 0,
18542 #endif
18543 _itemCount, _itemCount*16/1024,
18544 _tinyElementCount, _tinyElementCount*(sizeof(tinyElement)+8*4)/1024 );
18545 }
getStatistics()18546 lString32 tinyNodeCollection::getStatistics()
18547 {
18548 lString32 s;
18549 s << "Elements: " << fmt::decimal(_elemCount) << ", " << fmt::decimal(_elemStorage.getUncompressedSize()/1024) << " KB\n";
18550 s << "Text nodes: " << fmt::decimal(_textCount) << ", " << fmt::decimal(_textStorage.getUncompressedSize()/1024) << " KB\n";
18551 s << "Styles: " << fmt::decimal(_styles.length()) << ", " << fmt::decimal(_styleStorage.getUncompressedSize()/1024) << " KB\n";
18552 s << "Font instances: " << fmt::decimal(_fonts.length()) << "\n";
18553 s << "Rects: " << fmt::decimal(_rectStorage.getUncompressedSize()/1024) << " KB\n";
18554 #if BUILD_LITE!=1
18555 s << "Cached rendered blocks: " << fmt::decimal(((ldomDocument*)this)->_renderedBlockCache.length()) << "\n";
18556 #endif
18557 s << "Total nodes: " << fmt::decimal(_itemCount) << ", " << fmt::decimal(_itemCount*16/1024) << " KB\n";
18558 s << "Mutable elements: " << fmt::decimal(_tinyElementCount) << ", " << fmt::decimal(_tinyElementCount*(sizeof(tinyElement)+8*4)/1024) << " KB";
18559 return s;
18560 }
18561
18562
18563 /// returns position pointer
getXPointer()18564 ldomXPointer LVTocItem::getXPointer()
18565 {
18566 if ( _position.isNull() && !_path.empty() ) {
18567 _position = _doc->createXPointer( _path );
18568 if ( _position.isNull() ) {
18569 CRLog::trace("TOC node is not found for path %s", LCSTR(_path) );
18570 } else {
18571 CRLog::trace("TOC node is found for path %s", LCSTR(_path) );
18572 // CRLog::trace(" gives xpointer: %s", UnicodeToLocal(_position.toString()).c_str());
18573 }
18574 }
18575 return _position;
18576 }
18577
18578 /// returns position path
getPath()18579 lString32 LVTocItem::getPath()
18580 {
18581 if ( _path.empty() && !_position.isNull())
18582 _path = _position.toString();
18583 return _path;
18584 }
18585
18586 /// returns Y position
getY()18587 int LVTocItem::getY()
18588 {
18589 #if BUILD_LITE!=1
18590 return getXPointer().toPoint().y;
18591 #else
18592 return 0;
18593 #endif
18594 }
18595
18596 /// serialize to byte array (pointer will be incremented by number of bytes written)
serialize(SerialBuf & buf)18597 bool LVTocItem::serialize( SerialBuf & buf )
18598 {
18599 // LVTocItem * _parent;
18600 // int _level;
18601 // int _index;
18602 // int _page;
18603 // int _percent;
18604 // lString32 _name;
18605 // ldomXPointer _position;
18606 // LVPtrVector<LVTocItem> _children;
18607
18608 buf << (lUInt32)_level << (lUInt32)_index << (lUInt32)_page << (lUInt32)_percent << (lUInt32)_children.length() << _name << getPath();
18609 if ( buf.error() )
18610 return false;
18611 for ( int i=0; i<_children.length(); i++ ) {
18612 _children[i]->serialize( buf );
18613 if ( buf.error() )
18614 return false;
18615 }
18616 return !buf.error();
18617 }
18618
18619 /// deserialize from byte array (pointer will be incremented by number of bytes read)
deserialize(ldomDocument * doc,SerialBuf & buf)18620 bool LVTocItem::deserialize( ldomDocument * doc, SerialBuf & buf )
18621 {
18622 if ( buf.error() )
18623 return false;
18624 lInt32 childCount = 0;
18625 buf >> _level >> _index >> _page >> _percent >> childCount >> _name >> _path;
18626 // CRLog::trace("[%d] %05d %s %s", _level, _page, LCSTR(_name), LCSTR(_path));
18627 if ( buf.error() )
18628 return false;
18629 // if ( _level>0 ) {
18630 // _position = doc->createXPointer( _path );
18631 // if ( _position.isNull() ) {
18632 // CRLog::error("Cannot find TOC node by path %s", LCSTR(_path) );
18633 // buf.seterror();
18634 // return false;
18635 // }
18636 // }
18637 for ( int i=0; i<childCount; i++ ) {
18638 LVTocItem * item = new LVTocItem(doc);
18639 if ( !item->deserialize( doc, buf ) ) {
18640 delete item;
18641 return false;
18642 }
18643 item->_parent = this;
18644 _children.add( item );
18645 if ( buf.error() )
18646 return false;
18647 }
18648 return true;
18649 }
18650
18651 /// returns page number
18652 //int LVTocItem::getPageNum( LVRendPageList & pages )
18653 //{
18654 // return getSectionPage( _position.getNode(), pages );
18655 //}
18656
18657
makeTocFromCrHintsOrHeadings(ldomNode * node,bool ensure_cr_hints)18658 static inline void makeTocFromCrHintsOrHeadings( ldomNode * node, bool ensure_cr_hints )
18659 {
18660 int level;
18661 if ( ensure_cr_hints ) {
18662 css_style_ref_t style = node->getStyle();
18663 if ( STYLE_HAS_CR_HINT(style, TOC_IGNORE) )
18664 return; // requested to be ignored via style tweaks
18665 if ( STYLE_HAS_CR_HINT(style, TOC_LEVELS_MASK) ) {
18666 if ( STYLE_HAS_CR_HINT(style, TOC_LEVEL1) ) level = 1;
18667 else if ( STYLE_HAS_CR_HINT(style, TOC_LEVEL2) ) level = 2;
18668 else if ( STYLE_HAS_CR_HINT(style, TOC_LEVEL3) ) level = 3;
18669 else if ( STYLE_HAS_CR_HINT(style, TOC_LEVEL4) ) level = 4;
18670 else if ( STYLE_HAS_CR_HINT(style, TOC_LEVEL5) ) level = 5;
18671 else if ( STYLE_HAS_CR_HINT(style, TOC_LEVEL6) ) level = 6;
18672 else level = 7; // should not be reached
18673 }
18674 else if ( node->getNodeId() >= el_h1 && node->getNodeId() <= el_h6 )
18675 // el_h1 .. el_h6 are consecutive and ordered in include/fb2def.h
18676 level = node->getNodeId() - el_h1 + 1;
18677 else
18678 return;
18679 }
18680 else {
18681 if ( node->getNodeId() >= el_h1 && node->getNodeId() <= el_h6 )
18682 // el_h1 .. el_h6 are consecutive and ordered in include/fb2def.h
18683 level = node->getNodeId() - el_h1 + 1;
18684 else
18685 return;
18686 }
18687 lString32 title = removeSoftHyphens( node->getText(' ') );
18688 ldomXPointer xp = ldomXPointer(node, 0);
18689 LVTocItem * root = node->getDocument()->getToc();
18690 LVTocItem * parent = root;
18691 // Find adequate parent, or create intermediates
18692 int plevel = 1;
18693 while (plevel < level) {
18694 int nbc = parent->getChildCount();
18695 if (nbc) { // use the latest child
18696 parent = parent->getChild(nbc-1);
18697 }
18698 else {
18699 // If we'd like to stick it to the last parent found, even if
18700 // of wrong level, just do: break;
18701 // But it is cleaner to create intermediate(s)
18702 parent = parent->addChild(U"", xp, lString32::empty_str);
18703 }
18704 plevel++;
18705 }
18706 parent->addChild(title, xp, lString32::empty_str);
18707 }
18708
makeTocFromHeadings(ldomNode * node)18709 static void makeTocFromHeadings( ldomNode * node )
18710 {
18711 makeTocFromCrHintsOrHeadings( node, false );
18712 }
18713
makeTocFromCrHintsOrHeadings(ldomNode * node)18714 static void makeTocFromCrHintsOrHeadings( ldomNode * node )
18715 {
18716 makeTocFromCrHintsOrHeadings( node, true );
18717 }
18718
makeTocFromDocFragments(ldomNode * node)18719 static void makeTocFromDocFragments( ldomNode * node )
18720 {
18721 if ( node->getNodeId() != el_DocFragment )
18722 return;
18723 // No title, and only level 1 with DocFragments
18724 ldomXPointer xp = ldomXPointer(node, 0);
18725 LVTocItem * root = node->getDocument()->getToc();
18726 root->addChild(U"", xp, lString32::empty_str);
18727 }
18728
buildTocFromHeadings()18729 void ldomDocument::buildTocFromHeadings()
18730 {
18731 m_toc.clear();
18732 getRootNode()->recurseElements(makeTocFromHeadings);
18733 }
18734
buildAlternativeToc()18735 void ldomDocument::buildAlternativeToc()
18736 {
18737 m_toc.clear();
18738 // Look first for style tweaks specified -cr-hint: toc-level1 ... toc-level6
18739 // and/or headings (H1...H6)
18740 getRootNode()->recurseElements(makeTocFromCrHintsOrHeadings);
18741 // If no heading or hints found, fall back to gathering DocFraments
18742 if ( !m_toc.getChildCount() )
18743 getRootNode()->recurseElements(makeTocFromDocFragments);
18744 // m_toc.setAlternativeTocFlag() uses the root toc item _page property
18745 // (never used for the root node) to store the fact this is an
18746 // alternatve TOC. This info can then be serialized to cache and
18747 // retrieved without any additional work or space overhead.
18748 m_toc.setAlternativeTocFlag();
18749 // cache file will have to be updated with the alt TOC
18750 setCacheFileStale(true);
18751 _toc_from_cache_valid = false; // to force update of page numbers
18752 }
18753
18754 /// returns position pointer
getXPointer()18755 ldomXPointer LVPageMapItem::getXPointer()
18756 {
18757 if ( _position.isNull() && !_path.empty() ) {
18758 _position = _doc->createXPointer( _path );
18759 if ( _position.isNull() ) {
18760 CRLog::trace("LVPageMapItem node is not found for path %s", LCSTR(_path) );
18761 } else {
18762 CRLog::trace("LVPageMapItem node is found for path %s", LCSTR(_path) );
18763 }
18764 }
18765 return _position;
18766 }
18767
18768 /// returns position path
getPath()18769 lString32 LVPageMapItem::getPath()
18770 {
18771 if ( _path.empty() && !_position.isNull())
18772 _path = _position.toString();
18773 return _path;
18774 }
18775
18776 /// returns Y position
getDocY(bool refresh)18777 int LVPageMapItem::getDocY(bool refresh)
18778 {
18779 #if BUILD_LITE!=1
18780 if ( _doc_y < 0 || refresh )
18781 _doc_y = getXPointer().toPoint().y;
18782 if ( _doc_y < 0 && !_position.isNull() ) {
18783 // We got a xpointer, that did not resolve to a point.
18784 // It may be because the node it points to is invisible,
18785 // which may happen with pagebreak spans (that may not
18786 // be empty, and were set to "display: none").
18787 ldomXPointerEx xp = _position;
18788 if ( !xp.isVisible() ) {
18789 if ( xp.nextVisibleText() ) {
18790 _doc_y = xp.toPoint().y;
18791 }
18792 else {
18793 xp = _position;
18794 if ( xp.prevVisibleText() ) {
18795 _doc_y = xp.toPoint().y;
18796 }
18797 }
18798 }
18799 }
18800 return _doc_y;
18801 #else
18802 return 0;
18803 #endif
18804 }
18805
18806 /// serialize to byte array (pointer will be incremented by number of bytes written)
serialize(SerialBuf & buf)18807 bool LVPageMapItem::serialize( SerialBuf & buf )
18808 {
18809 buf << (lUInt32)_index << (lUInt32)_page << (lUInt32)_doc_y << _label << getPath();
18810 return !buf.error();
18811 }
18812
18813 /// deserialize from byte array (pointer will be incremented by number of bytes read)
deserialize(ldomDocument * doc,SerialBuf & buf)18814 bool LVPageMapItem::deserialize( ldomDocument * doc, SerialBuf & buf )
18815 {
18816 if ( buf.error() )
18817 return false;
18818 buf >> _index >> _page >> _doc_y >> _label >> _path;
18819 return !buf.error();
18820
18821 }
18822 /// serialize to byte array (pointer will be incremented by number of bytes written)
serialize(SerialBuf & buf)18823 bool LVPageMap::serialize( SerialBuf & buf )
18824 {
18825 buf << (lUInt32)_page_info_valid << (lUInt32)_children.length() << _source;
18826 if ( buf.error() )
18827 return false;
18828 for ( int i=0; i<_children.length(); i++ ) {
18829 _children[i]->serialize( buf );
18830 if ( buf.error() )
18831 return false;
18832 }
18833 return !buf.error();
18834 }
18835
18836 /// deserialize from byte array (pointer will be incremented by number of bytes read)
deserialize(ldomDocument * doc,SerialBuf & buf)18837 bool LVPageMap::deserialize( ldomDocument * doc, SerialBuf & buf )
18838 {
18839 if ( buf.error() )
18840 return false;
18841 lUInt32 childCount = 0;
18842 lUInt32 pageInfoValid = 0;
18843 buf >> pageInfoValid >> childCount >> _source;
18844 if ( buf.error() )
18845 return false;
18846 _page_info_valid = (bool)pageInfoValid;
18847 for ( int i=0; i<childCount; i++ ) {
18848 LVPageMapItem * item = new LVPageMapItem(doc);
18849 if ( !item->deserialize( doc, buf ) ) {
18850 delete item;
18851 return false;
18852 }
18853 _children.add( item );
18854 if ( buf.error() )
18855 return false;
18856 }
18857 return true;
18858 }
18859
18860
18861 #if 0 && defined(_DEBUG)
18862
18863 #define TEST_FILE_NAME "/tmp/test-cache-file.dat"
18864
18865 #include <lvdocview.h>
18866
// Unit test (compiled out: see the enclosing #if 0) exercising raw stream
// writes/reads on TEST_FILE_NAME, then CacheFile create/write/read and
// open/read on that same file. Does nothing when BUILD_LITE==1.
void testCacheFile()
{
#if BUILD_LITE!=1
    CRLog::info("Starting CacheFile unit test");
    // Payloads used by the CacheFile write/read checks further below
    lUInt8 data1[] = {'T', 'e', 's', 't', 'd', 'a', 't', 'a', 1, 2, 3, 4, 5, 6, 7};
    lUInt8 data2[] = {'T', 'e', 's', 't', 'd', 'a', 't', 'a', '2', 1, 2, 3, 4, 5, 6, 7};
    // NOTE(review): buf1/buf2 and sz1/sz2 are presumably filled by
    // CacheFile::read() - confirm against its declaration
    lUInt8 * buf1;
    lUInt8 * buf2;
    int sz1;
    int sz2;
    lString32 fn(TEST_FILE_NAME);

    // raw stream: write at various positions, then read back
    {
        // These data1/data2 shadow the outer ones inside this scope
        lUInt8 data1[] = {'T', 'e', 's', 't', 'D', 'a', 't', 'a', '1'};
        lUInt8 data2[] = {'T', 'e', 's', 't', 'D', 'a', 't', 'a', '2', 1, 2, 3, 4, 5, 6, 7};
        LVStreamRef s = LVOpenFileStream( fn.c_str(), LVOM_APPEND );
        s->SetPos(0);
        s->Write(data1, sizeof(data1), NULL);
        s->SetPos(4096);
        s->Write(data1, sizeof(data1), NULL);
        s->SetPos(8192);
        s->Write(data2, sizeof(data2), NULL);
        // Position 4096 is written a second time, with data2
        s->SetPos(4096);
        s->Write(data2, sizeof(data2), NULL);
        lUInt8 buf[16];
        s->SetPos(0);
        s->Read(buf, sizeof(data1), NULL);
        MYASSERT(!memcmp(buf, data1, sizeof(data1)), "read 1 content");
        // data2 is what is expected at 4096
        s->SetPos(4096);
        s->Read(buf, sizeof(data2), NULL);
        MYASSERT(!memcmp(buf, data2, sizeof(data2)), "read 2 content");

        //return;
    }

    // write: create the cache file, write 2 blocks, and read them back
    {
        CacheFile f;
        MYASSERT(f.open(cs32("/tmp/blabla-not-exits-file-name"))==false, "Wrong failed open result");
        MYASSERT(f.create( fn )==true, "new file created");
        MYASSERT(f.write(CBT_TEXT_DATA, 1, data1, sizeof(data1), true)==true, "write 1");
        MYASSERT(f.write(CBT_ELEM_DATA, 3, data2, sizeof(data2), false)==true, "write 2");

        MYASSERT(f.read(CBT_TEXT_DATA, 1, buf1, sz1)==true, "read 1");
        MYASSERT(f.read(CBT_ELEM_DATA, 3, buf2, sz2)==true, "read 2");
        MYASSERT(sz1==sizeof(data1), "read 1 size");
        MYASSERT(!memcmp(buf1, data1, sizeof(data1)), "read 1 content");
        MYASSERT(sz2==sizeof(data2), "read 2 size");
        MYASSERT(!memcmp(buf2, data2, sizeof(data2)), "read 2 content");
    }
    // read: open the file just created, and read the same 2 blocks
    {
        CacheFile f;
        MYASSERT(f.open(fn)==true, "Wrong failed open result");
        MYASSERT(f.read(CBT_TEXT_DATA, 1, buf1, sz1)==true, "read 1");
        MYASSERT(f.read(CBT_ELEM_DATA, 3, buf2, sz2)==true, "read 2");
        MYASSERT(sz1==sizeof(data1), "read 1 size");
        MYASSERT(!memcmp(buf1, data1, sizeof(data1)), "read 1 content");
        MYASSERT(sz2==sizeof(data2), "read 2 size");
        MYASSERT(!memcmp(buf2, data2, sizeof(data2)), "read 2 content");
    }

    CRLog::info("Finished CacheFile unit test");
#endif
}
18932
18933 #ifdef _WIN32
18934 #define TEST_FN_TO_OPEN "/projects/test/bibl.fb2.zip"
18935 #else
18936 #define TEST_FN_TO_OPEN "/home/lve/src/test/bibl.fb2.zip"
18937 #endif
18938
// Test (compiled out: see the enclosing #if 0) loading TEST_FN_TO_OPEN
// twice with the document cache enabled: a first time followed by a swap
// to cache, and a second time expected to be served from the cache.
// Does nothing when BUILD_LITE==1.
void runFileCacheTest()
{
#if BUILD_LITE!=1
    CRLog::info("====Cache test started =====");

    // init and clear cache
    ldomDocCache::init(cs32("/tmp/cr3cache"), 100);
    MYASSERT(ldomDocCache::enabled(), "clear cache");

    // First view: load, get a page image, then swap the document to cache
    {
        CRLog::info("====Open document and save to cache=====");
        LVDocView view(4);
        view.Resize(600, 800);
        bool res = view.LoadDocument(TEST_FN_TO_OPEN);
        MYASSERT(res, "load document");
        view.getPageImage(0);
        view.getDocProps()->setInt(PROP_FORCED_MIN_FILE_SIZE_TO_CACHE, 30000);
        view.swapToCache();
        //MYASSERT(res, "swap to cache");
        view.getDocument()->dumpStatistics();
    }
    // Second view: load the same file again, and get a page image
    {
        CRLog::info("====Open document from cache=====");
        LVDocView view(4);
        view.Resize(600, 800);
        bool res = view.LoadDocument(TEST_FN_TO_OPEN);
        MYASSERT(res, "load document");
        view.getDocument()->dumpStatistics();
        view.getPageImage(0);
    }
    CRLog::info("====Cache test finished=====");
#endif
}
18972
18973 void runBasicTinyDomUnitTests()
18974 {
18975 CRLog::info("==========================");
18976 CRLog::info("Starting tinyDOM unit test");
18977 ldomDocument * doc = new ldomDocument();
18978 ldomNode * root = doc->getRootNode();//doc->allocTinyElement( NULL, 0, 0 );
18979 MYASSERT(root!=NULL,"root != NULL");
18980
18981 int el_p = doc->getElementNameIndex(U"p");
18982 int el_title = doc->getElementNameIndex(U"title");
18983 int el_strong = doc->getElementNameIndex(U"strong");
18984 int el_emphasis = doc->getElementNameIndex(U"emphasis");
18985 int attr_id = doc->getAttrNameIndex(U"id");
18986 int attr_name = doc->getAttrNameIndex(U"name");
18987 static lUInt16 path1[] = {el_title, el_p, 0};
18988 static lUInt16 path2[] = {el_title, el_p, el_strong, 0};
18989
18990 CRLog::info("* simple DOM operations, tinyElement");
18991 MYASSERT(root->isRoot(),"root isRoot");
18992 MYASSERT(root->getParentNode()==NULL,"root parent is null");
18993 MYASSERT(root->getParentIndex()==0,"root parent index == 0");
18994 MYASSERT(root->getChildCount()==0,"empty root child count");
18995 ldomNode * el1 = root->insertChildElement(el_p);
18996 MYASSERT(root->getChildCount()==1,"root child count 1");
18997 MYASSERT(el1->getParentNode()==root,"element parent node");
18998 MYASSERT(el1->getParentIndex()==root->getDataIndex(),"element parent node index");
18999 MYASSERT(el1->getNodeId()==el_p, "node id");
19000 MYASSERT(el1->getNodeNsId()==LXML_NS_NONE, "node nsid");
19001 MYASSERT(!el1->isRoot(),"elem not isRoot");
19002 ldomNode * el2 = root->insertChildElement(el_title);
19003 MYASSERT(root->getChildCount()==2,"root child count 2");
19004 MYASSERT(el2->getNodeId()==el_title, "node id");
19005 MYASSERT(el2->getNodeNsId()==LXML_NS_NONE, "node nsid");
19006 lString32 nodename = el2->getNodeName();
19007 //CRLog::debug("node name: %s", LCSTR(nodename));
19008 MYASSERT(nodename==U"title","node name");
19009 ldomNode * el21 = el2->insertChildElement(el_p);
19010 MYASSERT(root->getNodeLevel()==1,"node level 1");
19011 MYASSERT(el2->getNodeLevel()==2,"node level 2");
19012 MYASSERT(el21->getNodeLevel()==3,"node level 3");
19013 MYASSERT(el21->getNodeIndex()==0,"node index single");
19014 MYASSERT(el1->getNodeIndex()==0,"node index first");
19015 MYASSERT(el2->getNodeIndex()==1,"node index last");
19016 MYASSERT(root->getNodeIndex()==0,"node index for root");
19017 MYASSERT(root->getFirstChild()==el1,"first child");
19018 MYASSERT(root->getLastChild()==el2,"last child");
19019 MYASSERT(el2->getFirstChild()==el21,"first single child");
19020 MYASSERT(el2->getLastChild()==el21,"last single child");
19021 MYASSERT(el21->getFirstChild()==NULL,"first child - no children");
19022 MYASSERT(el21->getLastChild()==NULL,"last child - no children");
19023 ldomNode * el0 = root->insertChildElement(1, LXML_NS_NONE, el_title);
19024 MYASSERT(el1->getNodeIndex()==0,"insert in the middle");
19025 MYASSERT(el0->getNodeIndex()==1,"insert in the middle");
19026 MYASSERT(el2->getNodeIndex()==2,"insert in the middle");
19027 MYASSERT(root->getChildNode(0)==el1,"child node 0");
19028 MYASSERT(root->getChildNode(1)==el0,"child node 1");
19029 MYASSERT(root->getChildNode(2)==el2,"child node 2");
19030 ldomNode * removedNode = root->removeChild( 1 );
19031 MYASSERT(removedNode==el0,"removed node");
19032 el0->destroy();
19033 MYASSERT(el0->isNull(),"destroyed node isNull");
19034 MYASSERT(root->getChildNode(0)==el1,"child node 0, after removal");
19035 MYASSERT(root->getChildNode(1)==el2,"child node 1, after removal");
19036 ldomNode * el02 = root->insertChildElement(5, LXML_NS_NONE, el_emphasis);
19037 MYASSERT(el02==el0,"removed node reusage");
19038
19039 {
19040 ldomNode * f1 = root->findChildElement(path1);
19041 MYASSERT(f1==el21, "find 1 on mutable - is el21");
19042 MYASSERT(f1->getNodeId()==el_p, "find 1 on mutable");
19043 //ldomNode * f2 = root->findChildElement(path2);
19044 //MYASSERT(f2!=NULL, "find 2 on mutable - not null");
19045 //MYASSERT(f2==el21, "find 2 on mutable - is el21");
19046 //MYASSERT(f2->getNodeId()==el_strong, "find 2 on mutable");
19047 }
19048
19049 CRLog::info("* simple DOM operations, mutable text");
19050 lString32 sampleText("Some sample text.");
19051 lString32 sampleText2("Some sample text 2.");
19052 lString32 sampleText3("Some sample text 3.");
19053 ldomNode * text1 = el1->insertChildText(sampleText);
19054 MYASSERT(text1->getText()==sampleText, "sample text 1 match unicode");
19055 MYASSERT(text1->getNodeLevel()==3,"text node level");
19056 MYASSERT(text1->getNodeIndex()==0,"text node index");
19057 MYASSERT(text1->isText(),"text node isText");
19058 MYASSERT(!text1->isElement(),"text node isElement");
19059 MYASSERT(!text1->isNull(),"text node isNull");
19060 ldomNode * text2 = el1->insertChildText(0, sampleText2);
19061 MYASSERT(text2->getNodeIndex()==0,"text node index, insert at beginning");
19062 MYASSERT(text2->getText()==sampleText2, "sample text 2 match unicode");
19063 MYASSERT(text2->getText8()==UnicodeToUtf8(sampleText2), "sample text 2 match utf8");
19064 text1->setText(sampleText2);
19065 MYASSERT(text1->getText()==sampleText2, "sample text 1 match unicode, changed");
19066 text1->setText8(UnicodeToUtf8(sampleText3));
19067 MYASSERT(text1->getText()==sampleText3, "sample text 1 match unicode, changed 8");
19068 MYASSERT(text1->getText8()==UnicodeToUtf8(sampleText3), "sample text 1 match utf8, changed");
19069
19070 MYASSERT(el1->getFirstTextChild()==text2, "firstTextNode");
19071 MYASSERT(el1->getLastTextChild()==text1, "lastTextNode");
19072 MYASSERT(el21->getLastTextChild()==NULL, "lastTextNode NULL");
19073
19074 #if BUILD_LITE!=1
19075 CRLog::info("* style cache");
19076 {
19077 css_style_ref_t style1;
19078 style1 = css_style_ref_t( new css_style_rec_t );
19079 style1->display = css_d_block;
19080 style1->white_space = css_ws_normal;
19081 style1->text_align = css_ta_left;
19082 style1->text_align_last = css_ta_left;
19083 style1->text_decoration = css_td_none;
19084 style1->text_transform = css_tt_none;
19085 style1->hyphenate = css_hyph_auto;
19086 style1->color.type = css_val_unspecified;
19087 style1->color.value = 0x000000;
19088 style1->background_color.type = css_val_unspecified;
19089 style1->background_color.value = 0xFFFFFF;
19090 style1->page_break_before = css_pb_auto;
19091 style1->page_break_after = css_pb_auto;
19092 style1->page_break_inside = css_pb_auto;
19093 style1->vertical_align.type = css_val_unspecified;
19094 style1->vertical_align.value = css_va_baseline;
19095 style1->font_family = css_ff_sans_serif;
19096 style1->font_size.type = css_val_px;
19097 style1->font_size.value = 24 << 8;
19098 style1->font_name = cs8("Arial");
19099 style1->font_weight = css_fw_400;
19100 style1->font_style = css_fs_normal;
19101 style1->font_features.type = css_val_unspecified;
19102 style1->font_features.value = 0;
19103 style1->text_indent.type = css_val_px;
19104 style1->text_indent.value = 0;
19105 style1->line_height.type = css_val_unspecified;
19106 style1->line_height.value = css_generic_normal; // line-height: normal
19107 style1->cr_hint.type = css_val_unspecified;
19108 style1->cr_hint.value = CSS_CR_HINT_NONE;
19109
19110 css_style_ref_t style2;
19111 style2 = css_style_ref_t( new css_style_rec_t );
19112 style2->display = css_d_block;
19113 style2->white_space = css_ws_normal;
19114 style2->text_align = css_ta_left;
19115 style2->text_align_last = css_ta_left;
19116 style2->text_decoration = css_td_none;
19117 style2->text_transform = css_tt_none;
19118 style2->hyphenate = css_hyph_auto;
19119 style2->color.type = css_val_unspecified;
19120 style2->color.value = 0x000000;
19121 style2->background_color.type = css_val_unspecified;
19122 style2->background_color.value = 0xFFFFFF;
19123 style2->page_break_before = css_pb_auto;
19124 style2->page_break_after = css_pb_auto;
19125 style2->page_break_inside = css_pb_auto;
19126 style2->vertical_align.type = css_val_unspecified;
19127 style2->vertical_align.value = css_va_baseline;
19128 style2->font_family = css_ff_sans_serif;
19129 style2->font_size.type = css_val_px;
19130 style2->font_size.value = 24 << 8;
19131 style2->font_name = cs8("Arial");
19132 style2->font_weight = css_fw_400;
19133 style2->font_style = css_fs_normal;
19134 style2->font_features.type = css_val_unspecified;
19135 style2->font_features.value = 0;
19136 style2->text_indent.type = css_val_px;
19137 style2->text_indent.value = 0;
19138 style2->line_height.type = css_val_unspecified;
19139 style2->line_height.value = css_generic_normal; // line-height: normal
19140 style2->cr_hint.type = css_val_unspecified;
19141 style2->cr_hint.value = CSS_CR_HINT_NONE;
19142
19143 css_style_ref_t style3;
19144 style3 = css_style_ref_t( new css_style_rec_t );
19145 style3->display = css_d_block;
19146 style3->white_space = css_ws_normal;
19147 style3->text_align = css_ta_right;
19148 style3->text_align_last = css_ta_left;
19149 style3->text_decoration = css_td_none;
19150 style3->text_transform = css_tt_none;
19151 style3->hyphenate = css_hyph_auto;
19152 style3->color.type = css_val_unspecified;
19153 style3->color.value = 0x000000;
19154 style3->background_color.type = css_val_unspecified;
19155 style3->background_color.value = 0xFFFFFF;
19156 style3->page_break_before = css_pb_auto;
19157 style3->page_break_after = css_pb_auto;
19158 style3->page_break_inside = css_pb_auto;
19159 style3->vertical_align.type = css_val_unspecified;
19160 style3->vertical_align.value = css_va_baseline;
19161 style3->font_family = css_ff_sans_serif;
19162 style3->font_size.type = css_val_px;
19163 style3->font_size.value = 24 << 8;
19164 style3->font_name = cs8("Arial");
19165 style3->font_weight = css_fw_400;
19166 style3->font_style = css_fs_normal;
19167 style3->font_features.type = css_val_unspecified;
19168 style3->font_features.value = 0;
19169 style3->text_indent.type = css_val_px;
19170 style3->text_indent.value = 0;
19171 style3->line_height.type = css_val_unspecified;
19172 style3->line_height.value = css_generic_normal; // line-height: normal
19173 style3->cr_hint.type = css_val_unspecified;
19174 style3->cr_hint.value = CSS_CR_HINT_NONE;
19175
19176 el1->setStyle(style1);
19177 css_style_ref_t s1 = el1->getStyle();
19178 MYASSERT(!s1.isNull(), "style is set");
19179 el2->setStyle(style2);
19180 MYASSERT(*style1==*style2, "identical styles : == is true");
19181 MYASSERT(calcHash(*style1)==calcHash(*style2), "identical styles have the same hashes");
19182 MYASSERT(el1->getStyle().get()==el2->getStyle().get(), "identical styles reused");
19183 el21->setStyle(style3);
19184 MYASSERT(el1->getStyle().get()!=el21->getStyle().get(), "different styles not reused");
19185 }
19186
19187 CRLog::info("* font cache");
19188 {
19189 font_ref_t font1 = fontMan->GetFont(24, 400, false, css_ff_sans_serif, cs8("DejaVu Sans"));
19190 font_ref_t font2 = fontMan->GetFont(24, 400, false, css_ff_sans_serif, cs8("DejaVu Sans"));
19191 font_ref_t font3 = fontMan->GetFont(28, 800, false, css_ff_serif, cs8("DejaVu Sans Condensed"));
19192 MYASSERT(el1->getFont().isNull(), "font is not set");
19193 el1->setFont(font1);
19194 MYASSERT(!el1->getFont().isNull(), "font is set");
19195 el2->setFont(font2);
19196 MYASSERT(*font1==*font2, "identical fonts : == is true");
19197 MYASSERT(calcHash(font1)==calcHash(font2), "identical styles have the same hashes");
19198 MYASSERT(el1->getFont().get()==el2->getFont().get(), "identical fonts reused");
19199 el21->setFont(font3);
19200 MYASSERT(el1->getFont().get()!=el21->getFont().get(), "different fonts not reused");
19201 }
19202
19203 CRLog::info("* persistance test");
19204
19205 el2->setAttributeValue(LXML_NS_NONE, attr_id, U"id1");
19206 el2->setAttributeValue(LXML_NS_NONE, attr_name, U"name1");
19207 MYASSERT(el2->getNodeId()==el_title, "mutable node id");
19208 MYASSERT(el2->getNodeNsId()==LXML_NS_NONE, "mutable node nsid");
19209 MYASSERT(el2->getAttributeValue(attr_id)==U"id1", "attr id1 mutable");
19210 MYASSERT(el2->getAttributeValue(attr_name)==U"name1", "attr name1 mutable");
19211 MYASSERT(el2->getAttrCount()==2, "attr count mutable");
19212 el2->persist();
19213 MYASSERT(el2->getAttributeValue(attr_id)==U"id1", "attr id1 pers");
19214 MYASSERT(el2->getAttributeValue(attr_name)==U"name1", "attr name1 pers");
19215 MYASSERT(el2->getNodeId()==el_title, "persistent node id");
19216 MYASSERT(el2->getNodeNsId()==LXML_NS_NONE, "persistent node nsid");
19217 MYASSERT(el2->getAttrCount()==2, "attr count persist");
19218
19219 {
19220 ldomNode * f1 = root->findChildElement(path1);
19221 MYASSERT(f1==el21, "find 1 on mutable - is el21");
19222 MYASSERT(f1->getNodeId()==el_p, "find 1 on mutable");
19223 }
19224
19225 el2->modify();
19226 MYASSERT(el2->getNodeId()==el_title, "mutable 2 node id");
19227 MYASSERT(el2->getNodeNsId()==LXML_NS_NONE, "mutable 2 node nsid");
19228 MYASSERT(el2->getAttributeValue(attr_id)==U"id1", "attr id1 mutable 2");
19229 MYASSERT(el2->getAttributeValue(attr_name)==U"name1", "attr name1 mutable 2");
19230 MYASSERT(el2->getAttrCount()==2, "attr count mutable 2");
19231
19232 {
19233 ldomNode * f1 = root->findChildElement(path1);
19234 MYASSERT(f1==el21, "find 1 on mutable - is el21");
19235 MYASSERT(f1->getNodeId()==el_p, "find 1 on mutable");
19236 }
19237
19238 CRLog::info("* convert to persistent");
19239 CRTimerUtil infinite;
19240 doc->persist(infinite);
19241 doc->dumpStatistics();
19242
19243 MYASSERT(el21->getFirstChild()==NULL,"first child - no children");
19244 MYASSERT(el21->isPersistent(), "persistent before insertChildElement");
19245 ldomNode * el211 = el21->insertChildElement(el_strong);
19246 MYASSERT(!el21->isPersistent(), "mutable after insertChildElement");
19247 el211->persist();
19248 MYASSERT(el211->isPersistent(), "persistent before insertChildText");
19249 el211->insertChildText(cs32(U"bla bla bla"));
19250 el211->insertChildText(cs32(U"bla bla blaw"));
19251 MYASSERT(!el211->isPersistent(), "modifable after insertChildText");
19252 //el21->insertChildElement(el_strong);
19253 MYASSERT(el211->getChildCount()==2, "child count, in mutable");
19254 el211->persist();
19255 MYASSERT(el211->getChildCount()==2, "child count, in persistent");
19256 el211->modify();
19257 MYASSERT(el211->getChildCount()==2, "child count, in mutable again");
19258 CRTimerUtil infinite2;
19259 doc->persist(infinite2);
19260
19261 ldomNode * f1 = root->findChildElement(path1);
19262 MYASSERT(f1->getNodeId()==el_p, "find 1");
19263 ldomNode * f2 = root->findChildElement(path2);
19264 MYASSERT(f2->getNodeId()==el_strong, "find 2");
19265 MYASSERT(f2 == el211, "find 2, ref");
19266
19267
19268 CRLog::info("* compacting");
19269 doc->compact();
19270 doc->dumpStatistics();
19271 #endif
19272
19273 delete doc;
19274
19275
19276 CRLog::info("Finished tinyDOM unit test");
19277
19278 CRLog::info("==========================");
19279
19280 }
19281
/// Smoke test for CHM container support.
///
/// Opens a CHM archive from a hard-coded path, opens "/index.html" inside
/// it, and reads two 1000-byte chunks from that item (first from offset 100,
/// then from offset 0), tracing each chunk as a C string.
/// The body is compiled only when CHM_SUPPORT_ENABLED==1 and BUILD_LITE!=1;
/// otherwise the function does nothing.
void runCHMUnitTest()
{
#if CHM_SUPPORT_ENABLED==1
#if BUILD_LITE!=1
    // Raw archive file first, then the CHM container layered on top of it.
    LVStreamRef archiveFile = LVOpenFileStream("/home/lve/src/test/mysql.chm", LVOM_READ);
    MYASSERT ( !archiveFile.isNull(), "container stream opened" );
    CRLog::trace("runCHMUnitTest() -- file stream opened ok");

    LVContainerRef archive = LVOpenCHMContainer( archiveFile );
    MYASSERT ( !archive.isNull(), "container opened" );
    CRLog::trace("runCHMUnitTest() -- container opened ok");

    LVStreamRef item = archive->OpenStream(U"/index.html", LVOM_READ);
    MYASSERT ( !item.isNull(), "item opened" );
    CRLog::trace("runCHMUnitTest() -- index.html opened ok: size=%d", (int)item->GetSize());

    char chunk[1000];
    lvsize_t got = 0;

    // First chunk: seek into the middle of the item and expect a full read.
    MYASSERT( item->SetPos(100)==100, "SetPos()" );
    MYASSERT( item->Read(chunk, sizeof(chunk), &got)==LVERR_OK, "Read()" );
    MYASSERT( got==sizeof(chunk), "Read() -- bytesRead" );
    // Overwrite the last byte so the chunk can be logged with %s.
    chunk[sizeof(chunk) - 1] = 0;
    CRLog::trace("CHM/index.html Contents 1000: %s", chunk);

    // Second chunk: seek back to the start and repeat the same checks.
    MYASSERT( item->SetPos(0)==0, "SetPos() 2" );
    MYASSERT( item->Read(chunk, sizeof(chunk), &got)==LVERR_OK, "Read() 2" );
    MYASSERT( got==sizeof(chunk), "Read() -- bytesRead 2" );
    chunk[sizeof(chunk) - 1] = 0;
    CRLog::trace("CHM/index.html Contents 0: %s", chunk);
#endif
#endif
}
19311
19312 static void makeTestFile( const char * fname, int size )
19313 {
19314 LVStreamRef s = LVOpenFileStream( fname, LVOM_WRITE );
19315 MYASSERT( !s.isNull(), "makeTestFile create" );
19316 int seed = 0;
19317 lUInt8 * buf = new lUInt8[size];
19318 for ( int i=0; i<size; i++ ) {
19319 buf[i] = (seed >> 9) & 255;
19320 seed = seed * 31 + 14323;
19321 }
19322 MYASSERT( s->Write(buf, size, NULL)==LVERR_OK, "makeTestFile write" );
19323 delete[] buf;
19324 }
19325
19326 void runBlockWriteCacheTest()
19327 {
19328
19329
19330
19331 int sz = 2000000;
19332 const char * fn1 = "/tmp/tf1.dat";
19333 const char * fn2 = "/tmp/tf2.dat";
19334 //makeTestFile( fn1, sz );
19335 //makeTestFile( fn2, sz );
19336
19337 CRLog::debug("BlockCache test started");
19338
19339 LVStreamRef s1 = LVOpenFileStream( fn1, LVOM_APPEND );
19340 LVStreamRef s2 = LVCreateBlockWriteStream( LVOpenFileStream( fn2, LVOM_APPEND ), 0x8000, 16);
19341 MYASSERT(! s1.isNull(), "s1");
19342 MYASSERT(! s2.isNull(), "s2");
19343 LVStreamRef ss = LVCreateCompareTestStream(s1, s2);
19344 lUInt8 buf[0x100000];
19345 for ( int i=0; i<sizeof(buf); i++ ) {
19346 buf[i] = (lUInt8)(rand() & 0xFF);
19347 }
19348 //memset( buf, 0xAD, 1000000 );
19349 ss->SetPos( 0 );
19350 ss->Write( buf, 150, NULL );
19351 ss->SetPos( 0 );
19352 ss->Write( buf, 150, NULL );
19353 ss->SetPos( 0 );
19354 ss->Write( buf, 150, NULL );
19355
19356
19357 ss->SetPos( 1000 );
19358 ss->Read( buf, 5000, NULL );
19359 ss->SetPos( 100000 );
19360 ss->Read( buf+10000, 150000, NULL );
19361
19362 ss->SetPos( 1000 );
19363 ss->Write( buf, 15000, NULL );
19364 ss->SetPos( 1000 );
19365 ss->Read( buf+100000, 15000, NULL );
19366 ss->Read( buf, 1000000, NULL );
19367
19368
19369 ss->SetPos( 1000 );
19370 ss->Write( buf, 15000, NULL );
19371 ss->Write( buf, 15000, NULL );
19372 ss->Write( buf, 15000, NULL );
19373 ss->Write( buf, 15000, NULL );
19374
19375
19376 ss->SetPos( 100000 );
19377 ss->Write( buf+15000, 150000, NULL );
19378 ss->SetPos( 100000 );
19379 ss->Read( buf+25000, 200000, NULL );
19380
19381 ss->SetPos( 100000 );
19382 ss->Read( buf+55000, 200000, NULL );
19383
19384 ss->SetPos( 100000 );
19385 ss->Write( buf+1000, 250000, NULL );
19386 ss->SetPos( 150000 );
19387 ss->Read( buf, 50000, NULL );
19388 ss->SetPos( 1000000 );
19389 ss->Write( buf, 500000, NULL );
19390 for ( int i=0; i<10; i++ )
19391 ss->Write( buf, 5000, NULL );
19392 ss->Read( buf, 50000, NULL );
19393
19394 ss->SetPos( 5000000 );
19395 ss->Write( buf, 500000, NULL );
19396 ss->SetPos( 4800000 );
19397 ss->Read( buf, 500000, NULL );
19398
19399 for ( int i=0; i<20; i++ ) {
19400 int op = (rand() & 15) < 5;
19401 long offset = (rand()&0x7FFFF);
19402 long foffset = (rand()&0x3FFFFF);
19403 long size = (rand()&0x3FFFF);
19404 ss->SetPos(foffset);
19405 if ( op==0 ) {
19406 // read
19407 ss->Read(buf+offset, size, NULL);
19408 } else {
19409 ss->Write(buf+offset, size, NULL);
19410 }
19411 }
19412
19413 CRLog::debug("BlockCache test finished");
19414
19415 }
19416
19417 void runTinyDomUnitTests()
19418 {
19419 CRLog::info("runTinyDomUnitTests()");
19420 runBlockWriteCacheTest();
19421
19422 runBasicTinyDomUnitTests();
19423
19424 CRLog::info("==========================");
19425 testCacheFile();
19426
19427 runFileCacheTest();
19428 CRLog::info("==========================");
19429
19430 }
19431
19432 #endif
19433