1 /** \file lvtinydom.h
2     \brief fast and compact XML DOM tree
3 
4     CoolReader Engine
5 
6     (c) Vadim Lopatin, 2000-2009
7     This source code is distributed under the terms of
8     GNU General Public License
9     See LICENSE file for details
10 
11 
12 	Goal: make fast DOM implementation with small memory footprint.
13 
14     2009/04 : Introducing new storage model, optimized for mmap.
15     All DOM objects are divided by 2 parts.
16     1) Short RAM instance
17     2) Data storage part, which could be placed to mmap buffer.
18 
19     Document object storage should handle object table and data buffer.
20     Each object has DataIndex, index of entry in object table.
21     Object table holds pointer to RAM instance and data storage for each object.
22 */
23 
24 
25 #ifndef __LV_TINYDOM_H_INCLUDED__
26 #define __LV_TINYDOM_H_INCLUDED__
27 
28 #include "lvmemman.h"
29 #include "lvstring.h"
30 #include "lstridmap.h"
31 #include "lvxml.h"
32 #include "dtddef.h"
33 #include "lvstyles.h"
34 #include "lvdrawbuf.h"
35 #include "lvembeddedfont.h"
36 #include "lvstsheet.h"
37 #include "lvpagesplitter.h"
38 #include "lvptrvec.h"
39 #include "lvhashtable.h"
40 #include "lvimg.h"
41 #include "props.h"
42 #include "bookformats.h"
43 #include "serialbuf.h"
44 #include "lvstring32hashedcollection.h"
45 
46 // Allows for requesting older DOM building code (including bugs NOT fixed)
47 extern const int gDOMVersionCurrent;
48 
49 // Also defined in src/lvtinydom.cpp
50 #define DOM_VERSION_WITH_NORMALIZED_XPOINTERS 20200223
51 
52 #define LXML_NO_DATA       0 ///< to mark data storage record as empty
53 #define LXML_ELEMENT_NODE  1 ///< element node
54 #define LXML_TEXT_NODE     2 ///< text node
55 //#define LXML_DOCUMENT_NODE 3 ///< document node (not implemented)
56 //#define LXML_COMMENT_NODE  4 ///< comment node (not implemented)
57 
58 
59 /// docFlag mask, enable internal stylesheet of document and style attribute of elements
60 #define DOC_FLAG_ENABLE_INTERNAL_STYLES 1
61 /// docFlag mask, enable paperbook-like footnotes
62 #define DOC_FLAG_ENABLE_FOOTNOTES       2
63 /// docFlag mask, enable preformatted text
64 #define DOC_FLAG_PREFORMATTED_TEXT      4
65 /// docFlag mask, enable document embedded fonts (EPUB)
66 #define DOC_FLAG_ENABLE_DOC_FONTS       8
67 /// docFlag mask, force page breaks on non-linear fragments (EPUB)
68 #define DOC_FLAG_NONLINEAR_PAGEBREAK   16
69 /// default docFlag set
70 #define DOC_FLAG_DEFAULTS (DOC_FLAG_ENABLE_INTERNAL_STYLES|DOC_FLAG_ENABLE_FOOTNOTES|DOC_FLAG_ENABLE_DOC_FONTS)
71 
72 
73 
74 #define LXML_NS_NONE 0       ///< no namespace specified
75 #define LXML_NS_ANY  0xFFFF  ///< any namespace can be specified
76 #define LXML_ATTR_VALUE_NONE  0xFFFFFFFF  ///< attribute not found
77 
78 #define DOC_STRING_HASH_SIZE  256
79 #define RESERVED_DOC_SPACE    4096
80 #define MAX_TYPE_ID           1024 // max of element, ns, attr
81 #define MAX_ELEMENT_TYPE_ID   1024
82 #define MAX_NAMESPACE_TYPE_ID 64
83 #define MAX_ATTRIBUTE_TYPE_ID 1024
84 #define UNKNOWN_ELEMENT_TYPE_ID   (MAX_ELEMENT_TYPE_ID>>1)
85 #define UNKNOWN_ATTRIBUTE_TYPE_ID (MAX_ATTRIBUTE_TYPE_ID>>1)
86 #define UNKNOWN_NAMESPACE_TYPE_ID (MAX_NAMESPACE_TYPE_ID>>1)
87 
88 // document property names
89 #define DOC_PROP_AUTHORS         "doc.authors"
90 #define DOC_PROP_TITLE           "doc.title"
91 #define DOC_PROP_LANGUAGE        "doc.language"
92 #define DOC_PROP_DESCRIPTION     "doc.description"
93 #define DOC_PROP_KEYWORDS        "doc.keywords"
94 #define DOC_PROP_SERIES_NAME     "doc.series.name"
95 #define DOC_PROP_SERIES_NUMBER   "doc.series.number"
96 #define DOC_PROP_ARC_NAME        "doc.archive.name"
97 #define DOC_PROP_ARC_PATH        "doc.archive.path"
98 #define DOC_PROP_ARC_SIZE        "doc.archive.size"
99 #define DOC_PROP_ARC_FILE_COUNT  "doc.archive.file.count"
100 #define DOC_PROP_FILE_NAME       "doc.file.name"
101 #define DOC_PROP_FILE_PATH       "doc.file.path"
102 #define DOC_PROP_FILE_SIZE       "doc.file.size"
103 #define DOC_PROP_FILE_FORMAT     "doc.file.format"
104 #define DOC_PROP_FILE_FORMAT_ID  "doc.file.format.id"
105 #define DOC_PROP_FILE_CRC32      "doc.file.crc32"
106 #define DOC_PROP_CODE_BASE       "doc.file.code.base"
107 #define DOC_PROP_COVER_FILE      "doc.cover.file"
108 
109 #define DEF_SPACE_WIDTH_SCALE_PERCENT 100
110 #define DEF_MIN_SPACE_CONDENSING_PERCENT 50
111 #define DEF_UNUSED_SPACE_THRESHOLD_PERCENT 5
112 #define DEF_MAX_ADDED_LETTER_SPACING_PERCENT 0
113 
114 #define NODE_DISPLAY_STYLE_HASH_UNINITIALIZED 0xFFFFFFFF
115 
116 // To be used for 'direction' in ldomNode->elementFromPoint(lvPoint pt, int direction)
117 // and ldomDocument->createXPointer(lvPoint pt, int direction...) as a way to
118 // self-document what's expected (but the code does > and < comparisons, so
119 // don't change these values - some clients may also already use 0/1/-1).
120 // Use PT_DIR_EXACT to find the exact node at pt (with y AND x check),
121 // which is needed when selecting text or checking if tap is on a link,
122 // (necessary in table cells or floats, and in RTL text).
123 // Use PT_DIR_SCAN_* when interested only in finding the slice of a page
124 // at y (eg. to get the current page top), finding the nearest node in
125 // direction if pt.y happens to be in some node margin area.
// Use PT_DIR_SCAN_BACKWARD_LOGICAL_* when looking for the xpointers of a whole
// page range, to not miss words on first or last line in bidi/RTL text.
128 #define PT_DIR_SCAN_BACKWARD_LOGICAL_LAST   -3
129 #define PT_DIR_SCAN_BACKWARD_LOGICAL_FIRST  -2
130 #define PT_DIR_SCAN_BACKWARD                -1
131 #define PT_DIR_EXACT                         0
132 #define PT_DIR_SCAN_FORWARD                  1
133 #define PT_DIR_SCAN_FORWARD_LOGICAL_FIRST    2
134 #define PT_DIR_SCAN_FORWARD_LOGICAL_LAST     3
135 
136 
137 //#if BUILD_LITE!=1
138 /// final block cache
139 typedef LVRef<LFormattedText> LFormattedTextRef;
140 typedef LVCacheMap< ldomNode *, LFormattedTextRef> CVRendBlockCache;
141 //#endif
142 
143 
144 //#define LDOM_USE_OWN_MEM_MAN 0
/// Kind of a single XPath step (the patterns on the right show the matching path syntax)
typedef enum {
    xpath_step_error     = 0, ///< error
    xpath_step_element   = 1, ///< element of type 'name' with 'index'   /elemname[N]/
    xpath_step_text      = 2, ///< text node with 'index'                /text()[N]/
    xpath_step_nodeindex = 3, ///< node index                            /N/
    xpath_step_point     = 4  ///< point index                           .N
} xpath_step_t;
/// Parses one XPath step and returns its kind (xpath_step_error on error).
/// 'name' and 'index' are non-const references, presumably filled with the
/// element name and the index of the step, depending on the returned kind.
/// NOTE(review): 'path' is also taken by non-const reference, so it is presumably
/// advanced past the parsed step - confirm against the definition, which is not
/// in this header.
xpath_step_t ParseXPathStep( const lChar8 * &path, lString8 & name, int & index );
154 
/// Result code of operations which can be interrupted and continued later
typedef enum {
    CR_DONE    = 0, ///< operation is finished successfully
    CR_TIMEOUT = 1, ///< operation is incomplete - interrupted by timeout
    CR_ERROR   = 2  ///< error while executing operation
} ContinuousOperationResult;
161 
/// How an image is allowed to be scaled
typedef enum {
    IMG_NO_SCALE        = 0, ///< scaling is disabled
    IMG_INTEGER_SCALING = 1, ///< integer multiplier/divisor scaling -- *2, *3 only
    IMG_FREE_SCALING    = 2  ///< free scaling, non-integer factor
} img_scaling_mode_t;
168 
/// Selects how XPointer path steps are expressed.
/// NOTE(review): judging by the enumerator names only, steps either use element
/// names or plain indexes - confirm against the code consuming this enum.
enum XPointerMode {
    XPATH_USE_NAMES   = 0,
    XPATH_USE_INDEXES = 1
};
173 
174 /// image scaling option
175 struct img_scaling_option_t {
176     img_scaling_mode_t mode;
177     int max_scale;
getHashimg_scaling_option_t178     int getHash() { return (int)mode * 33 + max_scale; }
179     // creates default option value
180     img_scaling_option_t();
181 };
182 
183 /// set of images scaling options for different kind of images
184 struct img_scaling_options_t {
185     img_scaling_option_t zoom_in_inline;
186     img_scaling_option_t zoom_in_block;
187     img_scaling_option_t zoom_out_inline;
188     img_scaling_option_t zoom_out_block;
189     /// returns hash value
getHashimg_scaling_options_t190     int getHash() { return (((zoom_in_inline.getHash()*33 + zoom_in_block.getHash())*33 + zoom_out_inline.getHash())*33 + zoom_out_block.getHash()); }
191     /// creates default options
192     img_scaling_options_t();
193     /// returns true if any changes occured
194     bool update( CRPropRef props, int fontSize );
195 };
196 
197 //#if BUILD_LITE!=1
198 struct DataStorageItemHeader;
199 struct TextDataStorageItem;
200 struct ElementDataStorageItem;
201 struct NodeItem;
202 class DataBuffer;
203 //#endif
204 
205 
206 /// DocView Callback interface - track progress, external links, etc.
207 class LVDocViewCallback {
208 public:
209     /// on starting file loading
OnLoadFileStart(lString32 filename)210     virtual void OnLoadFileStart( lString32 filename ) { CR_UNUSED(filename); }
211     /// format detection finished
OnLoadFileFormatDetected(doc_format_t)212     virtual void OnLoadFileFormatDetected( doc_format_t /*fileFormat*/) { }
213     /// file loading is finished successfully - drawCoveTo() may be called there
OnLoadFileEnd()214     virtual void OnLoadFileEnd() { }
215     /// first page is loaded from file an can be formatted for preview
OnLoadFileFirstPagesReady()216     virtual void OnLoadFileFirstPagesReady() { }
217     /// file progress indicator, called with values 0..100
OnLoadFileProgress(int)218     virtual void OnLoadFileProgress( int /*percent*/) { }
219     /// file load finiished with error
OnLoadFileError(lString32)220     virtual void OnLoadFileError(lString32 /*message*/) { }
221     /// node style update started
OnNodeStylesUpdateStart()222     virtual void OnNodeStylesUpdateStart() { }
223     /// node style update finished
OnNodeStylesUpdateEnd()224     virtual void OnNodeStylesUpdateEnd() { }
225     /// node style update progress, called with values 0..100
OnNodeStylesUpdateProgress(int)226     virtual void OnNodeStylesUpdateProgress(int /*percent*/) { }
227     /// document formatting started
OnFormatStart()228     virtual void OnFormatStart() { }
229     /// document formatting finished
OnFormatEnd()230     virtual void OnFormatEnd() { }
231     /// format progress, called with values 0..100
OnFormatProgress(int)232     virtual void OnFormatProgress(int /*percent*/) { }
233     /// document fully loaded and rendered (follows OnFormatEnd(), or OnLoadFileEnd() when loaded from cache)
OnDocumentReady()234     virtual void OnDocumentReady() { }
235     /// format progress, called with values 0..100
OnExportProgress(int)236     virtual void OnExportProgress(int /*percent*/) { }
237     /// Override to handle external links
OnExternalLink(lString32,ldomNode *)238     virtual void OnExternalLink(lString32 /*url*/, ldomNode * /*node*/) { }
239     /// Called when page images should be invalidated (clearImageCache() called in LVDocView)
OnImageCacheClear()240     virtual void OnImageCacheClear() { }
241     /// return true if reload will be processed by external code, false to let internal code process it
OnRequestReload()242     virtual bool OnRequestReload() { return false; }
243     /// save cache file started
OnSaveCacheFileStart()244     virtual void OnSaveCacheFileStart() { }
245     /// save cache file finished
OnSaveCacheFileEnd()246     virtual void OnSaveCacheFileEnd() { }
247     /// save cache file progress, called with values 0..100
OnSaveCacheFileProgress(int)248     virtual void OnSaveCacheFileProgress(int /*percent*/) { }
249     /// destructor
~LVDocViewCallback()250     virtual ~LVDocViewCallback() { }
251 };
252 
/// Callback interface used while loading a document from cache
/// (passed to tinyNodeCollection::openFromCache()).
class CacheLoadingCallback
{
public:
    /// called when format of document being loaded from cache became known
    virtual void OnCacheFileFormatDetected( doc_format_t ) = 0;
    /// virtual destructor, so implementations can be destroyed through this interface
    virtual ~CacheLoadingCallback() { }
};
260 
261 
262 class ldomTextStorageChunk;
263 class ldomTextStorageChunkBuilder;
264 struct ElementDataStorageItem;
265 class CacheFile;
266 class tinyNodeCollection;
267 
/// Per-element style record: a pair of 16-bit indexes, read and written through
/// ldomDataStorageManager::getStyleData() / setStyleData().
struct ldomNodeStyleInfo
{
    lUInt16 _fontIndex;  ///< font index
    lUInt16 _styleIndex; ///< style index
};
273 
274 class ldomBlobItem;
275 #define BLOB_NAME_PREFIX U"@blob#"
276 #define MOBI_IMAGE_NAME_PREFIX U"mobi_image_"
/// Collection of named BLOBs (ldomBlobItem entries) tied to a CacheFile.
/// NOTE(review): the implementation is not in this header; comments below only
/// restate what the declarations show.
class ldomBlobCache
{
    CacheFile * _cacheFile;           ///< cache file, see setCacheFile()
    LVPtrVector<ldomBlobItem> _list;  ///< blob items
    bool _changed;                    ///< presumably set when the list differs from what was saved - TODO confirm
    bool loadIndex();
    bool saveIndex();
public:
    ldomBlobCache();
    /// sets cache file
    void setCacheFile( CacheFile * cacheFile );
    /// saves to cache file; the result tells whether the operation finished,
    /// was interrupted by timeout, or failed
    ContinuousOperationResult saveToCache(CRTimerUtil & timeout);
    /// adds BLOB data of given size under given name
    bool addBlob( const lUInt8 * data, int size, lString32 name );
    /// returns stream for BLOB with given name
    LVStreamRef getBlob( lString32 name );
};
291 
/// Manager of a set of ldomTextStorageChunk data chunks, optionally backed by a
/// CacheFile. tinyNodeCollection keeps one instance per kind of data (text,
/// element, rect, style).
class ldomDataStorageManager
{
    friend class ldomTextStorageChunk;
protected:
    tinyNodeCollection * _owner;
    LVPtrVector<ldomTextStorageChunk> _chunks;
    ldomTextStorageChunk * _activeChunk;
    ldomTextStorageChunk * _recentChunk;
    CacheFile * _cache;
    lUInt32 _uncompressedSize;
    lUInt32 _maxUncompressedSize;
    lUInt32 _chunkSize;
    char _type;       ///< type, to show in log
    bool _maxSizeReachedWarned;
    ldomTextStorageChunk * getChunk( lUInt32 address );
public:
    /// type (NOTE(review): presumably the cache block type derived from _type - confirm in lvtinydom.cpp)
    lUInt16 cacheType();
    /// saves all unsaved chunks to cache file
    bool save( CRTimerUtil & maxTime );
    /// load chunk index from cache file
    bool load();
    /// sets cache file
    void setCache( CacheFile * cache );
    /// checks buffer sizes, compacts most unused chunks
    void compact( int reservedSpace , const ldomTextStorageChunk *excludedChunk = NULL );
    /// returns the current value of the uncompressed size counter
    lUInt32 getUncompressedSize() { return _uncompressedSize; }
#if BUILD_LITE!=1
    /// allocates new text node, return its address inside storage
    lUInt32 allocText( lUInt32 dataIndex, lUInt32 parentIndex, const lString8 & text );
    /// allocates storage for new element, returns its address inside storage
    lUInt32 allocElem( lUInt32 dataIndex, lUInt32 parentIndex, int childCount, int attrCount );
    /// get text by address
    lString8 getText( lUInt32 address );
    /// get pointer to text data
    TextDataStorageItem * getTextItem( lUInt32 addr );
    /// get pointer to element data
    ElementDataStorageItem * getElem( lUInt32 addr );
    /// change node's parent, returns true if modified
    bool setParent( lUInt32 address, lUInt32 parent );
    /// returns node's parent by address
    lUInt32 getParent( lUInt32 address );
    /// free data item
    void freeNode( lUInt32 addr );
    /// call to invalidate chunk if content is modified
    void modified( lUInt32 addr );
    /// return true if some chunks have been allocated
    bool hasChunks() { return _chunks.length() > 0; }
#endif

    /// get or allocate space for rect data item
    void getRendRectData( lUInt32 elemDataIndex, lvdomElementFormatRec * dst );
    /// set rect data item
    void setRendRectData( lUInt32 elemDataIndex, const lvdomElementFormatRec * src );

    /// get or allocate space for element style data item
    void getStyleData( lUInt32 elemDataIndex, ldomNodeStyleInfo * dst );
    /// set element style data item
    void setStyleData( lUInt32 elemDataIndex, const ldomNodeStyleInfo * src );

    /// creates manager for given owner; 'type' is the char shown in log,
    /// the two sizes presumably initialize _maxUncompressedSize and _chunkSize - TODO confirm
    ldomDataStorageManager( tinyNodeCollection * owner, char type, lUInt32 maxUnpackedSize, lUInt32 chunkSize );
    ~ldomDataStorageManager();
};
355 
/// class to store compressed/uncompressed text nodes chunk
/// (despite the name, ldomDataStorageManager also uses it for element data and raw records)
class ldomTextStorageChunk
{
    friend class ldomDataStorageManager;
    ldomDataStorageManager * _manager;
    ldomTextStorageChunk * _nextRecent;
    ldomTextStorageChunk * _prevRecent;
    lUInt8 * _buf;     ///< buffer for uncompressed data
    lUInt32 _bufsize;  ///< _buf (uncompressed) area size, bytes
    lUInt32 _bufpos;  ///< _buf (uncompressed) data write position (for appending of new data)
    lUInt16 _index;  ///< index of chunk in storage (value returned by getIndex())
    char _type;       ///< type, to show in log
    bool _saved;

    void setunpacked( const lUInt8 * buf, int bufsize );
    /// pack data, and remove unpacked
    void compact();
#if BUILD_LITE!=1
    /// pack data, and remove unpacked, put packed data to cache file
    bool swapToCache( bool removeFromMemory );
    /// read packed data from cache
    bool restoreFromCache();
#endif
    /// unpacks chunk, if packed; checks storage space, compact if necessary
    void ensureUnpacked();
#if BUILD_LITE!=1
    /// free data item
    void freeNode( int offset );
    /// saves data to cache file, if unsaved
    bool save();
#endif
public:
    /// call to invalidate chunk if content is modified
    void modified();
    /// returns chunk index inside collection
    int getIndex() { return _index; }
    /// returns free space in buffer
    int space();
    /// adds new text item to buffer, returns offset inside chunk of stored data
    int addText( lUInt32 dataIndex, lUInt32 parentIndex, const lString8 & text );
    /// adds new element item to buffer, returns offset inside chunk of stored data
    int addElem( lUInt32 dataIndex, lUInt32 parentIndex, int childCount, int attrCount );
    /// get text item from buffer by offset
    lString8 getText( int offset );
    /// get node parent by offset
    lUInt32 getParent( int offset );
    /// set node parent by offset
    bool setParent( int offset, lUInt32 parentIndex );
    /// get pointer to element data
    ElementDataStorageItem * getElem( int offset );
    /// get raw data bytes
    void getRaw( int offset, int size, lUInt8 * buf );
    /// set raw data bytes
    void setRaw( int offset, int size, const lUInt8 * buf );
    /// create empty buffer
    ldomTextStorageChunk(ldomDataStorageManager * manager, lUInt16 index);
    /// create chunk to be read from cache file
    ldomTextStorageChunk(ldomDataStorageManager * manager, lUInt16 index, lUInt32 compsize, lUInt32 uncompsize);
    /// create with preallocated buffer, for raw access
    ldomTextStorageChunk(lUInt32 preAllocSize, ldomDataStorageManager * manager, lUInt16 index);
    ~ldomTextStorageChunk();
};
418 
419 // forward declaration
420 struct ldomNode;
421 
422 // About these #define TNC_PART_* :
423 // A ldomNode unique reference is defined by:
424 //    struct ldomNodeHandle {     /// compact 32bit value for node
425 //        unsigned _docIndex:8;   // index in ldomNode::_documentInstances[MAX_DOCUMENT_INSTANCE_COUNT];
426 //        unsigned _dataIndex:24; // index of node in document's storage and type
427 //    };
428 // The 24 bits of _dataIndex are used that way:
429 //        return &(_elemList[index>>TNC_PART_INDEX_SHIFT][(index>>4)&TNC_PART_MASK]);
430 //        #define TNTYPE  (_handle._dataIndex&0x0F)
431 //        #define TNINDEX (_handle._dataIndex&(~0x0E))
432 //   24>15 10bits (1024 values) : index in the first-level _elemList[TNC_PART_COUNT]
433 //   14> 5 10bits (1024 values) : sub-index in second-level _elemList[first_index][]
434 //    4> 1  4bits (16 values) : type (bit 1: text | element, bit 2: mutable | permanent)
435 //                                   (bit 3 and 4 are not used, so we could grab 2 more bits from here if needed)
436 //
437 // We can update ldomNodeHandle to:
438 //    struct ldomNodeHandle {
439 //        unsigned _docIndex:4;   // decreasing MAX_DOCUMENT_INSTANCE_COUNT from 256 to 16
440 //        unsigned _dataIndex:28; // get 4 more bits that we can distribute to these indexes.
441 //    };
442 // The other #define below (and possibly the code too) assume the same TNC_PART_SHIFT for both indexes,
443 // so let's distribute 2 bits to each:
444 //   28>17 12bits (4096 values) : index in the first-level _elemList[TNC_PART_COUNT]
445 //   16> 5 12bits (4096 values) : sub-index in second-level _elemList[first_index][]
446 //    4> 1  4bits (16 values)
447 // With that, we have increased the max number of text nodes and the max number of
448 // element nodes from 1024x1024 (1M) to 4096x4096 (16M) which allows loading very large books.
449 
450 //#define TNC_PART_COUNT 1024
451 //#define TNC_PART_SHIFT 10
452 #define TNC_PART_COUNT 4096
453 #define TNC_PART_SHIFT 12
454 #define TNC_PART_INDEX_SHIFT (TNC_PART_SHIFT+4)
455 #define TNC_PART_LEN (1<<TNC_PART_SHIFT)
456 #define TNC_PART_MASK (TNC_PART_LEN-1)
457 /// storage of ldomNode
458 class tinyNodeCollection
459 {
460     friend struct ldomNode;
461     friend class tinyElement;
462     friend class ldomDocument;
463 private:
464     int _textCount;
465     lUInt32 _textNextFree;
466     ldomNode * _textList[TNC_PART_COUNT];
467     int _elemCount;
468     lUInt32 _elemNextFree;
469     ldomNode * _elemList[TNC_PART_COUNT];
470     LVIndexedRefCache<css_style_ref_t> _styles;
471     LVIndexedRefCache<font_ref_t> _fonts;
472     int _tinyElementCount;
473     int _itemCount;
474     int _docIndex;
475 
476 protected:
477 #if BUILD_LITE!=1
478     /// final block cache
479     CVRendBlockCache _renderedBlockCache;
480     CacheFile * _cacheFile;
481     bool _cacheFileStale;
482     bool _cacheFileLeaveAsDirty;
483     bool _mapped;
484     bool _maperror;
485     int  _mapSavingStage;
486 
487     img_scaling_options_t _imgScalingOptions;
488     int  _spaceWidthScalePercent;
489     int  _minSpaceCondensingPercent;
490     int  _unusedSpaceThresholdPercent;
491     int  _maxAddedLetterSpacingPercent;
492 
493     lUInt32 _nodeStyleHash;
494     lUInt32 _nodeDisplayStyleHash;
495     lUInt32 _nodeDisplayStyleHashInitial;
496     bool _nodeStylesInvalidIfLoading;
497 
498     int calcFinalBlocks();
499     void dropStyles();
500 #endif
501     bool _hangingPunctuationEnabled;
502     lUInt32 _renderBlockRenderingFlags;
503     lUInt32 _DOMVersionRequested;
504     int _interlineScaleFactor;
505 
506     ldomDataStorageManager _textStorage; // persistent text node data storage
507     ldomDataStorageManager _elemStorage; // persistent element data storage
508     ldomDataStorageManager _rectStorage; // element render rect storage
509     ldomDataStorageManager _styleStorage;// element style storage (font & style indexes ldomNodeStyleInfo)
510 
511     CRPropRef _docProps;
512     lUInt32 _docFlags; // document flags
513 
514     int _styleIndex;
515 
516     LVStyleSheet  _stylesheet;
517 
518     LVHashTable<lUInt16, lUInt16> _fontMap; // style index to font index
519 
520     /// checks buffer sizes, compacts most unused chunks
521     ldomBlobCache _blobCache;
522 
523     /// uniquie id of file format parsing option (usually 0, but 1 for preformatted text files)
524     int getPersistenceFlags();
525 
526 #if BUILD_LITE!=1
527     bool saveStylesData();
528     bool loadStylesData();
529     bool updateLoadedStyles( bool enabled );
530     lUInt32 calcStyleHash(bool already_rendered);
531     bool saveNodeData();
532     bool saveNodeData( lUInt16 type, ldomNode ** list, int nodecount );
533     bool loadNodeData();
534     bool loadNodeData( lUInt16 type, ldomNode ** list, int nodecount );
535 
hasRenderData()536     bool hasRenderData() { return _rectStorage.hasChunks(); }
537 
538     bool openCacheFile();
539 
540     void setNodeStyleIndex( lUInt32 dataIndex, lUInt16 index );
541     void setNodeFontIndex( lUInt32 dataIndex, lUInt16 index );
542     lUInt16 getNodeStyleIndex( lUInt32 dataIndex );
543     lUInt16 getNodeFontIndex( lUInt32 dataIndex );
544     css_style_ref_t getNodeStyle( lUInt32 dataIndex );
545     font_ref_t getNodeFont( lUInt32 dataIndex );
546     void setNodeStyle( lUInt32 dataIndex, css_style_ref_t & v );
547     void setNodeFont( lUInt32 dataIndex, font_ref_t & v  );
548     void clearNodeStyle( lUInt32 dataIndex );
resetNodeNumberingProps()549     virtual void resetNodeNumberingProps() { }
550 #endif
551 
552     /// creates empty collection
553     tinyNodeCollection();
554     tinyNodeCollection( tinyNodeCollection & v );
555 
556 public:
557 
558 #if BUILD_LITE!=1
getSpaceWidthScalePercent()559     int getSpaceWidthScalePercent() {
560         return _spaceWidthScalePercent;
561     }
562 
setSpaceWidthScalePercent(int spaceWidthScalePercent)563     bool setSpaceWidthScalePercent(int spaceWidthScalePercent) {
564         if (spaceWidthScalePercent == _spaceWidthScalePercent)
565             return false;
566         _spaceWidthScalePercent = spaceWidthScalePercent;
567         return true;
568     }
569 
setMinSpaceCondensingPercent(int minSpaceCondensingPercent)570     bool setMinSpaceCondensingPercent(int minSpaceCondensingPercent) {
571         if (minSpaceCondensingPercent == _minSpaceCondensingPercent)
572             return false;
573         _minSpaceCondensingPercent = minSpaceCondensingPercent;
574         return true;
575     }
576 
setUnusedSpaceThresholdPercent(int unusedSpaceThresholdPercent)577     bool setUnusedSpaceThresholdPercent(int unusedSpaceThresholdPercent) {
578         if (unusedSpaceThresholdPercent == _unusedSpaceThresholdPercent)
579             return false;
580         _unusedSpaceThresholdPercent = unusedSpaceThresholdPercent;
581         return true;
582     }
583 
setMaxAddedLetterSpacingPercent(int maxAddedLetterSpacingPercent)584     bool setMaxAddedLetterSpacingPercent(int maxAddedLetterSpacingPercent) {
585         if (maxAddedLetterSpacingPercent == _maxAddedLetterSpacingPercent)
586             return false;
587         _maxAddedLetterSpacingPercent = maxAddedLetterSpacingPercent;
588         // This does not need to trigger a re-rendering, just
589         // a re-formatting of the final blocks
590         _renderedBlockCache.clear();
591         return true;
592     }
593 
594     /// add named BLOB data to document
addBlob(lString32 name,const lUInt8 * data,int size)595     bool addBlob(lString32 name, const lUInt8 * data, int size) { _cacheFileStale = true ; return _blobCache.addBlob(data, size, name); }
596     /// get BLOB by name
getBlob(lString32 name)597     LVStreamRef getBlob(lString32 name) { return _blobCache.getBlob(name); }
598 
599     /// called on document loading end
600     bool validateDocument();
601 
602     /// swaps to cache file or saves changes, limited by time interval (can be called again to continue after TIMEOUT)
603     virtual ContinuousOperationResult swapToCache(CRTimerUtil & maxTime) = 0;
604     /// try opening from cache file, find by source file name (w/o path) and crc32
605     virtual bool openFromCache( CacheLoadingCallback * formatCallback, LVDocViewCallback * progressCallback=NULL ) = 0;
606     /// saves recent changes to mapped file, with timeout (can be called again to continue after TIMEOUT)
607     virtual ContinuousOperationResult updateMap(CRTimerUtil & maxTime, LVDocViewCallback * progressCallback=NULL) = 0;
608     /// saves recent changes to mapped file
609     virtual bool updateMap(LVDocViewCallback * progressCallback=NULL) {
610         CRTimerUtil infinite;
611         return updateMap(infinite, progressCallback)!=CR_ERROR;
612     }
613 
614     bool swapToCacheIfNecessary();
615 
616 
617     bool createCacheFile();
618 #endif
619 
getHangingPunctiationEnabled()620     bool getHangingPunctiationEnabled() const {
621         return _hangingPunctuationEnabled;
622     }
623     bool setHangingPunctiationEnabled(bool value);
624 
getRenderBlockRenderingFlags()625     lUInt32 getRenderBlockRenderingFlags() const {
626         return _renderBlockRenderingFlags;
627     }
628     bool setRenderBlockRenderingFlags(lUInt32 flags);
629 
getDOMVersionRequested()630     lUInt32 getDOMVersionRequested() const {
631         return _DOMVersionRequested;
632     }
633     bool setDOMVersionRequested(lUInt32 version);
634 
getInterlineScaleFactor()635     int getInterlineScaleFactor() const {
636         return _interlineScaleFactor;
637     }
638     bool setInterlineScaleFactor(int value);
639 
getDocFlag(lUInt32 mask)640     inline bool getDocFlag( lUInt32 mask )
641     {
642         return (_docFlags & mask) != 0;
643     }
644 
645     void setDocFlag( lUInt32 mask, bool value );
646 
getDocFlags()647     inline lUInt32 getDocFlags()
648     {
649         return _docFlags;
650     }
651 
getDocIndex()652     inline int getDocIndex()
653     {
654         return _docIndex;
655     }
656 
getFontContextDocIndex()657     inline int getFontContextDocIndex()
658     {
659         return (_docFlags & DOC_FLAG_ENABLE_DOC_FONTS) && (_docFlags & DOC_FLAG_ENABLE_INTERNAL_STYLES) ? _docIndex : -1;
660     }
661 
662     void setDocFlags( lUInt32 value );
663 
664 
665     /// returns doc properties collection
getProps()666     inline CRPropRef getProps() { return _docProps; }
667     /// returns doc properties collection
setProps(CRPropRef props)668     void setProps( CRPropRef props ) { _docProps = props; }
669 
670 #if BUILD_LITE!=1
671     /// set cache file stale flag
setCacheFileStale(bool stale)672     void setCacheFileStale( bool stale ) { _cacheFileStale = stale; }
673 
674     /// is built (and cached) DOM possibly invalid (can happen when some nodes have changed display style)
isBuiltDomStale()675     bool isBuiltDomStale() {
676         return _nodeDisplayStyleHashInitial != NODE_DISPLAY_STYLE_HASH_UNINITIALIZED &&
677                 _nodeDisplayStyleHash != _nodeDisplayStyleHashInitial;
678     }
setNodeStylesInvalidIfLoading()679     void setNodeStylesInvalidIfLoading() {
680         _nodeStylesInvalidIfLoading = true;
681     }
682 
683     /// if a cache file is in use
hasCacheFile()684     bool hasCacheFile() { return _cacheFile != NULL; }
685     /// set cache file as dirty, so it's not re-used on next load
invalidateCacheFile()686     void invalidateCacheFile() { _cacheFileLeaveAsDirty = true; }
687     /// get cache file full path
688     lString32 getCacheFilePath();
689 #endif
690 
691     /// minimize memory consumption
692     void compact();
693     /// dumps memory usage statistics to debug log
694     void dumpStatistics();
695     /// get memory usage statistics
696     lString32 getStatistics();
697 
698     /// get ldomNode instance pointer
699     ldomNode * getTinyNode( lUInt32 index );
700     /// allocate new ldomNode
701     ldomNode * allocTinyNode( int type );
702     /// allocate new tinyElement
703     ldomNode * allocTinyElement( ldomNode * parent, lUInt16 nsid, lUInt16 id );
704     /// recycle ldomNode on node removing
705     void recycleTinyNode( lUInt32 index );
706 
707 
708 
709 #if BUILD_LITE!=1
710     /// put all object into persistent storage
711     virtual void persist( CRTimerUtil & maxTime );
712 #endif
713 
714 
715     /// destroys collection
716     virtual ~tinyNodeCollection();
717 };
718 
719 class ldomDocument;
720 class tinyElement;
721 struct lxmlAttribute;
722 
723 #if BUILD_LITE!=1
/// Accessor for the render rect record of a node: derives from
/// lvdomElementFormatRec and adds getters/setters bound to one ldomNode.
/// NOTE(review): presumably the record is loaded lazily and written back by
/// push() or the destructor when modified (see _dirty / _modified) - confirm
/// in lvtinydom.cpp, the implementation is not in this header.
class RenderRectAccessor : public lvdomElementFormatRec
{
    ldomNode * _node;  ///< node this accessor was created for
    bool _modified;
    bool _dirty;
public:
    //RenderRectAccessor & operator -> () { return *this; }
    // outer rect
    int getX();
    int getY();
    int getWidth();
    int getHeight();
    void getRect( lvRect & rc );
    void setX( int x );
    void setY( int y );
    void setWidth( int w );
    void setHeight( int h );

    // inner position and width
    int getInnerWidth();
    int getInnerX();
    int getInnerY();
    void setInnerX( int x );
    void setInnerY( int y );
    void setInnerWidth( int w );

    // usable left/right overflow
    int  getUsableLeftOverflow();
    int  getUsableRightOverflow();
    void setUsableLeftOverflow( int dx );
    void setUsableRightOverflow( int dx );

    // top/bottom overflow
    int  getTopOverflow();
    int  getBottomOverflow();
    void setTopOverflow( int dy );
    void setBottomOverflow( int dy );

    // baseline and related node indexes
    int  getBaseline();
    void setBaseline( int baseline );
    int  getListPropNodeIndex();
    void setListPropNodeIndex( int idx );
    int  getLangNodeIndex();
    void setLangNodeIndex( int idx );

    unsigned short getFlags();
    void setFlags( unsigned short flags );

    // float related data
    void getTopRectsExcluded( int & lw, int & lh, int & rw, int & rh );
    void setTopRectsExcluded( int lw, int lh, int rw, int rh );
    void getNextFloatMinYs( int & left, int & right );
    void setNextFloatMinYs( int left, int right );
    void getInvolvedFloatIds( int & float_count, lUInt32 * float_ids );
    void setInvolvedFloatIds( int float_count, lUInt32 * float_ids );

    void push();
    void clear();
    /// creates accessor for given node
    RenderRectAccessor( ldomNode * node );
    ~RenderRectAccessor();
};
780 #endif
781 
782 /// compact 32bit value for node
/// Packed node handle: a 4-bit document slot followed by a 28-bit data index.
struct ldomNodeHandle {
    // Upstream crengine used 8 bits for _docIndex and 24 bits for _dataIndex;
    // see the comment around the TNC_PART_COUNT / TNC_PART_SHIFT definitions
    // for why the split was changed.
    unsigned int _docIndex  : 4;  // index in ldomNode::_documentInstances[MAX_DOCUMENT_INSTANCE_COUNT]
    unsigned int _dataIndex : 28; // index of node in document's storage and type
};
791 
792 /// max number which could be stored in ldomNodeHandle._docIndex
793 // #define MAX_DOCUMENT_INSTANCE_COUNT 256
794 #define MAX_DOCUMENT_INSTANCE_COUNT 16
795 
796 
797 class ldomTextNode;
798 // no vtable, very small size (16 bytes)
799 // optimized for 32 bit systems
800 struct ldomNode
801 {
802     friend class tinyNodeCollection;
803     friend class RenderRectAccessor;
804     friend class NodeImageProxy;
805     friend class ldomDocument;
806 
807 private:
808 
809     static ldomDocument * _documentInstances[MAX_DOCUMENT_INSTANCE_COUNT];
810 
811     /// adds document to list, returns ID of allocated document, -1 if no space in instance array
812     static int registerDocument( ldomDocument * doc );
813     /// removes document from list
814     static void unregisterDocument( ldomDocument * doc );
815 
816     // types for _handle._type
817     enum {
818         NT_TEXT=0,       // mutable text node
819         NT_ELEMENT=1    // mutable element node
820 #if BUILD_LITE!=1
821         ,
822         NT_PTEXT=2,      // immutable (persistent) text node
823         NT_PELEMENT=3   // immutable (persistent) element node
824 #endif
825     };
826 
827     /// 0: packed 32bit data field
828     ldomNodeHandle _handle; // _docIndex, _dataIndex, _type
829 
830     /// 4: misc data 4 bytes (8 bytes on x64)
831     union {                    // [8] 8 bytes (16 bytes on x64)
832         ldomTextNode * _text_ptr;   // NT_TEXT: mutable text node pointer
833         tinyElement * _elem_ptr;    // NT_ELEMENT: mutable element pointer
834 #if BUILD_LITE!=1
835         lUInt32 _pelem_addr;        // NT_PELEMENT: element storage address: chunk+offset
836         lUInt32 _ptext_addr;        // NT_PTEXT: persistent text storage address: chunk+offset
837 #endif
838         lUInt32 _nextFreeIndex;     // NULL for removed items
839     } _data;
840 
841 
842     /// sets document for node
setDocumentIndexldomNode843     inline void setDocumentIndex( int index ) { _handle._docIndex = index; }
844     void setStyleIndexInternal( lUInt16 index );
845     void setFontIndexInternal( lUInt16 index );
846 
847 
848 #define TNTYPE  (_handle._dataIndex&0x0F)
849 #define TNINDEX (_handle._dataIndex&(~0x0E))
850 #define TNCHUNK (_addr>>&(~0x0F))
851     void onCollectionDestroy();
getTinyNodeldomNode852     inline ldomNode * getTinyNode( lUInt32 index ) const { return ((tinyNodeCollection*)getDocument())->getTinyNode(index); }
853 
deleteldomNode854     void operator delete(void *)
855     {
856         // Do nothing. Just to disable delete.
857     }
858 
859     /// changes parent of item
860     void setParentNode( ldomNode * newParent );
861     /// add child
862     void addChild( lInt32 childNodeIndex );
863 
864     /// call to invalidate cache if persistent node content is modified
865     void modified();
866 
867     /// returns copy of render data structure
868     void getRenderData( lvdomElementFormatRec & dst);
869     /// sets new value for render data structure
870     void setRenderData( lvdomElementFormatRec & newData);
871 
872     void autoboxChildren( int startIndex, int endIndex, bool handleFloating=false );
873     void removeChildren( int startIndex, int endIndex );
874     bool cleanIfOnlyEmptyTextInline( bool handleFloating=false );
875     /// returns true if element has inline content (non empty text, images, <BR>)
876     bool hasNonEmptyInlineContent( bool ignoreFloats=false );
877 
878 public:
879 #if BUILD_LITE!=1
880     // Generic version of autoboxChildren() without any specific inline/block checking,
881     // accepting any element id (from the enum el_*, like el_div, el_tabularBox) as
882     // the wrapping element.
883     ldomNode * boxWrapChildren( int startIndex, int endIndex, lUInt16 elementId );
884 
885     // Ensure this node has a ::before/::after pseudo element as
886     // child, creating it if needed and possible
887     void ensurePseudoElement( bool is_before );
888 
889     /// if stylesheet file name is set, and file is found, set stylesheet to its value
890     bool applyNodeStylesheet();
891 
892     bool initNodeFont();
893     void initNodeStyle();
894     /// init render method for this node only (children should already have rend method set)
895     void initNodeRendMethod();
896     /// init render method for the whole subtree
897     void initNodeRendMethodRecursive();
898     /// init render method for the whole subtree
899     void initNodeStyleRecursive( LVDocViewCallback * progressCallback );
900 #endif
901 
902 
903     /// remove node, clear resources
904     void destroy();
905 
906     /// returns true for invalid/deleted node ot NULL this pointer
isNullldomNode907     inline bool isNull() const { return _handle._dataIndex==0 || getDocument() == NULL; }
908     /// returns true if node is stored in persistent storage
isPersistentldomNode909     inline bool isPersistent() const { return (_handle._dataIndex&2)!=0; }
910     /// returns data index of node's registration in document data storage
getDataIndexldomNode911     inline lInt32 getDataIndex() const { return TNINDEX; }
912     /// returns pointer to document
getDocumentldomNode913     inline ldomDocument * getDocument() const { return _documentInstances[_handle._docIndex]; }
914     /// returns pointer to parent node, NULL if node has no parent
915     ldomNode * getParentNode() const;
916     /// returns node type, either LXML_TEXT_NODE or LXML_ELEMENT_NODE
getNodeTypeldomNode917     inline lUInt8 getNodeType() const
918     {
919         return (_handle._dataIndex & 1) ? LXML_ELEMENT_NODE : LXML_TEXT_NODE;
920     }
921     /// returns node level, 0 is root node
922     lUInt8 getNodeLevel() const;
923     /// returns dataIndex of node's parent, 0 if no parent
924     int getParentIndex() const;
925     /// returns index of node inside parent's child collection
926     int getNodeIndex() const;
927     /// returns index of child node by dataIndex
928     int getChildIndex( lUInt32 dataIndex ) const;
929     /// returns true if node is document's root
930     bool isRoot() const;
931     /// returns true if node is text
isTextldomNode932     inline bool isText() const { return _handle._dataIndex && !(_handle._dataIndex&1); }
933     /// returns true if node is element
isElementldomNode934     inline bool isElement() const { return _handle._dataIndex && (_handle._dataIndex&1); }
935     /// returns true if node is and element that has children
hasChildrenldomNode936     inline bool hasChildren() { return getChildCount()!=0; }
937     /// returns true if node is element has attributes
hasAttributesldomNode938     inline bool hasAttributes() const { return getAttrCount()!=0; }
939 
940     /// returns element child count
941     int getChildCount() const;
942     /// returns element attribute count
943     int getAttrCount() const;
944     /// returns attribute value by attribute name id and namespace id
945     const lString32 & getAttributeValue( lUInt16 nsid, lUInt16 id ) const;
946     /// returns attribute value by attribute name
getAttributeValueldomNode947     inline const lString32 & getAttributeValue( const lChar32 * attrName ) const
948     {
949         return getAttributeValue( NULL, attrName );
950     }
951     /// returns attribute value by attribute name
getAttributeValueldomNode952     inline const lString32 & getAttributeValue( const lChar8 * attrName ) const
953     {
954         return getAttributeValue( NULL, attrName );
955     }
956     /// returns attribute value by attribute name and namespace
957     const lString32 & getAttributeValue( const lChar32 * nsName, const lChar32 * attrName ) const;
958     /// returns attribute value by attribute name and namespace
959     const lString32 & getAttributeValue( const lChar8 * nsName, const lChar8 * attrName ) const;
960     /// returns attribute by index
961     const lxmlAttribute * getAttribute( lUInt32 ) const;
962     /// returns true if element node has attribute with specified name id and namespace id
963     bool hasAttribute( lUInt16 nsId, lUInt16 attrId ) const;
964     /// returns attribute name by index
965     const lString32 & getAttributeName( lUInt32 ) const;
966     /// sets attribute value
967     void setAttributeValue( lUInt16 , lUInt16 , const lChar32 *  );
968     /// returns attribute value by attribute name id
getAttributeValueldomNode969     inline const lString32 & getAttributeValue( lUInt16 id ) const { return getAttributeValue( LXML_NS_ANY, id ); }
970     /// returns true if element node has attribute with specified name id
hasAttributeldomNode971     inline bool hasAttribute( lUInt16 id ) const  { return hasAttribute( LXML_NS_ANY, id ); }
972 
973     /// returns attribute value by attribute name id, looking at children if needed
974     const lString32 & getFirstInnerAttributeValue( lUInt16 nsid, lUInt16 id ) const;
getFirstInnerAttributeValueldomNode975     const lString32 & getFirstInnerAttributeValue( lUInt16 id ) const { return getFirstInnerAttributeValue( LXML_NS_ANY, id ); }
976 
977     /// returns element type structure pointer if it was set in document for this element name
978     const css_elem_def_props_t * getElementTypePtr();
979     /// returns element name id
980     lUInt16 getNodeId() const;
981     /// returns element namespace id
982     lUInt16 getNodeNsId() const;
983     /// replace element name id with another value
984     void setNodeId( lUInt16 );
985     /// returns element name
986     const lString32 & getNodeName() const;
987     /// compares node name with value, returns true if matches
988     bool isNodeName(const char * name) const;
989     /// returns element namespace name
990     const lString32 & getNodeNsName() const;
991 
992     /// returns child node by index
993     ldomNode * getChildNode( lUInt32 index ) const;
994     /// returns true child node is element
995     bool isChildNodeElement( lUInt32 index ) const;
996     /// returns true child node is text
997     bool isChildNodeText( lUInt32 index ) const;
998     /// returns child node by index, NULL if node with this index is not element or nodeId!=0 and element node id!=nodeId
999     ldomNode * getChildElementNode( lUInt32 index, lUInt16 nodeId=0 ) const;
1000     /// returns child node by index, NULL if node with this index is not element or nodeTag!=0 and element node name!=nodeTag
1001     ldomNode * getChildElementNode( lUInt32 index, const lChar32 * nodeTag ) const;
1002 
1003     /// returns text node text as wide string
1004     lString32 getText( lChar32 blockDelimiter = 0, int maxSize=0 ) const;
1005     /// returns text node text as utf8 string
1006     lString8 getText8( lChar8 blockDelimiter = 0, int maxSize=0 ) const;
1007     /// sets text node text as wide string
1008     void setText( lString32 );
1009     /// sets text node text as utf8 string
1010     void setText8( lString8 );
1011 
1012 
1013     /// returns node absolute rectangle (with inner=true, for erm_final, additionally
1014     //  shifted by the inner paddings (exluding padding bottom) to get the absolute rect
1015     //  of the inner LFormattedText.
1016     void getAbsRect( lvRect & rect, bool inner=false );
1017     /// sets node rendering structure pointer
1018     void clearRenderData();
1019     /// reset node rendering structure pointer for sub-tree
1020     void clearRenderDataRecursive();
1021     /// calls specified function recursively for all elements of DOM tree
1022     void recurseElements( void (*pFun)( ldomNode * node ) );
1023     /// calls specified function recursively for all elements of DOM tree matched by matchFun
1024     void recurseMatchingElements( void (*pFun)( ldomNode * node ), bool (*matchFun)( ldomNode * node ) );
1025     /// calls specified function recursively for all elements of DOM tree, children before parent
1026     void recurseElementsDeepFirst( void (*pFun)( ldomNode * node ) );
1027     /// calls specified function recursively for all nodes of DOM tree
1028     void recurseNodes( void (*pFun)( ldomNode * node ) );
1029 
1030 
1031     /// returns first text child element
1032     ldomNode * getFirstTextChild( bool skipEmpty=false );
1033     /// returns last text child element
1034     ldomNode * getLastTextChild();
1035 
1036 #if BUILD_LITE!=1
1037     /// find node by coordinates of point in formatted document
1038     ldomNode * elementFromPoint( lvPoint pt, int direction, bool strict_bounds_checking=false );
1039     /// find final node by coordinates of point in formatted document
1040     ldomNode * finalBlockFromPoint( lvPoint pt );
1041 #endif
1042 
1043     // rich interface stubs for supporting Element operations
1044     /// returns rendering method
1045     lvdom_element_render_method getRendMethod();
1046     /// sets rendering method
1047     void setRendMethod( lvdom_element_render_method );
1048 #if BUILD_LITE!=1
1049     /// returns element style record
1050     css_style_ref_t getStyle() const;
1051     /// returns element font
1052     font_ref_t getFont();
1053     /// sets element font
1054     void setFont( font_ref_t );
1055     /// sets element style record
1056     void setStyle( css_style_ref_t & );
1057 #endif
1058     /// returns first child node
1059     ldomNode * getFirstChild() const;
1060     /// returns last child node
1061     ldomNode * getLastChild() const;
1062     /// removes and deletes last child element
1063     void removeLastChild();
1064     /// move range of children startChildIndex to endChildIndex inclusively to specified element
1065     void moveItemsTo( ldomNode *, int , int );
1066     /// find child element by tag id
1067     ldomNode * findChildElement( lUInt16 nsid, lUInt16 id, int index );
1068     /// find child element by id path
1069     ldomNode * findChildElement( lUInt16 idPath[] );
1070     /// inserts child element
1071     ldomNode * insertChildElement( lUInt32 index, lUInt16 nsid, lUInt16 id );
1072     /// inserts child element
1073     ldomNode * insertChildElement( lUInt16 id );
1074     /// inserts child text
1075     ldomNode * insertChildText( lUInt32 index, const lString32 & value );
1076     /// inserts child text
1077     ldomNode * insertChildText( const lString32 & value );
1078     /// inserts child text
1079     ldomNode * insertChildText(const lString8 & value, bool before_last_child=false);
1080     /// remove child
1081     ldomNode * removeChild( lUInt32 index );
1082 
1083     /// returns XPath segment for this element relative to parent element (e.g. "p[10]")
1084     lString32 getXPathSegment();
1085 
1086     /// creates stream to read base64 encoded data from element
1087     LVStreamRef createBase64Stream();
1088 #if BUILD_LITE!=1
1089     /// returns object image source
1090     LVImageSourceRef getObjectImageSource();
1091     /// returns object image ref name
1092     lString32 getObjectImageRefName( bool percentDecode=true );
1093     /// returns object image stream
1094     LVStreamRef getObjectImageStream();
1095     /// returns the sum of this node and its parents' top and bottom margins, borders and paddings
1096     int getSurroundingAddedHeight();
1097     /// formats final block
1098     int renderFinalBlock( LFormattedTextRef & frmtext, RenderRectAccessor * fmt, int width,
1099                               BlockFloatFootprint * float_footprint=NULL );
1100     /// formats final block again after change, returns true if size of block is changed
1101     bool refreshFinalBlock();
1102 #endif
1103     /// replace node with r/o persistent implementation
1104     ldomNode * persist();
1105     /// replace node with r/w implementation
1106     ldomNode * modify();
1107 
1108     /// for display:list-item node, get marker
1109     bool getNodeListMarker( int & counterValue, lString32 & marker, int & markerWidth );
1110     /// is node a floating floatBox
1111     bool isFloatingBox() const;
1112     /// is node an inlineBox that has not been re-inlined by having
1113     /// its child no more inline-block/inline-table
1114     bool isBoxingInlineBox() const;
1115     /// is node an inlineBox that wraps a bogus embedded block (not inline-block/inline-table)
1116     /// can be called with inline_box_checks_done=true when isBoxingInlineBox() has already
1117     /// been called to avoid rechecking what is known
1118     bool isEmbeddedBlockBoxingInlineBox(bool inline_box_checks_done=false) const;
1119 
1120     /// is node any of our internal boxing element (or, optionally, our pseudoElem)
1121     bool isBoxingNode( bool orPseudoElem=false ) const;
1122 
1123     /// return real (as in the original HTML) parent/siblings by skipping any internal
1124     /// boxing element up or down (returns NULL when no more sibling)
1125     ldomNode * getUnboxedParent() const;
1126     ldomNode * getUnboxedFirstChild( bool skip_text_nodes=false ) const;
1127     ldomNode * getUnboxedLastChild( bool skip_text_nodes=false ) const;
1128     ldomNode * getUnboxedPrevSibling( bool skip_text_nodes=false ) const;
1129     ldomNode * getUnboxedNextSibling( bool skip_text_nodes=false ) const;
1130 };
1131 
1132 
1133 // default: 512K
1134 #define DEF_DOC_DATA_BUFFER_SIZE 0x80000
1135 
1136 /// Base class for XML DOM documents
1137 /**
1138     Helps to decrease memory usage and increase performance for DOM implementations.
1139     Maintains Name<->Id maps for element names, namespaces and attributes.
1140     It allows to use short IDs instead of strings in DOM internals,
1141     and avoid duplication of string values.
1142 
1143 	Manages data storage.
1144 */
1145 class lxmlDocBase : public tinyNodeCollection
1146 {
1147     friend struct ldomNode;
1148 	friend class ldomXPointer;
1149 protected:
1150 
1151 
1152     /// Default constructor
1153     lxmlDocBase(int dataBufSize = DEF_DOC_DATA_BUFFER_SIZE);
1154     /// Copy constructor - copies ID tables contents
1155     lxmlDocBase( lxmlDocBase & doc );
1156 public:
1157     /// Destructor
1158     virtual ~lxmlDocBase();
1159 
1160 #if BUILD_LITE!=1
1161 	/// serialize to byte array (pointer will be incremented by number of bytes written)
1162 	void serializeMaps( SerialBuf & buf );
1163 	/// deserialize from byte array (pointer will be incremented by number of bytes read)
1164 	bool deserializeMaps( SerialBuf & buf );
1165 
1166 #endif
1167 
1168     //======================================================================
1169     // Name <-> Id maps functions
1170 
1171     /// Get namespace name by id
1172     /**
1173         \param id is numeric value of namespace
1174         \return string value of namespace
1175     */
getNsName(lUInt16 id)1176     inline const lString32 & getNsName( lUInt16 id )
1177     {
1178         return _nsNameTable.nameById( id );
1179     }
1180 
1181     /// Get namespace id by name
1182     /**
1183         \param name is string value of namespace
1184         \return id of namespace
1185     */
1186     lUInt16 getNsNameIndex( const lChar32 * name );
1187 
1188     /// Get namespace id by name
1189     /**
1190         \param name is string value of namespace (ASCII only)
1191         \return id of namespace
1192     */
1193     lUInt16 getNsNameIndex( const lChar8 * name );
1194 
1195     /// Get attribute name by id
1196     /**
1197         \param id is numeric value of attribute
1198         \return string value of attribute
1199     */
getAttrName(lUInt16 id)1200     inline const lString32 & getAttrName( lUInt16 id )
1201     {
1202         return _attrNameTable.nameById( id );
1203     }
1204 
1205     /// Get attribute id by name
1206     /**
1207         \param name is string value of attribute
1208         \return id of attribute
1209     */
1210     lUInt16 getAttrNameIndex( const lChar32 * name );
1211 
1212     /// Get attribute id by name
1213     /**
1214         \param name is string value of attribute (8bit ASCII only)
1215         \return id of attribute
1216     */
1217     lUInt16 getAttrNameIndex( const lChar8 * name );
1218 
1219     /// helper: returns attribute value
getAttrValue(lUInt32 index)1220     inline const lString32 & getAttrValue( lUInt32 index ) const
1221     {
1222         return _attrValueTable[index];
1223     }
1224 
1225     /// helper: returns attribute value index
getAttrValueIndex(const lChar32 * value)1226     inline lUInt32 getAttrValueIndex( const lChar32 * value )
1227     {
1228         return (lUInt32)_attrValueTable.add( value );
1229     }
1230 
1231     /// helper: returns attribute value index, 0xffffffff if not found
findAttrValueIndex(const lChar32 * value)1232     inline lUInt32 findAttrValueIndex( const lChar32 * value )
1233     {
1234         return (lUInt32)_attrValueTable.find( value );
1235     }
1236 
1237     /// Get element name by id
1238     /**
1239         \param id is numeric value of element name
1240         \return string value of element name
1241     */
getElementName(lUInt16 id)1242     inline const lString32 & getElementName( lUInt16 id )
1243     {
1244         return _elementNameTable.nameById( id );
1245     }
1246 
1247     /// Get element id by name
1248     /**
1249         \param name is string value of element name
1250         \return id of element
1251     */
1252     lUInt16 getElementNameIndex( const lChar32 * name );
1253 
1254     /// Get element id by name
1255     /**
1256         \param name is string value of element name (8bit ASCII only)
1257         \return id of element, allocates new ID if not found
1258     */
1259     lUInt16 getElementNameIndex( const lChar8 * name );
1260 
1261     /// Get element id by name
1262     /**
1263         \param name is string value of element name (8bit ASCII only)
1264         \return id of element, 0 if not found
1265     */
1266     lUInt16 findElementNameIndex( const lChar8 * name );
1267 
1268     /// Get element type properties structure by id
1269     /**
1270         \param id is element id
1271         \return pointer to elem_def_t structure containing type properties
1272         \sa elem_def_t
1273     */
getElementTypePtr(lUInt16 id)1274     inline const css_elem_def_props_t * getElementTypePtr( lUInt16 id )
1275     {
1276         return _elementNameTable.dataById( id );
1277     }
1278 
1279     // set node types from table
1280     void setNodeTypes( const elem_def_t * node_scheme );
1281     // set attribute types from table
1282     void setAttributeTypes( const attr_def_t * attr_scheme );
1283     // set namespace types from table
1284     void setNameSpaceTypes( const ns_def_t * ns_scheme );
1285 
1286     // debug dump
1287     void dumpUnknownEntities( const char * fname );
1288     lString32Collection getUnknownEntities();
1289 
1290     /// garbage collector
gc()1291     virtual void gc()
1292     {
1293 #if BUILD_LITE!=1
1294         fontMan->gc();
1295 #endif
1296     }
1297 
getStyleSheet()1298     inline LVStyleSheet * getStyleSheet() { return &_stylesheet; }
1299     /// sets style sheet, clears old content of css if arg replace is true
1300     void setStyleSheet( const char * css, bool replace );
1301 
1302 #if BUILD_LITE!=1
1303     /// apply document's stylesheet to element node
applyStyle(ldomNode * element,css_style_rec_t * pstyle)1304     inline void applyStyle( ldomNode * element, css_style_rec_t * pstyle)
1305     {
1306         _stylesheet.apply( element, pstyle );
1307     }
1308 #endif
1309 
1310     void onAttributeSet( lUInt16 attrId, lUInt32 valueId, ldomNode * node );
1311 
1312     /// get element by id attribute value code
getNodeById(lUInt32 attrValueId)1313     inline ldomNode * getNodeById( lUInt32 attrValueId )
1314     {
1315         return getTinyNode( _idNodeMap.get( attrValueId ) );
1316     }
1317 
1318     /// get element by id attribute value
getElementById(const lChar32 * id)1319     inline ldomNode * getElementById( const lChar32 * id )
1320     {
1321         lUInt32 attrValueId = getAttrValueIndex( id );
1322         ldomNode * node = getNodeById( attrValueId );
1323         return node;
1324     }
1325     /// returns root element
1326     ldomNode * getRootNode();
1327 
1328     /// returns code base path relative to document container
getCodeBase()1329     inline lString32 getCodeBase() { return getProps()->getStringDef(DOC_PROP_CODE_BASE, ""); }
1330     /// sets code base path relative to document container
setCodeBase(const lString32 & codeBase)1331     inline void setCodeBase(const lString32 & codeBase) { getProps()->setStringDef(DOC_PROP_CODE_BASE, codeBase); }
1332 
1333 #ifdef _DEBUG
1334 #if BUILD_LITE!=1
1335     ///debug method, for DOM tree consistency check, returns false if failed
1336     bool checkConsistency( bool requirePersistent );
1337 #endif
1338 #endif
1339 
1340 
1341     /// create formatted text object with options set
1342     LFormattedText * createFormattedText();
1343 
1344 #if BUILD_LITE!=1
setHightlightOptions(text_highlight_options_t & options)1345     void setHightlightOptions(text_highlight_options_t & options) {
1346         _highlightOptions = options;
1347     }
1348 #endif
1349 
1350 protected:
1351 #if BUILD_LITE!=1
1352     struct DocFileHeader {
1353         lUInt32 render_dx;
1354         lUInt32 render_dy;
1355         lUInt32 render_docflags;
1356         lUInt32 render_style_hash;
1357         lUInt32 stylesheet_hash;
1358         lUInt32 node_displaystyle_hash;
1359         bool serialize( SerialBuf & buf );
1360         bool deserialize( SerialBuf & buf );
DocFileHeaderDocFileHeader1361         DocFileHeader()
1362             : render_dx(0), render_dy(0), render_docflags(0), render_style_hash(0), stylesheet_hash(0),
1363                 node_displaystyle_hash(NODE_DISPLAY_STYLE_HASH_UNINITIALIZED)
1364         {
1365         }
1366     };
1367     DocFileHeader _hdr;
1368     text_highlight_options_t _highlightOptions;
1369 #endif
1370 
1371     LDOMNameIdMap _elementNameTable;    // Element Name<->Id map
1372     LDOMNameIdMap _attrNameTable;       // Attribute Name<->Id map
1373     LDOMNameIdMap _nsNameTable;          // Namespace Name<->Id map
1374     lUInt16       _nextUnknownElementId; // Next Id for unknown element
1375     lUInt16       _nextUnknownAttrId;    // Next Id for unknown attribute
1376     lUInt16       _nextUnknownNsId;      // Next Id for unknown namespace
1377     lString32HashedCollection _attrValueTable;
1378     LVHashTable<lUInt32,lInt32> _idNodeMap; // id to data index map
1379     LVHashTable<lString32,LVImageSourceRef> _urlImageMap; // url to image source map
1380     lUInt16 _idAttrId; // Id for "id" attribute name
1381     lUInt16 _nameAttrId; // Id for "name" attribute name
1382 
1383 #if BUILD_LITE!=1
1384     SerialBuf _pagesData;
1385 #endif
1386 
1387 };
1388 
1389 /*
1390 struct lxmlNode
1391 {
1392     lUInt32 parent;
1393     lUInt8  nodeType;
1394     lUInt8  nodeLevel;
1395 };
1396 */
1397 
1398 struct lxmlAttribute
1399 {
1400     //
1401     lUInt16 nsid;
1402     lUInt16 id;
1403     lUInt32 index;
comparelxmlAttribute1404     inline bool compare( lUInt16 nsId, lUInt16 attrId )
1405     {
1406         return (nsId == nsid || nsId == LXML_NS_ANY) && (id == attrId);
1407     }
setDatalxmlAttribute1408     inline void setData( lUInt16 nsId, lUInt16 attrId, lUInt32 valueIndex )
1409     {
1410         nsid = nsId;
1411         id = attrId;
1412         index = valueIndex;
1413     }
1414 };
1415 
1416 class ldomDocument;
1417 
1418 
1419 #define LDOM_ALLOW_NODE_INDEX 0
1420 
1421 
1422 class ldomDocument;
1423 
1424 /**
1425  * @brief XPointer/XPath object with reference counting.
1426  *
1427  */
1428 class ldomXPointer
1429 {
1430 protected:
1431 	friend class ldomXPointerEx;
1432 	struct XPointerData {
1433 	protected:
1434 		ldomDocument * _doc;
1435 		lInt32 _dataIndex;
1436 		int _offset;
1437 		int _refCount;
1438 	public:
addRefXPointerData1439 		inline void addRef() { _refCount++; }
decRefXPointerData1440 		inline int decRef() { return --_refCount; }
1441 		// create empty
XPointerDataXPointerData1442 		XPointerData() : _doc(NULL), _dataIndex(0), _offset(0), _refCount(1) { }
1443 		// create instance
XPointerDataXPointerData1444         XPointerData( ldomNode * node, int offset )
1445 			: _doc(node?node->getDocument():NULL)
1446 			, _dataIndex(node?node->getDataIndex():0)
1447 			, _offset( offset )
1448 			, _refCount( 1 )
1449 		{ }
1450 		// clone
XPointerDataXPointerData1451 		XPointerData( const XPointerData & v )  : _doc(v._doc), _dataIndex(v._dataIndex), _offset(v._offset), _refCount(1) { }
getDocumentXPointerData1452 		inline ldomDocument * getDocument() { return _doc; }
1453         inline bool operator == (const XPointerData & v) const
1454 		{
1455 			return _doc==v._doc && _dataIndex == v._dataIndex && _offset == v._offset;
1456 		}
1457 		inline bool operator != (const XPointerData & v) const
1458 		{
1459 			return _doc!=v._doc || _dataIndex != v._dataIndex || _offset != v._offset;
1460 		}
isNullXPointerData1461 		inline bool isNull() { return _dataIndex==0 || _doc==NULL; }
getNodeXPointerData1462         inline ldomNode * getNode() { return _dataIndex>0 ? ((lxmlDocBase*)_doc)->getTinyNode( _dataIndex ) : NULL; }
getOffsetXPointerData1463 		inline int getOffset() { return _offset; }
setNodeXPointerData1464         inline void setNode( ldomNode * node )
1465 		{
1466 			if ( node ) {
1467 				_doc = node->getDocument();
1468 				_dataIndex = node->getDataIndex();
1469 			} else {
1470 				_doc = NULL;
1471 				_dataIndex = 0;
1472 			}
1473 		}
setOffsetXPointerData1474 		inline void setOffset( int offset ) { _offset = offset; }
addOffsetXPointerData1475         inline void addOffset( int offset ) { _offset+=offset; }
~XPointerDataXPointerData1476         ~XPointerData() { }
1477 	};
1478 	XPointerData * _data;
1479 	/// node pointer
1480     //ldomNode * _node;
1481 	/// offset within node for pointer, -1 for xpath
1482 	//int _offset;
1483 	// cloning constructor
ldomXPointer(const XPointerData * data)1484 	ldomXPointer( const XPointerData * data )
1485 		: _data( new XPointerData( *data ) )
1486 	{
1487 	}
1488 public:
1489     /// clear pointer (make null)
clear()1490     void clear()
1491     {
1492         if (_data->decRef() == 0)
1493             delete _data;
1494         _data = new XPointerData();
1495     }
1496     /// return document
getDocument()1497 	inline ldomDocument * getDocument() { return _data->getDocument(); }
1498     /// returns node pointer
getNode()1499     inline ldomNode * getNode() const { return _data->getNode(); }
1500 #if BUILD_LITE!=1
1501     /// return parent final node, if found
1502     ldomNode * getFinalNode() const;
1503     /// return true is this node is a final node
1504     bool isFinalNode() const;
1505 #endif
1506     /// returns offset within node
getOffset()1507 	inline int getOffset() const { return _data->getOffset(); }
1508 	/// set pointer node
setNode(ldomNode * node)1509     inline void setNode( ldomNode * node ) { _data->setNode( node ); }
1510 	/// set pointer offset within node
setOffset(int offset)1511 	inline void setOffset( int offset ) { _data->setOffset( offset ); }
1512     /// default constructor makes NULL pointer
ldomXPointer()1513 	ldomXPointer()
1514 		: _data( new XPointerData() )
1515 	{
1516 	}
1517 	/// remove reference
~ldomXPointer()1518 	~ldomXPointer()
1519 	{
1520 		if (_data->decRef() == 0)
1521 			delete _data;
1522 	}
1523     /// copy constructor
ldomXPointer(const ldomXPointer & v)1524 	ldomXPointer( const ldomXPointer& v )
1525 		: _data(v._data)
1526 	{
1527 		_data->addRef();
1528 	}
1529     /// assignment operator
1530 	ldomXPointer & operator =( const ldomXPointer& v )
1531 	{
1532 		if ( _data==v._data )
1533 			return *this;
1534 		if (_data->decRef() == 0)
1535 			delete _data;
1536 		_data = v._data;
1537 		_data->addRef();
1538 		return *this;
1539 	}
1540     /// constructor
ldomXPointer(ldomNode * node,int offset)1541     ldomXPointer( ldomNode * node, int offset )
1542 		: _data( new XPointerData( node, offset ) )
1543 	{
1544 	}
1545     /// get pointer for relative path
1546     ldomXPointer relative( lString32 relativePath );
1547     /// get pointer for relative path
relative(const lChar32 * relativePath)1548     ldomXPointer relative( const lChar32 * relativePath )
1549     {
1550         return relative( lString32(relativePath) );
1551     }
1552 
1553     /// returns true for NULL pointer
isNull()1554 	bool isNull() const
1555 	{
1556         return !_data || _data->isNull();
1557 	}
1558     /// returns true if object is pointer
isPointer()1559 	bool isPointer() const
1560 	{
1561 		return !_data->isNull() && getOffset()>=0;
1562 	}
1563     /// returns true if object is path (no offset specified)
isPath()1564 	bool isPath() const
1565 	{
1566 		return !_data->isNull() && getOffset()==-1;
1567 	}
1568     /// returns true if pointer is NULL
1569 	bool operator !() const
1570 	{
1571 		return _data->isNull();
1572 	}
1573     /// returns true if pointers are equal
1574 	bool operator == (const ldomXPointer & v) const
1575 	{
1576 		return *_data == *v._data;
1577 	}
1578     /// returns true if pointers are not equal
1579 	bool operator != (const ldomXPointer & v) const
1580 	{
1581 		return *_data != *v._data;
1582 	}
1583 //#if BUILD_LITE!=1
1584     /// returns caret rectangle for pointer inside formatted document
1585     bool getRect(lvRect & rect, bool extended=false, bool adjusted=false) const;
1586     /// returns glyph rectangle for pointer inside formatted document considering paddings and borders
1587     /// (with adjusted=true, adjust for left and right side bearing of the glyph, for cleaner highlighting)
1588     bool getRectEx(lvRect & rect, bool adjusted=false) const { return getRect(rect, true, adjusted); }
1589     /// returns coordinates of pointer inside formatted document
1590     lvPoint toPoint( bool extended=false ) const;
1591 //#endif
1592     /// converts to string
1593     lString32 toString( XPointerMode mode = XPATH_USE_NAMES) {
1594         if( XPATH_USE_NAMES==mode ) {
1595             tinyNodeCollection* doc = (tinyNodeCollection*)_data->getDocument();
1596             if ( doc != NULL && doc->getDOMVersionRequested() >= DOM_VERSION_WITH_NORMALIZED_XPOINTERS )
1597                 return toStringV2();
1598             return toStringV1();
1599         }
1600         return toStringV2AsIndexes();
1601     }
1602     lString32 toStringV1(); // Using names, old, with boxing elements (non-normalized)
1603     lString32 toStringV2(); // Using names, new, without boxing elements, so: normalized
1604     lString32 toStringV2AsIndexes(); // Without element names, normalized (not used)
1605 
1606     /// returns XPath node text
1607     lString32 getText(  lChar32 blockDelimiter=0 )
1608     {
1609         ldomNode * node = getNode();
1610         if ( !node )
1611             return lString32::empty_str;
1612         return node->getText( blockDelimiter );
1613     }
1614     /// returns href attribute of <A> element, null string if not found
1615     lString32 getHRef();
1616     /// returns href attribute of <A> element, plus xpointer of <A> element itself
1617     lString32 getHRef(ldomXPointer & a_xpointer);
1618     /// create a copy of pointer data
clone()1619     ldomXPointer * clone()
1620     {
1621             return new ldomXPointer( _data );
1622     }
1623     /// returns true if current node is element
isElement()1624     inline bool isElement() const { return !isNull() && getNode()->isElement(); }
    /// returns true if current node is text
isText()1626     inline bool isText() const { return !isNull() && getNode()->isText(); }
1627     /// returns HTML (serialized from the DOM, may be different from the source HTML)
1628     lString8 getHtml( lString32Collection & cssFiles, int wflags=0 );
1629     lString8 getHtml( int wflags=0 ) {
1630         lString32Collection cssFiles; return getHtml(cssFiles, wflags);
1631     }
1632 };
1633 
1634 #define MAX_DOM_LEVEL 64
1635 /// Xpointer optimized to iterate through DOM tree
1636 class ldomXPointerEx : public ldomXPointer
1637 {
1638 protected:
1639     int _indexes[MAX_DOM_LEVEL];
1640     int _level;
1641     void initIndex();
1642 public:
1643     /// returns bottom level index
getIndex()1644     int getIndex() { return _indexes[_level-1]; }
1645     /// returns node level
getLevel()1646     int getLevel() { return _level; }
1647     /// default constructor
ldomXPointerEx()1648     ldomXPointerEx()
1649 	    : ldomXPointer()
1650     {
1651         initIndex();
1652     }
1653     /// constructor by node pointer and offset
ldomXPointerEx(ldomNode * node,int offset)1654     ldomXPointerEx(  ldomNode * node, int offset )
1655 		: ldomXPointer( node, offset )
1656     {
1657         initIndex();
1658     }
1659     /// copy constructor
ldomXPointerEx(const ldomXPointer & v)1660     ldomXPointerEx( const ldomXPointer& v )
1661 		: ldomXPointer( v._data )
1662     {
1663         initIndex();
1664     }
1665     /// copy constructor
ldomXPointerEx(const ldomXPointerEx & v)1666     ldomXPointerEx( const ldomXPointerEx& v )
1667 		: ldomXPointer( v._data )
1668     {
1669         _level = v._level;
1670         for ( int i=0; i<_level; i++ )
1671             _indexes[ i ] = v._indexes[i];
1672     }
1673     /// assignment operator
1674     ldomXPointerEx & operator =( const ldomXPointer& v )
1675     {
1676 		if ( _data==v._data )
1677 			return *this;
1678 		if (_data->decRef() == 0)
1679 			delete _data;
1680 		_data = new XPointerData( *v._data );
1681         initIndex();
1682         return *this;
1683     }
1684     /// assignment operator
1685     ldomXPointerEx & operator =( const ldomXPointerEx& v )
1686     {
1687 		if ( _data==v._data )
1688 			return *this;
1689 		if (_data->decRef() == 0)
1690 			delete _data;
1691 		_data = new XPointerData( *v._data );
1692         _level = v._level;
1693         for ( int i=0; i<_level; i++ )
1694             _indexes[ i ] = v._indexes[i];
1695         return *this;
1696     }
1697     /// returns true if ranges are equal
1698     bool operator == ( const ldomXPointerEx & v ) const
1699     {
1700         return _data->getDocument()==v._data->getDocument() && _data->getNode()==v._data->getNode() && _data->getOffset()==v._data->getOffset();
1701     }
1702     /// searches path for element with specific id, returns level at which element is founs, 0 if not found
1703     int findElementInPath( lUInt16 id );
1704     /// compare two pointers, returns -1, 0, +1
1705     int compare( const ldomXPointerEx& v ) const;
1706     /// move to next sibling
1707     bool nextSibling();
1708     /// move to previous sibling
1709     bool prevSibling();
1710     /// move to next sibling element
1711     bool nextSiblingElement();
1712     /// move to previous sibling element
1713     bool prevSiblingElement();
1714     /// move to parent
1715     bool parent();
1716     /// move to first child of current node
1717     bool firstChild();
1718     /// move to last child of current node
1719     bool lastChild();
1720     /// move to first element child of current node
1721     bool firstElementChild();
1722     /// move to last element child of current node
1723     bool lastElementChild();
1724     /// move to child #
1725     bool child( int index );
1726     /// move to sibling #
1727     bool sibling( int index );
1728     /// ensure that current node is element (move to parent, if not - from text node to element)
1729     bool ensureElement();
1730     /// moves pointer to parent element with FINAL render method, returns true if success
1731     bool ensureFinal();
1732     /// returns true if current node is visible element with render method == erm_final
1733     bool isVisibleFinal();
1734     /// move to next final visible node (~paragraph)
1735     bool nextVisibleFinal();
1736     /// move to previous final visible node (~paragraph)
1737     bool prevVisibleFinal();
1738     /// returns true if current node is visible element or text
1739     bool isVisible();
1740     // returns true if text node char at offset is part of a word
1741     bool isVisibleWordChar();
1742     /// move to next text node
1743     bool nextText( bool thisBlockOnly = false );
1744     /// move to previous text node
1745     bool prevText( bool thisBlockOnly = false );
1746     /// move to next visible text node
1747     bool nextVisibleText( bool thisBlockOnly = false );
1748     /// move to previous visible text node
1749     bool prevVisibleText( bool thisBlockOnly = false );
1750 
1751     /// move to prev visible char
1752     bool prevVisibleChar( bool thisBlockOnly = false );
1753     /// move to next visible char
1754     bool nextVisibleChar( bool thisBlockOnly = false );
1755 
1756     /// move to previous visible word beginning
1757     bool prevVisibleWordStart( bool thisBlockOnly = false );
1758     /// move to previous visible word end
1759     bool prevVisibleWordEnd( bool thisBlockOnly = false );
1760     /// move to next visible word beginning
1761     bool nextVisibleWordStart( bool thisBlockOnly = false );
1762     /// move to end of current word
1763     bool thisVisibleWordEnd( bool thisBlockOnly = false );
1764     /// move to next visible word end
1765     bool nextVisibleWordEnd( bool thisBlockOnly = false );
1766 
1767     /// move to previous visible word beginning (in sentence)
1768     bool prevVisibleWordStartInSentence();
1769     /// move to previous visible word end (in sentence)
1770     bool prevVisibleWordEndInSentence();
1771     /// move to next visible word beginning (in sentence)
1772     bool nextVisibleWordStartInSentence();
1773     /// move to end of current word (in sentence)
1774     bool thisVisibleWordEndInSentence();
1775     /// move to next visible word end (in sentence)
1776     bool nextVisibleWordEndInSentence();
1777 
1778     /// move to beginning of current visible text sentence
1779     bool thisSentenceStart();
1780     /// move to end of current visible text sentence
1781     bool thisSentenceEnd();
1782     /// move to beginning of next visible text sentence
1783     bool nextSentenceStart();
1784     /// move to beginning of next visible text sentence
1785     bool prevSentenceStart();
1786     /// move to end of next visible text sentence
1787     bool nextSentenceEnd();
1788     /// move to end of prev visible text sentence
1789     bool prevSentenceEnd();
1790     /// returns true if points to beginning of sentence
1791     bool isSentenceStart();
1792     /// returns true if points to end of sentence
1793     bool isSentenceEnd();
1794 
1795     /// returns true if points to last visible text inside block element
1796     bool isLastVisibleTextInBlock();
1797     /// returns true if points to first visible text inside block element
1798     bool isFirstVisibleTextInBlock();
1799 
1800     /// returns block owner node of current node (or current node if it's block)
1801     ldomNode * getThisBlockNode();
1802 
1803     /// returns true if current position is visible word beginning
1804     bool isVisibleWordStart();
1805     /// returns true if current position is visible word end
1806     bool isVisibleWordEnd();
1807     /// forward iteration by elements of DOM three
1808     bool nextElement();
1809     /// backward iteration by elements of DOM three
1810     bool prevElement();
1811     /// calls specified function recursively for all elements of DOM tree
1812     void recurseElements( void (*pFun)( ldomXPointerEx & node ) );
1813     /// calls specified function recursively for all nodes of DOM tree
1814     void recurseNodes( void (*pFun)( ldomXPointerEx & node ) );
1815 
1816     /// move to next sibling or parent's next sibling
1817     bool nextOuterElement();
1818     /// move to (end of) last and deepest child node descendant of current node
1819     bool lastInnerNode( bool toTextEnd=false );
1820     /// move to (end of) last and deepest child text node descendant of current node
1821     bool lastInnerTextNode( bool toTextEnd=false );
1822 };
1823 
1824 class ldomXRange;
1825 
1826 /// callback for DOM tree iteration interface
1827 class ldomNodeCallback {
1828 public:
1829     /// destructor
~ldomNodeCallback()1830     virtual ~ldomNodeCallback() { }
1831     /// called for each found text fragment in range
1832     virtual void onText( ldomXRange * ) = 0;
1833     /// called for each found node in range
1834     virtual bool onElement( ldomXPointerEx * ) = 0;
1835 };
1836 
1837 /// range for word inside text node
1838 class ldomWord
1839 {
1840     ldomNode * _node;
1841     int _start;
1842     int _end;
1843 public:
ldomWord()1844     ldomWord( )
1845     : _node(NULL), _start(0), _end(0)
1846     { }
ldomWord(ldomNode * node,int start,int end)1847     ldomWord( ldomNode * node, int start, int end )
1848     : _node(node), _start(start), _end(end)
1849     { }
ldomWord(const ldomWord & v)1850     ldomWord( const ldomWord & v )
1851     : _node(v._node), _start(v._start), _end(v._end)
1852     { }
1853     ldomWord & operator = ( const ldomWord & v )
1854     {
1855         _node = v._node;
1856         _start = v._start;
1857         _end = v._end;
1858         return *this;
1859     }
1860     /// returns true if object doesn't point valid word
isNull()1861     bool isNull() { return _node==NULL || _start<0 || _end<=_start; }
1862     /// get word text node pointer
getNode()1863     ldomNode * getNode() const { return _node; }
1864     /// get word start offset
getStart()1865     int getStart() const { return _start; }
1866     /// get word end offset
getEnd()1867     int getEnd() const { return _end; }
1868     /// get word start XPointer
getStartXPointer()1869     ldomXPointer getStartXPointer() const { return ldomXPointer( _node, _start ); }
1870     /// get word start XPointer
getEndXPointer()1871     ldomXPointer getEndXPointer() const { return ldomXPointer( _node, _end ); }
1872     /// get word text
getText()1873     lString32 getText()
1874     {
1875         if ( isNull() )
1876             return lString32::empty_str;
1877         lString32 txt = _node->getText();
1878         return txt.substr( _start, _end-_start );
1879     }
1880 };
1881 
1882 /// DOM range
1883 class ldomXRange {
1884     ldomXPointerEx _start;
1885     ldomXPointerEx _end;
1886     /// _flags, only used by ldomXRangeList.getRanges() when making a ldomMarkedRangeList (for native
1887     //  highlighting of a text selection being made, and for crengine internal bookmarks):
1888     //  0: not shown (filtered out in LVDocView::updateSelections() by ldomXRangeList ranges(..., true))
1889     //  1,2,3: legacy drawing (will make a single ldomMarkedRange spanning multiple lines, assuming
1890     //         full width LTR paragraphs) (2 & 3 might be used for crengine internal bookmarks,
1891     //         see hist.h for enum bmk_type)
1892     //  0x11, 0x12, 0x13:  enhanced drawing (will make multiple segmented ldomMarkedRange,
1893     //                     each spanning a single line)
1894     lUInt32 _flags;
1895 public:
ldomXRange()1896     ldomXRange()
1897         : _flags(0)
1898     {
1899     }
1900     ldomXRange( const ldomXPointerEx & start, const ldomXPointerEx & end, lUInt32 flags=0 )
_start(start)1901     : _start( start ), _end( end ), _flags(flags)
1902     {
1903     }
ldomXRange(const ldomXPointer & start,const ldomXPointer & end)1904     ldomXRange( const ldomXPointer & start, const ldomXPointer & end )
1905     : _start( start ), _end( end ), _flags(0)
1906     {
1907     }
1908     /// copy constructor
ldomXRange(const ldomXRange & v)1909     ldomXRange( const ldomXRange & v )
1910     : _start( v._start ), _end( v._end ), _flags(v._flags)
1911     {
1912     }
ldomXRange(const ldomWord & word)1913     ldomXRange( const ldomWord & word )
1914         : _start( word.getStartXPointer() ), _end( word.getEndXPointer() ), _flags(1)
1915     {
1916     }
1917     /// if start is after end, swap start and end
1918     void sort();
1919     /// create intersection of two ranges
1920     ldomXRange( const ldomXRange & v1,  const ldomXRange & v2 );
1921     /// copy constructor of full node range
1922     ldomXRange( ldomNode * p, bool fitEndToLastInnerChild=false );
1923     /// copy assignment
1924     ldomXRange & operator = ( const ldomXRange & v )
1925     {
1926         _start = v._start;
1927         _end = v._end;
1928         return *this;
1929     }
1930     /// returns true if ranges are equal
1931     bool operator == ( const ldomXRange & v ) const
1932     {
1933         return _start == v._start && _end == v._end && _flags==v._flags;
1934     }
1935     /// returns true if interval is invalid or empty
isNull()1936     bool isNull()
1937     {
1938         if ( _start.isNull() || _end.isNull() )
1939             return true;
1940         if ( _start.compare( _end ) > 0 )
1941             return true;
1942         return false;
1943     }
1944     /// makes range empty
clear()1945     void clear()
1946     {
1947         _start.clear();
1948         _end.clear();
1949         _flags = 0;
1950     }
1951     /// returns true if pointer position is inside range
isInside(const ldomXPointerEx & p)1952     bool isInside( const ldomXPointerEx & p ) const
1953     {
1954         return ( _start.compare( p ) <= 0 && _end.compare( p ) >= 0 );
1955     }
1956     /// returns interval start point
getStart()1957     ldomXPointerEx & getStart() { return _start; }
1958     /// returns interval end point
getEnd()1959     ldomXPointerEx & getEnd() { return _end; }
1960     /// sets interval start point
setStart(ldomXPointerEx & start)1961     void setStart( ldomXPointerEx & start ) { _start = start; }
1962     /// sets interval end point
setEnd(ldomXPointerEx & end)1963     void setEnd( ldomXPointerEx & end ) { _end = end; }
1964     /// returns flags value
getFlags()1965     lUInt32 getFlags() { return _flags; }
1966     /// sets new flags value
setFlags(lUInt32 flags)1967     void setFlags( lUInt32 flags ) { _flags = flags; }
1968     /// returns true if this interval intersects specified interval
1969     bool checkIntersection( ldomXRange & v );
1970     /// returns text between two XPointer positions
1971     lString32 getRangeText( lChar32 blockDelimiter='\n', int maxTextLen=0 );
1972     /// get all words from specified range
1973     void getRangeWords( LVArray<ldomWord> & list );
1974     /// returns href attribute of <A> element, null string if not found
1975     lString32 getHRef();
1976     /// returns href attribute of <A> element, plus xpointer of <A> element itself
1977     lString32 getHRef(ldomXPointer & a_xpointer);
1978     /// sets range to nearest word bounds, returns true if success
1979     static bool getWordRange( ldomXRange & range, ldomXPointer & p );
1980     /// run callback for each node in range
1981     void forEach( ldomNodeCallback * callback );
1982 #if BUILD_LITE!=1
1983     /// returns rectangle (in doc coordinates) for range. Returns true if found.
1984     bool getRectEx( lvRect & rect, bool & isSingleLine );
getRectEx(lvRect & rect)1985     bool getRectEx( lvRect & rect ) {
1986         bool isSingleLine; return getRectEx(rect, isSingleLine);
1987     };
1988     // returns multiple segments rects (one for each text line)
1989     // that the ldomXRange spans on the page.
1990     void getSegmentRects( LVArray<lvRect> & rects );
1991 #endif
1992     /// returns nearest common element for start and end points
1993     ldomNode * getNearestCommonParent();
1994     /// returns HTML (serialized from the DOM, may be different from the source HTML)
1995     lString8 getHtml( lString32Collection & cssFiles, int wflags=0, bool fromRootNode=false );
1996     lString8 getHtml( int wflags=0, bool fromRootNode=false ) {
1997         lString32Collection cssFiles; return getHtml(cssFiles, wflags, fromRootNode);
1998     };
1999 
2000     /// searches for specified text inside range
2001     bool findText( lString32 pattern, bool caseInsensitive, bool reverse, LVArray<ldomWord> & words, int maxCount, int maxHeight, int maxHeightCheckStartY = -1, bool checkMaxFromStart = false );
2002 };
2003 
2004 class ldomMarkedText
2005 {
2006 public:
2007     lString32 text;
2008     lUInt32   flags;
2009     int offset;
ldomMarkedText(lString32 s,lUInt32 flg,int offs)2010     ldomMarkedText( lString32 s, lUInt32 flg, int offs )
2011     : text(s), flags(flg), offset(offs)
2012     {
2013     }
ldomMarkedText(const ldomMarkedText & v)2014     ldomMarkedText( const ldomMarkedText & v )
2015     : text(v.text), flags(v.flags)
2016     {
2017     }
2018 };
2019 
2020 typedef LVPtrVector<ldomMarkedText> ldomMarkedTextList;
2021 
/* direction selector; values are spelled out but match the implicit numbering */
enum MoveDirection {
    DIR_ANY   = 0,
    DIR_LEFT  = 1,
    DIR_RIGHT = 2,
    DIR_UP    = 3,
    DIR_DOWN  = 4
};
2029 
2030 /// range in document, marked with specified flags
2031 class ldomMarkedRange
2032 {
2033 public:
2034     /// start document point
2035     lvPoint   start;
2036     /// end document point
2037     lvPoint   end;
2038     /// flags:
2039     //  0: not shown
2040     //  1,2,3: legacy drawing (a single mark may spans multiple lines, assuming full width
2041     //         LTR paragraphs) (2 & 3 might be used for crengine internal bookmarks,
2042     //         see hist.h for enum bmk_type)
2043     //  0x11, 0x12, 0x13:  enhanced drawing (segmented mark, spanning a single line)
2044     lUInt32   flags;
empty()2045     bool empty()
2046     {
2047         return ( start.y>end.y || ( start.y == end.y && start.x >= end.x ) );
2048     }
2049     /// returns mark middle point for single line mark, or start point for multiline mark
2050     lvPoint getMiddlePoint();
2051     /// returns distance (dx+dy) from specified point to middle point
2052     int calcDistance( int x, int y, MoveDirection dir );
2053     /// returns true if intersects specified line rectangle
2054     bool intersects( lvRect & rc, lvRect & intersection );
2055     /// constructor
ldomMarkedRange(lvPoint _start,lvPoint _end,lUInt32 _flags)2056     ldomMarkedRange( lvPoint _start, lvPoint _end, lUInt32 _flags )
2057     : start(_start), end(_end), flags(_flags)
2058     {
2059     }
2060     /// constructor
ldomMarkedRange(ldomWord & word)2061     ldomMarkedRange( ldomWord & word ) {
2062         ldomXPointer startPos(word.getNode(), word.getStart() );
2063         ldomXPointer endPos(word.getNode(), word.getEnd() );
2064         start = startPos.toPoint();
2065         end = endPos.toPoint();
2066     }
2067     /// copy constructor
ldomMarkedRange(const ldomMarkedRange & v)2068     ldomMarkedRange( const ldomMarkedRange & v )
2069     : start(v.start), end(v.end), flags(v.flags)
2070     {
2071     }
2072 };
2073 
2074 class ldomWordEx : public ldomWord
2075 {
2076     ldomWord _word;
2077     ldomMarkedRange _mark;
2078     ldomXRange _range;
2079     lString32 _text;
2080 public:
ldomWordEx(ldomWord & word)2081     ldomWordEx( ldomWord & word )
2082         :  _word(word), _mark(word), _range(word)
2083     {
2084         _text = removeSoftHyphens( _word.getText() );
2085     }
getWord()2086     ldomWord & getWord() { return _word; }
getRange()2087     ldomXRange & getRange() { return _range; }
getMark()2088     ldomMarkedRange & getMark() { return _mark; }
getText()2089     lString32 & getText() { return _text; }
2090 };
2091 
2092 /// list of extended words
2093 class ldomWordExList : public LVPtrVector<ldomWordEx>
2094 {
2095     int minx;
2096     int maxx;
2097     int miny;
2098     int maxy;
2099     int x;
2100     int y;
2101     ldomWordEx * selWord;
2102     lString32Collection pattern;
2103     void init();
2104     ldomWordEx * findWordByPattern();
2105 public:
ldomWordExList()2106     ldomWordExList()
2107         : minx(-1), maxx(-1), miny(-1), maxy(-1), x(-1), y(-1), selWord(NULL)
2108     {
2109     }
2110     /// adds all visible words from range, returns number of added words
2111     int addRangeWords( ldomXRange & range, bool trimPunctuation );
2112     /// find word nearest to specified point
2113     ldomWordEx * findNearestWord( int x, int y, MoveDirection dir );
2114     /// select word
2115     void selectWord( ldomWordEx * word, MoveDirection dir );
2116     /// select next word in specified direction
2117     ldomWordEx * selectNextWord( MoveDirection dir, int moveBy = 1 );
2118     /// select middle word in range
2119     ldomWordEx * selectMiddleWord();
2120     /// get selected word
getSelWord()2121     ldomWordEx * getSelWord() { return selWord; }
2122     /// try append search pattern and find word
2123     ldomWordEx * appendPattern(lString32 chars);
2124     /// remove last character from pattern and try to search
2125     ldomWordEx * reducePattern();
2126 };
2127 
2128 
2129 /// list of marked ranges
2130 class ldomMarkedRangeList : public LVPtrVector<ldomMarkedRange>
2131 {
2132 public:
ldomMarkedRangeList()2133     ldomMarkedRangeList()
2134     {
2135     }
2136     /// create bounded by RC list, with (0,0) coordinates at left top corner
2137     // crop/discard elements outside of rc (or outside of crop_rc instead if provided)
2138     ldomMarkedRangeList( const ldomMarkedRangeList * list, lvRect & rc, lvRect * crop_rc=NULL );
2139 };
2140 
2141 class ldomXRangeList : public LVPtrVector<ldomXRange>
2142 {
2143 public:
2144     /// add ranges for words
addWords(const LVArray<ldomWord> & words)2145     void addWords( const LVArray<ldomWord> & words )
2146     {
2147         for ( int i=0; i<words.length(); i++ )
2148             LVPtrVector<ldomXRange>::add( new ldomXRange( words[i] ) );
2149     }
ldomXRangeList(const LVArray<ldomWord> & words)2150     ldomXRangeList( const LVArray<ldomWord> & words )
2151     {
2152         addWords( words );
2153     }
2154     /// create list splittiny existing list into non-overlapping ranges
2155     ldomXRangeList( ldomXRangeList & srcList, bool splitIntersections );
2156     /// create list by filtering existing list, to get only values which intersect filter range
2157     ldomXRangeList( ldomXRangeList & srcList, ldomXRange & filter );
2158 #if BUILD_LITE!=1
2159     /// fill text selection list by splitting text into monotonic flags ranges
2160     void splitText( ldomMarkedTextList &dst, ldomNode * textNodeToSplit );
2161     /// fill marked ranges list
2162     void getRanges( ldomMarkedRangeList &dst );
2163 #endif
2164     /// split into subranges using intersection
2165     void split( ldomXRange * r );
2166     /// default constructor for empty list
ldomXRangeList()2167     ldomXRangeList() {};
2168 };
2169 
2170 class LVTocItem;
2171 class LVDocView;
2172 
2173 /// TOC item
2174 class LVTocItem
2175 {
2176     friend class LVDocView;
2177 private:
2178     LVTocItem *     _parent;
2179     ldomDocument *  _doc;
2180     lInt32          _level;
2181     lInt32          _index;
2182     lInt32          _page;
2183     lInt32          _percent;
2184     lString32       _name;
2185     lString32       _path;
2186     ldomXPointer    _position;
2187     LVPtrVector<LVTocItem> _children;
2188     //====================================================
2189     //LVTocItem( ldomXPointer pos, const lString32 & name ) : _parent(NULL), _level(0), _index(0), _page(0), _percent(0), _name(name), _path(pos.toString()), _position(pos) { }
LVTocItem(ldomXPointer pos,lString32 path,const lString32 & name)2190     LVTocItem( ldomXPointer pos, lString32 path, const lString32 & name ) : _parent(NULL), _level(0), _index(0), _page(0), _percent(0), _name(name), _path(path), _position(pos) { }
addChild(LVTocItem * item)2191     void addChild( LVTocItem * item ) { item->_level=_level+1; item->_parent=this; item->_index=_children.length(), item->_doc=_doc; _children.add(item); }
2192     //====================================================
setPage(int n)2193     void setPage( int n ) { _page = n; }
setPercent(int n)2194     void setPercent( int n ) { _percent = n; }
2195 public:
2196     /// serialize to byte array (pointer will be incremented by number of bytes written)
2197     bool serialize( SerialBuf & buf );
2198     /// deserialize from byte array (pointer will be incremented by number of bytes read)
2199     bool deserialize( ldomDocument * doc, SerialBuf & buf );
2200     /// get page number
getPage()2201     int getPage() { return _page; }
2202     /// get position percent * 100
getPercent()2203     int getPercent() { return _percent; }
2204     /// returns parent node pointer
getParent()2205     LVTocItem * getParent() const { return _parent; }
2206     /// returns node level (0==root node, 1==top level)
getLevel()2207     int getLevel() const { return _level; }
2208     /// returns node index
getIndex()2209     int getIndex() const { return _index; }
2210     /// returns section title
getName()2211     lString32 getName() const { return _name; }
2212     /// returns position pointer
2213     ldomXPointer getXPointer();
2214     /// set position pointer (for cases where we need to create a LVTocItem as a container, but
2215     /// we'll know the xpointer only later, mostly always the same xpointer as its first child)
setXPointer(ldomXPointer xp)2216     void setXPointer(ldomXPointer xp) { _position = xp; }
2217     /// returns position path
2218     lString32 getPath();
2219     /// returns Y position
2220     int getY();
2221     /// returns page number
2222     //int getPageNum( LVRendPageList & pages );
2223     /// returns child node count
getChildCount()2224     int getChildCount() const { return _children.length(); }
2225     /// returns child node by index
getChild(int index)2226     LVTocItem * getChild( int index ) const { return _children[index]; }
2227     /// add child TOC node
addChild(const lString32 & name,ldomXPointer ptr,lString32 path)2228     LVTocItem * addChild( const lString32 & name, ldomXPointer ptr, lString32 path )
2229     {
2230         LVTocItem * item = new LVTocItem( ptr, path, name );
2231         addChild( item );
2232         return item;
2233     }
clear()2234     void clear() { _children.clear(); }
2235     // root node constructor
LVTocItem(ldomDocument * doc)2236     LVTocItem( ldomDocument * doc ) : _parent(NULL), _doc(doc), _level(0), _index(0), _page(0) { }
~LVTocItem()2237     ~LVTocItem() { clear(); }
2238 
2239     /// For use on the root toc item only (_page, otherwise unused, can be used to store this flag)
setAlternativeTocFlag()2240     void setAlternativeTocFlag() { if (_level==0) _page = 1; }
hasAlternativeTocFlag()2241     bool hasAlternativeTocFlag() { return _level==0 && _page==1; }
2242 
2243     /// When page numbers have been calculated, LVDocView::updatePageNumbers()
2244     /// sets the root toc item _percent to -1. So let's use it to know that fact.
hasValidPageNumbers()2245     bool hasValidPageNumbers() { return _level==0 && _percent == -1; }
invalidatePageNumbers()2246     void invalidatePageNumbers() { if (_level==0) _percent = 0; }
2247 };
2248 
2249 /// PageMapItem
2250 class LVPageMapItem
2251 {
2252     friend class LVDocView;
2253     friend class LVPageMap;
2254 private:
2255     ldomDocument *  _doc;
2256     lInt32          _index;
2257     lInt32          _page;
2258     lInt32          _doc_y;
2259     lString32       _label;
2260     lString32       _path;
2261     ldomXPointer    _position;
LVPageMapItem(ldomXPointer pos,lString32 path,const lString32 & label)2262     LVPageMapItem( ldomXPointer pos, lString32 path, const lString32 & label )
2263         : _index(0), _page(0), _doc_y(-1), _label(label), _path(path), _position(pos)
2264         { }
setPage(int n)2265     void setPage( int n ) { _page = n; }
setDocY(int y)2266     void setDocY( int y ) { _doc_y = y; }
2267 public:
2268     /// serialize to byte array (pointer will be incremented by number of bytes written)
2269     bool serialize( SerialBuf & buf );
2270     /// deserialize from byte array (pointer will be incremented by number of bytes read)
2271     bool deserialize( ldomDocument * doc, SerialBuf & buf );
2272     /// get rendered page number
getPage()2273     int getPage() { return _page; }
2274     /// returns node index
getIndex()2275     int getIndex() const { return _index; }
2276     /// returns page label
getLabel()2277     lString32 getLabel() const { return _label; }
2278     /// returns position pointer
2279     ldomXPointer getXPointer();
2280     /// returns position path
2281     lString32 getPath();
2282     /// returns Y position
2283     int getDocY(bool refresh=false);
LVPageMapItem(ldomDocument * doc)2284     LVPageMapItem( ldomDocument * doc ) : _doc(doc), _index(0), _page(0), _doc_y(-1) { }
2285 };
2286 
2287 /// PageMapItems container
2288 class LVPageMap
2289 {
2290     friend class LVDocView;
2291 private:
2292     ldomDocument *  _doc;
2293     bool            _page_info_valid;
2294     lString32       _source;
2295     LVPtrVector<LVPageMapItem> _children;
addPage(LVPageMapItem * item)2296     void addPage( LVPageMapItem * item ) {
2297         item->_doc = _doc;
2298         item->_index = _children.length();
2299         _children.add(item);
2300     }
2301 public:
2302     /// serialize to byte array (pointer will be incremented by number of bytes written)
2303     bool serialize( SerialBuf & buf );
2304     /// deserialize from byte array (pointer will be incremented by number of bytes read)
2305     bool deserialize( ldomDocument * doc, SerialBuf & buf );
2306     /// returns child node count
getChildCount()2307     int getChildCount() const { return _children.length(); }
2308     /// returns child node by index
getChild(int index)2309     LVPageMapItem * getChild( int index ) const { return _children[index]; }
2310     /// add page item
addPage(const lString32 & label,ldomXPointer ptr,lString32 path)2311     LVPageMapItem * addPage( const lString32 & label, ldomXPointer ptr, lString32 path )
2312     {
2313         LVPageMapItem * item = new LVPageMapItem( ptr, path, label );
2314         addPage( item );
2315         return item;
2316     }
clear()2317     void clear() { _children.clear(); }
hasValidPageInfo()2318     bool hasValidPageInfo() { return _page_info_valid; }
invalidatePageInfo()2319     void invalidatePageInfo() { _page_info_valid = false; }
2320     // Page source (info about the book paper version the page labels reference)
setSource(lString32 source)2321     void setSource( lString32 source ) { _source = source; }
getSource()2322     lString32 getSource() const { return _source; }
2323     // root node constructor
LVPageMap(ldomDocument * doc)2324     LVPageMap( ldomDocument * doc )
2325         : _doc(doc), _page_info_valid(false) { }
~LVPageMap()2326     ~LVPageMap() { clear(); }
2327 };
2328 
2329 
2330 class ldomNavigationHistory
2331 {
2332     private:
2333         lString32Collection _links;
2334         int _pos;
clearTail()2335         void clearTail()
2336         {
2337             if (_links.length() > _pos)
2338                 _links.erase(_pos, _links.length() - _pos);
2339         }
2340     public:
clear()2341         void clear()
2342         {
2343             _links.clear();
2344             _pos = 0;
2345         }
save(lString32 link)2346         bool save( lString32 link )
2347         {
2348             if (_pos==(int)_links.length() && _pos>0 && _links[_pos-1]==link )
2349                 return false;
2350             if ( _pos>=(int)_links.length() || _links[_pos]!=link ) {
2351                 clearTail();
2352                 _links.add( link );
2353                 _pos = _links.length();
2354                 return true;
2355             } else if (_links[_pos]==link) {
2356                 _pos++;
2357                 return true;
2358             }
2359             return false;
2360         }
back()2361         lString32 back()
2362         {
2363             if (_pos==0)
2364                 return lString32::empty_str;
2365             return _links[--_pos];
2366         }
forward()2367         lString32 forward()
2368         {
2369             if (_pos>=(int)_links.length()-1)
2370                 return lString32::empty_str;
2371             return _links[++_pos];
2372         }
backCount()2373         int backCount()
2374         {
2375             return _pos;
2376         }
forwardCount()2377         int forwardCount()
2378         {
2379             return _links.length() - _pos;
2380         }
2381 };
2382 
/// Pair of integers kept per numbered list: a maximum counter and a maximum width
/// (NOTE(review): units/meaning of both values are set by the callers; not visible here)
class ListNumberingProps
{
public:
    int maxCounter;
    int maxWidth;
    /// stores c as maxCounter and w as maxWidth
    ListNumberingProps( int c, int w )
    {
        maxCounter = c;
        maxWidth = w;
    }
};
/// LVRef handle to a ListNumberingProps (the value type stored in ldomDocument::lists)
typedef LVRef<ListNumberingProps> ListNumberingPropsRef;
2394 
/// DOM document: lxmlDocBase extended with a table of contents (m_toc), a page map
/// (m_pagemap), a container reference, an embedded font list and, when BUILD_LITE!=1,
/// rendering state (default font/style, page size, selections) and cache file support.
class ldomDocument : public lxmlDocBase
{
    friend class ldomDocumentWriter;
    friend class ldomDocumentWriterFilter;
private:
    LVTocItem m_toc;
    LVPageMap m_pagemap;
#if BUILD_LITE!=1
    font_ref_t _def_font; // default font
    css_style_ref_t _def_style; // default style (see getDefaultStyle(), isDefStyleSet())
    lUInt32 _last_docflags;
    int _page_height;
    int _page_width;
    bool _rendered; // returned by isRendered(); reset by forceReinitStyles()
    bool _just_rendered_from_cache;
    bool _toc_from_cache_valid;
    lUInt32 _warnings_seen_bitmap; // presumably one bit per printWarning() warning_id -- confirm in .cpp
    ldomXRangeList _selections;
#endif

    lString32 _docStylesheetFileName;

    LVContainerRef _container;

    // ListNumberingPropsRef values keyed by lUInt32 (see get/setNodeNumberingProps())
    LVHashTable<lUInt32, ListNumberingPropsRef> lists;

    LVEmbeddedFontList _fontList;


#if BUILD_LITE!=1
    /// load document cache file content
    bool loadCacheFileContent(CacheLoadingCallback * formatCallback, LVDocViewCallback * progressCallback=NULL);

    /// save changes to cache file
    bool saveChanges();
    /// saves changes to cache file, limited by time interval (can be called again to continue after TIMEOUT)
    virtual ContinuousOperationResult saveChanges( CRTimerUtil & maxTime, LVDocViewCallback * progressCallback=NULL );
#endif

    /// create XPointer from a non-normalized string made by toStringV1()
    ldomXPointer createXPointerV1( ldomNode * baseNode, const lString32 & xPointerStr );
    /// create XPointer from a normalized string made by toStringV2()
    ldomXPointer createXPointerV2( ldomNode * baseNode, const lString32 & xPointerStr );
protected:

#if BUILD_LITE!=1
    void applyDocumentStyleSheet();
#endif

public:

#if BUILD_LITE!=1
    /// drops styles, zeroes the stored render style hash and marks the document as not rendered
    void forceReinitStyles() {
        dropStyles();
        _hdr.render_style_hash = 0;
        _rendered = false;
    }

    ListNumberingPropsRef getNodeNumberingProps( lUInt32 nodeDataIndex );
    void setNodeNumberingProps( lUInt32 nodeDataIndex, ListNumberingPropsRef v );
    void resetNodeNumberingProps();
#endif

#if BUILD_LITE!=1
    /// returns object image stream
    LVStreamRef getObjectImageStream( lString32 refName );
    /// returns object image source
    LVImageSourceRef getObjectImageSource( lString32 refName );

    /// returns true when the default style reference is not null
    bool isDefStyleSet()
    {
        return !_def_style.isNull();
    }

    /// return document's embedded font list
    LVEmbeddedFontList & getEmbeddedFontList() { return _fontList; }
    /// register embedded document fonts in font manager, if any exist in document
    void registerEmbeddedFonts();
    /// unregister embedded document fonts in font manager, if any exist in document
    void unregisterEmbeddedFonts();
#endif

    /// returns pointer to TOC root node
    LVTocItem * getToc() { return &m_toc; }
    /// build alternative TOC from document heading elements (H1 to H6) and cr-hints, or docFragments
    void buildAlternativeToc();
    /// returns true when the TOC root item carries the alternative TOC flag
    bool isTocAlternativeToc() { return m_toc.hasAlternativeTocFlag(); }
    /// build TOC from headings
    void buildTocFromHeadings();

    /// returns pointer to PageMapItems container
    LVPageMap * getPageMap() { return &m_pagemap; }

#if BUILD_LITE!=1
    bool isTocFromCacheValid() { return _toc_from_cache_valid; }

    /// save document formatting parameters after render
    void updateRenderContext();
    /// check document formatting parameters before render - whether we need to reformat; returns false if render is necessary
    bool checkRenderContext();
#endif

#if BUILD_LITE!=1
    /// try opening from cache file, find by source file name (w/o path) and crc32
    virtual bool openFromCache( CacheLoadingCallback * formatCallback, LVDocViewCallback * progressCallback=NULL );
    /// saves recent changes to mapped file
    virtual ContinuousOperationResult updateMap(CRTimerUtil & maxTime, LVDocViewCallback * progressCallback=NULL);
    /// swaps to cache file or saves changes, limited by time interval
    virtual ContinuousOperationResult swapToCache( CRTimerUtil & maxTime );
    /// saves recent changes to mapped file: convenience overload that passes a default-constructed
    /// CRTimerUtil to the timed updateMap() and returns true unless the result is CR_ERROR
    virtual bool updateMap(LVDocViewCallback * progressCallback=NULL) {
        CRTimerUtil infinite;
        return updateMap(infinite, progressCallback)!=CR_ERROR; // NOLINT: Call to virtual function during destruction
    }
#endif


    LVContainerRef getContainer() { return _container; }
    void setContainer( LVContainerRef cont ) { _container = cont; }

#if BUILD_LITE!=1
    /// empties the rendered block cache
    void clearRendBlockCache() { _renderedBlockCache.clear(); }
#endif
    void clear();
    lString32 getDocStylesheetFileName() { return _docStylesheetFileName; }
    void setDocStylesheetFileName(lString32 fileName) { _docStylesheetFileName = fileName; }

    ldomDocument();
    /// creates empty document which is ready to be copy target of doc partial contents
    ldomDocument( ldomDocument & doc );

#if BUILD_LITE!=1
    /// return selections collection
    ldomXRangeList & getSelections() { return _selections; }

    /// get full document height
    int getFullHeight();
    /// returns page height setting
    int getPageHeight() { return _page_height; }
    /// returns page width setting
    int getPageWidth() { return _page_width; }
#endif
    /// saves document contents as XML to stream with specified encoding
    bool saveToStream( LVStreamRef stream, const char * codepage, bool treeLayout=false );
    /// print a warning message (only once if warning_id provided, between 1 and 32)
    void printWarning(const char * msg, int warning_id=0);
#if BUILD_LITE!=1
    /// get default font reference
    font_ref_t getDefaultFont() { return _def_font; }
    /// get default style reference
    css_style_ref_t getDefaultStyle() { return _def_style; }

    inline bool parseStyleSheet(lString32 codeBase, lString32 css);
    inline bool parseStyleSheet(lString32 cssFile);
#endif
    /// destructor
    virtual ~ldomDocument();
#if BUILD_LITE!=1
    bool isRendered() { return _rendered; }
    /// renders (formats) document in memory: returns true if re-rendering needed, false if not
    virtual bool render( LVRendPageList * pages, LVDocViewCallback * callback, int width, int dy,
                         bool showCover, int y0, font_ref_t def_font, int def_interline_space,
                         CRPropRef props, int usable_left_overflow=0, int usable_right_overflow=0 );
    /// set global rendering properties
    virtual bool setRenderProps( int width, int dy, bool showCover, int y0, font_ref_t def_font,
                                 int def_interline_space, CRPropRef props );
#endif
    /// create xpointer from pointer string
    ldomXPointer createXPointer( const lString32 & xPointerStr );
    /// get node by pointer string (the node of the xpointer created from it)
    ldomNode * nodeFromXPath( const lString32 & xPointerStr )
    {
        return createXPointer( xPointerStr ).getNode();
    }
    /// get element text by pointer string (lString32::empty_str when no node is found)
    lString32 textFromXPath( const lString32 & xPointerStr )
    {
        ldomNode * node = nodeFromXPath( xPointerStr );
        if ( !node )
            return lString32::empty_str;
        return node->getText();
    }

    /// create xpointer from relative pointer string
    /// (parsed as a normalized V2 string when the requested DOM version is at least
    /// DOM_VERSION_WITH_NORMALIZED_XPOINTERS, as a V1 string otherwise)
    ldomXPointer createXPointer( ldomNode * baseNode, const lString32 & xPointerStr )
    {
        if( _DOMVersionRequested >= DOM_VERSION_WITH_NORMALIZED_XPOINTERS)
            return createXPointerV2(baseNode, xPointerStr);
        return createXPointerV1(baseNode, xPointerStr);
    }

#if BUILD_LITE!=1
    /// create xpointer from doc point
    ldomXPointer createXPointer( lvPoint pt, int direction=PT_DIR_EXACT, bool strictBounds=false, ldomNode * from_node=NULL );
    /// get rendered block cache object
    CVRendBlockCache & getRendBlockCache() { return _renderedBlockCache; }

    bool findText( lString32 pattern, bool caseInsensitive, bool reverse, int minY, int maxY, LVArray<ldomWord> & words, int maxCount, int maxHeight, int maxHeightCheckStartY = -1 );
#endif
};
2595 
2596 
2597 class ldomDocumentWriter;
2598 
/// Per-element helper object of the document writers.
/// Every member (constructor and destructor included) is private: only the friend
/// classes ldomDocumentWriter and ldomDocumentWriterFilter can create and use it.
class ldomElementWriter
{
    ldomElementWriter * _parent;
    ldomDocument * _document;

    ldomNode * _element;
    LVTocItem * _tocItem;
    lString32 _path;
    const css_elem_def_props_t * _typeDef;
    bool _allowText;
    bool _isBlock;
    bool _isSection;
    bool _stylesheetIsSet;
    bool _bodyEnterCalled;
    int _pseudoElementAfterChildIndex;
    lUInt32 _flags;
    lUInt32 getFlags();
    void updateTocItem();
    void onBodyEnter();
    void onBodyExit();
    /// returns the DOM node held in _element
    ldomNode * getElement()
    {
        return _element;
    }
    lString32 getPath();
    void onText( const lChar32 * text, int len, lUInt32 flags, bool insert_before_last_child=false );
    void addAttribute( lUInt16 nsid, lUInt16 id, const lChar32 * value );
    //lxmlElementWriter * pop( lUInt16 id );

    ldomElementWriter(ldomDocument * document, lUInt16 nsid, lUInt16 id, ldomElementWriter * parent, bool insert_before_last_child=false);
    ~ldomElementWriter();

    friend class ldomDocumentWriter;
    friend class ldomDocumentWriterFilter;
    //friend ldomElementWriter * pop( ldomElementWriter * obj, lUInt16 id );
};
2635 
/** \brief callback object to fill DOM tree

    To be used with XML parser as callback object.

    Creates document according to incoming events.
*/
class ldomDocumentWriter : public LVXMLParserCallback
{
protected:
    //============================
    ldomDocument * _document;
    //ldomElement * _currNode;
    ldomElementWriter * _currNode;
    bool _errFlag;
    bool _headerOnly;
    bool _popStyleOnFinish;
    lUInt16 _stopTagId;
    //============================
    lUInt32 _flags;
    bool _inHeadStyle;
    lString32 _headStyleText;
    lString32Collection _stylesheetLinks;
    /// hook taking a node to finalize: this implementation calls node->persist()
    /// (NOTE(review): invoked from the .cpp; presumably once an element is closed -- confirm)
    virtual void ElementCloseHandler( ldomNode * node ) { node->persist(); }
public:
    /// returns flags
    virtual lUInt32 getFlags() { return _flags; }
    /// sets flags
    virtual void setFlags( lUInt32 flags ) { _flags = flags; }
    // overrides
    /// called when encoding directive found in document
    virtual void OnEncoding( const lChar32 * name, const lChar32 * table );
    /// called on parsing start
    virtual void OnStart(LVFileFormatParser * parser);
    /// called on parsing end
    virtual void OnStop();
    /// called on opening tag
    virtual ldomNode * OnTagOpen( const lChar32 * nsname, const lChar32 * tagname );
    /// called after > of opening tag (when entering tag body)
    virtual void OnTagBody();
    /// called on closing tag
    virtual void OnTagClose( const lChar32 * nsname, const lChar32 * tagname, bool self_closing_tag=false );
    /// called on attribute
    virtual void OnAttribute( const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue );
    /// close tags
    ldomElementWriter * pop( ldomElementWriter * obj, lUInt16 id );
    /// called on text
    virtual void OnText( const lChar32 * text, int len, lUInt32 flags );
    /// add named BLOB data to document
    /// (forwards to _document->addBlob(); always returns false when BUILD_LITE==1)
    virtual bool OnBlob(lString32 name, const lUInt8 * data, int size) {
#if BUILD_LITE!=1
        return _document->addBlob(name, data, size);
#else
        return false;
#endif
    }
    /// set document property (stored as a string in the document's property container)
    virtual void OnDocProperty(const char * name, lString8 value) { _document->getProps()->setString(name, value); }

    /// constructor
    ldomDocumentWriter(ldomDocument * document, bool headerOnly=false );
    /// destructor
    virtual ~ldomDocumentWriter();
};
2699 
/** \brief callback object to fill DOM tree

    To be used with XML parser as callback object.

    Creates document according to incoming events.

    Autoclose HTML tags.
*/
class ldomDocumentWriterFilter : public ldomDocumentWriter
{
protected:
    bool _libRuDocumentToDetect;
    bool _libRuDocumentDetected;
    bool _libRuParagraphStart;
    bool _libRuParseAsPre;
    lUInt16 _styleAttrId;
    lUInt16 _classAttrId;
    // one lUInt16 array pointer per element type id
    // (presumably built from the constructor's `rules` argument -- confirm in .cpp)
    lUInt16 * _rules[MAX_ELEMENT_TYPE_ID];
    bool _tagBodyCalled;
    // Some states used when gDOMVersionRequested >= 20200824
    bool _htmlTagSeen;
    bool _headTagSeen;
    bool _bodyTagSeen;
    bool _curNodeIsSelfClosing;
    bool _curTagIsIgnored;
    ldomElementWriter * _curNodeBeforeFostering;
    ldomElementWriter * _curFosteredNode;
    ldomElementWriter * _lastP;
    virtual void AutoClose( lUInt16 tag_id, bool open );
    virtual bool AutoOpenClosePop( int step, lUInt16 tag_id );
    virtual lUInt16 popUpTo( ldomElementWriter * target, lUInt16 target_id=0, int scope=0 );
    virtual bool CheckAndEnsureFosterParenting(lUInt16 tag_id);
    /// same body as the ldomDocumentWriter version: calls node->persist()
    virtual void ElementCloseHandler( ldomNode * node ) { node->persist(); }
    virtual void appendStyle( const lChar32 * style );
    virtual void setClass( const lChar32 * className, bool overrideExisting=false );
public:
    /// called on attribute
    virtual void OnAttribute( const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue );
    /// called on opening tag
    virtual ldomNode * OnTagOpen( const lChar32 * nsname, const lChar32 * tagname );
    /// called after > of opening tag (when entering tag body)
    virtual void OnTagBody();
    /// called on closing tag
    virtual void OnTagClose( const lChar32 * nsname, const lChar32 * tagname, bool self_closing_tag=false );
    /// called on text
    virtual void OnText( const lChar32 * text, int len, lUInt32 flags );
    /// constructor
    ldomDocumentWriterFilter(ldomDocument * document, bool headerOnly, const char *** rules);
    /// destructor
    virtual ~ldomDocumentWriterFilter();
};
2751 
2752 class ldomDocumentFragmentWriter : public LVXMLParserCallback
2753 {
2754 private:
2755     //============================
2756     LVXMLParserCallback * parent;
2757     lString32 baseTag;
2758     lString32 baseTagReplacement;
2759     lString32 codeBase;
2760     lString32 filePathName;
2761     lString32 codeBasePrefix;
2762     lString32 stylesheetFile;
2763     lString32 tmpStylesheetFile;
2764     lString32Collection stylesheetLinks;
2765     bool insideTag;
2766     int styleDetectionState;
2767     LVHashTable<lString32, lString32> pathSubstitutions;
2768 
2769     ldomNode * baseElement;
2770     ldomNode * lastBaseElement;
2771 
2772     lString8 headStyleText;
2773     int headStyleState;
2774 
2775     lString32 htmlDir;
2776     lString32 htmlLang;
2777     bool insideHtmlTag;
2778 
2779     bool m_nonlinear = false;
2780 
2781 public:
2782 
2783     /// return content of html/head/style element
getHeadStyleText()2784     lString8 getHeadStyleText() { return headStyleText; }
2785 
getBaseElement()2786     ldomNode * getBaseElement() { return lastBaseElement; }
2787 
2788     lString32 convertId( lString32 id );
2789     lString32 convertHref( lString32 href );
2790 
addPathSubstitution(lString32 key,lString32 value)2791     void addPathSubstitution( lString32 key, lString32 value )
2792     {
2793         pathSubstitutions.set(key, value);
2794     }
2795 
2796     virtual void setCodeBase( lString32 filePath );
2797     /// returns flags
getFlags()2798     virtual lUInt32 getFlags() { return parent->getFlags(); }
2799     /// sets flags
setFlags(lUInt32 flags)2800     virtual void setFlags( lUInt32 flags ) { parent->setFlags(flags); }
2801     // overrides
2802     /// called when encoding directive found in document
OnEncoding(const lChar32 * name,const lChar32 * table)2803     virtual void OnEncoding( const lChar32 * name, const lChar32 * table )
2804     { parent->OnEncoding( name, table ); }
2805     /// called on parsing start
OnStart(LVFileFormatParser *)2806     virtual void OnStart(LVFileFormatParser *)
2807     {
2808         insideTag = false;
2809         headStyleText.clear();
2810         headStyleState = 0;
2811         insideHtmlTag = false;
2812         htmlDir.clear();
2813         htmlLang.clear();
2814     }
2815     /// called on parsing end
OnStop()2816     virtual void OnStop()
2817     {
2818         if ( insideTag ) {
2819             insideTag = false;
2820             if ( !baseTagReplacement.empty() ) {
2821                 parent->OnTagClose(U"", baseTagReplacement.c_str());
2822             }
2823             baseElement = NULL;
2824             return;
2825         }
2826         insideTag = false;
2827     }
2828     /// called on opening tag
2829     virtual ldomNode * OnTagOpen( const lChar32 * nsname, const lChar32 * tagname );
2830     /// called after > of opening tag (when entering tag body)
2831     virtual void OnTagBody();
2832     /// called on closing tag
2833     virtual void OnTagClose( const lChar32 * nsname, const lChar32 * tagname, bool self_closing_tag=false );
2834     /// called on attribute
2835     virtual void OnAttribute( const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue );
2836     /// called on text
OnText(const lChar32 * text,int len,lUInt32 flags)2837     virtual void OnText( const lChar32 * text, int len, lUInt32 flags )
2838     {
2839         if (headStyleState == 1) {
2840             headStyleText << UnicodeToUtf8(lString32(text).substr(0,len-1));
2841             return;
2842         }
2843         if ( insideTag )
2844             parent->OnText( text, len, flags );
2845     }
2846     /// add named BLOB data to document
OnBlob(lString32 name,const lUInt8 * data,int size)2847     virtual bool OnBlob(lString32 name, const lUInt8 * data, int size) { return parent->OnBlob(name, data, size); }
2848     /// set document property
OnDocProperty(const char * name,lString8 value)2849     virtual void OnDocProperty(const char * name, lString8 value) { parent->OnDocProperty(name, value); }
2850     // set non-linear flag
setNonLinearFlag(bool nonlinear)2851     virtual void setNonLinearFlag( bool nonlinear ) { m_nonlinear = nonlinear; }
2852     /// constructor
ldomDocumentFragmentWriter(LVXMLParserCallback * parentWriter,lString32 baseTagName,lString32 baseTagReplacementName,lString32 fragmentFilePath)2853     ldomDocumentFragmentWriter( LVXMLParserCallback * parentWriter, lString32 baseTagName, lString32 baseTagReplacementName, lString32 fragmentFilePath )
2854     : parent(parentWriter), baseTag(baseTagName), baseTagReplacement(baseTagReplacementName),
2855     insideTag(false), styleDetectionState(0), pathSubstitutions(100), baseElement(NULL), lastBaseElement(NULL),
2856     headStyleState(0), insideHtmlTag(false)
2857     {
2858         setCodeBase( fragmentFilePath );
2859     }
2860     /// destructor
~ldomDocumentFragmentWriter()2861     virtual ~ldomDocumentFragmentWriter() { }
2862 };
2863 
2864 //utils
/// extract authors from FB2 document; delimiter defaults to lString32::empty_str
2866 lString32 extractDocAuthors( ldomDocument * doc, lString32 delimiter=lString32::empty_str, bool shortMiddleName=true );
2867 lString32 extractDocTitle( ldomDocument * doc );
2868 lString32 extractDocLanguage( ldomDocument * doc );
2869 /// returns "(Series Name #number)" if pSeriesNumber is NULL, separate name and number otherwise
2870 lString32 extractDocSeries( ldomDocument * doc, int * pSeriesNumber=NULL );
2871 lString32 extractDocKeywords( ldomDocument * doc );
2872 lString32 extractDocDescription( ldomDocument * doc );
2873 
2874 bool IsEmptySpace( const lChar32 * text, int len );
2875 
2876 /// parse XML document from stream, returns NULL if failed
2877 ldomDocument * LVParseXMLStream( LVStreamRef stream,
2878                               const elem_def_t * elem_table=NULL,
2879                               const attr_def_t * attr_table=NULL,
2880                               const ns_def_t * ns_table=NULL );
2881 
/// parse HTML document from stream, returns NULL if failed
2883 ldomDocument * LVParseHTMLStream( LVStreamRef stream,
2884                               const elem_def_t * elem_table=NULL,
2885                               const attr_def_t * attr_table=NULL,
2886                               const ns_def_t * ns_table=NULL );
2887 
/// document cache: static-only interface (every member is a static function)
class ldomDocCache
{
public:
    /// open existing cache file stream
    static LVStreamRef openExisting( lString32 filename, lUInt32 crc, lUInt32 docFlags, lString32 &cachePath );
    /// create new cache file
    static LVStreamRef createNew( lString32 filename, lUInt32 crc, lUInt32 docFlags, lUInt32 fileSize, lString32 &cachePath );
    /// init document cache
    static bool init( lString32 cacheDir, lvsize_t maxSize );
    /// close document cache manager
    static bool close();
    /// delete all cache files
    static bool clear();
    /// returns true if cache is enabled (successfully initialized)
    static bool enabled();
};
2905 
2906 
2907 /// unit test for DOM
2908 void runTinyDomUnitTests();
2909 
/// pass true to enable CRC check of cache file contents
2911 void enableCacheFileContentsValidation(bool enable);
2912 
2913 /// pass false to not compress data in cache files
2914 void compressCachedData(bool enable);
2915 
/// increase the 4 hardcoded TEXT_CACHE_UNPACKED_SPACE, ELEM_CACHE_UNPACKED_SPACE,
/// RECT_CACHE_UNPACKED_SPACE and STYLE_CACHE_UNPACKED_SPACE by this factor
2918 void setStorageMaxUncompressedSizeFactor(float factor);
2919 
2920 #endif
2921