1 /** \file lvtinydom.h 2 \brief fast and compact XML DOM tree 3 4 CoolReader Engine 5 6 (c) Vadim Lopatin, 2000-2009 7 This source code is distributed under the terms of 8 GNU General Public License 9 See LICENSE file for details 10 11 12 Goal: make fast DOM implementation with small memory footprint. 13 14 2009/04 : Introducing new storage model, optimized for mmap. 15 All DOM objects are divided by 2 parts. 16 1) Short RAM instance 17 2) Data storage part, which could be placed to mmap buffer. 18 19 Document object storage should handle object table and data buffer. 20 Each object has DataIndex, index of entry in object table. 21 Object table holds pointer to RAM instance and data storage for each object. 22 */ 23 24 25 #ifndef __LV_TINYDOM_H_INCLUDED__ 26 #define __LV_TINYDOM_H_INCLUDED__ 27 28 #include "lvmemman.h" 29 #include "lvstring.h" 30 #include "lstridmap.h" 31 #include "lvxml.h" 32 #include "dtddef.h" 33 #include "lvstyles.h" 34 #include "lvdrawbuf.h" 35 #include "lvembeddedfont.h" 36 #include "lvstsheet.h" 37 #include "lvpagesplitter.h" 38 #include "lvptrvec.h" 39 #include "lvhashtable.h" 40 #include "lvimg.h" 41 #include "props.h" 42 #include "bookformats.h" 43 #include "serialbuf.h" 44 #include "lvstring32hashedcollection.h" 45 46 // Allows for requesting older DOM building code (including bugs NOT fixed) 47 extern const int gDOMVersionCurrent; 48 49 // Also defined in src/lvtinydom.cpp 50 #define DOM_VERSION_WITH_NORMALIZED_XPOINTERS 20200223 51 52 #define LXML_NO_DATA 0 ///< to mark data storage record as empty 53 #define LXML_ELEMENT_NODE 1 ///< element node 54 #define LXML_TEXT_NODE 2 ///< text node 55 //#define LXML_DOCUMENT_NODE 3 ///< document node (not implemented) 56 //#define LXML_COMMENT_NODE 4 ///< comment node (not implemented) 57 58 59 /// docFlag mask, enable internal stylesheet of document and style attribute of elements 60 #define DOC_FLAG_ENABLE_INTERNAL_STYLES 1 61 /// docFlag mask, enable paperbook-like footnotes 62 #define 
DOC_FLAG_ENABLE_FOOTNOTES 2 63 /// docFlag mask, enable preformatted text 64 #define DOC_FLAG_PREFORMATTED_TEXT 4 65 /// docFlag mask, enable document embedded fonts (EPUB) 66 #define DOC_FLAG_ENABLE_DOC_FONTS 8 67 /// docFlag mask, force page breaks on non-linear fragments (EPUB) 68 #define DOC_FLAG_NONLINEAR_PAGEBREAK 16 69 /// default docFlag set 70 #define DOC_FLAG_DEFAULTS (DOC_FLAG_ENABLE_INTERNAL_STYLES|DOC_FLAG_ENABLE_FOOTNOTES|DOC_FLAG_ENABLE_DOC_FONTS) 71 72 73 74 #define LXML_NS_NONE 0 ///< no namespace specified 75 #define LXML_NS_ANY 0xFFFF ///< any namespace can be specified 76 #define LXML_ATTR_VALUE_NONE 0xFFFFFFFF ///< attribute not found 77 78 #define DOC_STRING_HASH_SIZE 256 79 #define RESERVED_DOC_SPACE 4096 80 #define MAX_TYPE_ID 1024 // max of element, ns, attr 81 #define MAX_ELEMENT_TYPE_ID 1024 82 #define MAX_NAMESPACE_TYPE_ID 64 83 #define MAX_ATTRIBUTE_TYPE_ID 1024 84 #define UNKNOWN_ELEMENT_TYPE_ID (MAX_ELEMENT_TYPE_ID>>1) 85 #define UNKNOWN_ATTRIBUTE_TYPE_ID (MAX_ATTRIBUTE_TYPE_ID>>1) 86 #define UNKNOWN_NAMESPACE_TYPE_ID (MAX_NAMESPACE_TYPE_ID>>1) 87 88 // document property names 89 #define DOC_PROP_AUTHORS "doc.authors" 90 #define DOC_PROP_TITLE "doc.title" 91 #define DOC_PROP_LANGUAGE "doc.language" 92 #define DOC_PROP_DESCRIPTION "doc.description" 93 #define DOC_PROP_KEYWORDS "doc.keywords" 94 #define DOC_PROP_SERIES_NAME "doc.series.name" 95 #define DOC_PROP_SERIES_NUMBER "doc.series.number" 96 #define DOC_PROP_ARC_NAME "doc.archive.name" 97 #define DOC_PROP_ARC_PATH "doc.archive.path" 98 #define DOC_PROP_ARC_SIZE "doc.archive.size" 99 #define DOC_PROP_ARC_FILE_COUNT "doc.archive.file.count" 100 #define DOC_PROP_FILE_NAME "doc.file.name" 101 #define DOC_PROP_FILE_PATH "doc.file.path" 102 #define DOC_PROP_FILE_SIZE "doc.file.size" 103 #define DOC_PROP_FILE_FORMAT "doc.file.format" 104 #define DOC_PROP_FILE_FORMAT_ID "doc.file.format.id" 105 #define DOC_PROP_FILE_CRC32 "doc.file.crc32" 106 #define DOC_PROP_CODE_BASE 
"doc.file.code.base" 107 #define DOC_PROP_COVER_FILE "doc.cover.file" 108 109 #define DEF_SPACE_WIDTH_SCALE_PERCENT 100 110 #define DEF_MIN_SPACE_CONDENSING_PERCENT 50 111 #define DEF_UNUSED_SPACE_THRESHOLD_PERCENT 5 112 #define DEF_MAX_ADDED_LETTER_SPACING_PERCENT 0 113 114 #define NODE_DISPLAY_STYLE_HASH_UNINITIALIZED 0xFFFFFFFF 115 116 // To be used for 'direction' in ldomNode->elementFromPoint(lvPoint pt, int direction) 117 // and ldomDocument->createXPointer(lvPoint pt, int direction...) as a way to 118 // self-document what's expected (but the code does > and < comparisons, so 119 // don't change these values - some clients may also already use 0/1/-1). 120 // Use PT_DIR_EXACT to find the exact node at pt (with y AND x check), 121 // which is needed when selecting text or checking if tap is on a link, 122 // (necessary in table cells or floats, and in RTL text). 123 // Use PT_DIR_SCAN_* when interested only in finding the slice of a page 124 // at y (eg. to get the current page top), finding the nearest node in 125 // direction if pt.y happens to be in some node margin area. 126 // Use PT_DIR_SCAN_BACKWARD_LOGICAL_* when looking a whole page range 127 // xpointers, to not miss words on first or last line in bidi/RTL text. 
128 #define PT_DIR_SCAN_BACKWARD_LOGICAL_LAST -3 129 #define PT_DIR_SCAN_BACKWARD_LOGICAL_FIRST -2 130 #define PT_DIR_SCAN_BACKWARD -1 131 #define PT_DIR_EXACT 0 132 #define PT_DIR_SCAN_FORWARD 1 133 #define PT_DIR_SCAN_FORWARD_LOGICAL_FIRST 2 134 #define PT_DIR_SCAN_FORWARD_LOGICAL_LAST 3 135 136 137 //#if BUILD_LITE!=1 138 /// final block cache 139 typedef LVRef<LFormattedText> LFormattedTextRef; 140 typedef LVCacheMap< ldomNode *, LFormattedTextRef> CVRendBlockCache; 141 //#endif 142 143 144 //#define LDOM_USE_OWN_MEM_MAN 0 145 /// XPath step kind 146 typedef enum { 147 xpath_step_error = 0, // error 148 xpath_step_element, // element of type 'name' with 'index' /elemname[N]/ 149 xpath_step_text, // text node with 'index' /text()[N]/ 150 xpath_step_nodeindex, // node index /N/ 151 xpath_step_point // point index .N 152 } xpath_step_t; 153 xpath_step_t ParseXPathStep( const lChar8 * &path, lString8 & name, int & index ); 154 155 /// return value for continuous operations 156 typedef enum { 157 CR_DONE, ///< operation is finished successfully 158 CR_TIMEOUT, ///< operation is incomplete - interrupted by timeout 159 CR_ERROR ///< error while executing operation 160 } ContinuousOperationResult; 161 162 /// type of image scaling 163 typedef enum { 164 IMG_NO_SCALE, /// scaling is disabled 165 IMG_INTEGER_SCALING, /// integer multipier/divisor scaling -- *2, *3 only 166 IMG_FREE_SCALING /// free scaling, non-integer factor 167 } img_scaling_mode_t; 168 169 enum XPointerMode { 170 XPATH_USE_NAMES = 0, 171 XPATH_USE_INDEXES 172 }; 173 174 /// image scaling option 175 struct img_scaling_option_t { 176 img_scaling_mode_t mode; 177 int max_scale; getHashimg_scaling_option_t178 int getHash() { return (int)mode * 33 + max_scale; } 179 // creates default option value 180 img_scaling_option_t(); 181 }; 182 183 /// set of images scaling options for different kind of images 184 struct img_scaling_options_t { 185 img_scaling_option_t zoom_in_inline; 186 img_scaling_option_t 
zoom_in_block; 187 img_scaling_option_t zoom_out_inline; 188 img_scaling_option_t zoom_out_block; 189 /// returns hash value getHashimg_scaling_options_t190 int getHash() { return (((zoom_in_inline.getHash()*33 + zoom_in_block.getHash())*33 + zoom_out_inline.getHash())*33 + zoom_out_block.getHash()); } 191 /// creates default options 192 img_scaling_options_t(); 193 /// returns true if any changes occured 194 bool update( CRPropRef props, int fontSize ); 195 }; 196 197 //#if BUILD_LITE!=1 198 struct DataStorageItemHeader; 199 struct TextDataStorageItem; 200 struct ElementDataStorageItem; 201 struct NodeItem; 202 class DataBuffer; 203 //#endif 204 205 206 /// DocView Callback interface - track progress, external links, etc. 207 class LVDocViewCallback { 208 public: 209 /// on starting file loading OnLoadFileStart(lString32 filename)210 virtual void OnLoadFileStart( lString32 filename ) { CR_UNUSED(filename); } 211 /// format detection finished OnLoadFileFormatDetected(doc_format_t)212 virtual void OnLoadFileFormatDetected( doc_format_t /*fileFormat*/) { } 213 /// file loading is finished successfully - drawCoveTo() may be called there OnLoadFileEnd()214 virtual void OnLoadFileEnd() { } 215 /// first page is loaded from file an can be formatted for preview OnLoadFileFirstPagesReady()216 virtual void OnLoadFileFirstPagesReady() { } 217 /// file progress indicator, called with values 0..100 OnLoadFileProgress(int)218 virtual void OnLoadFileProgress( int /*percent*/) { } 219 /// file load finiished with error OnLoadFileError(lString32)220 virtual void OnLoadFileError(lString32 /*message*/) { } 221 /// node style update started OnNodeStylesUpdateStart()222 virtual void OnNodeStylesUpdateStart() { } 223 /// node style update finished OnNodeStylesUpdateEnd()224 virtual void OnNodeStylesUpdateEnd() { } 225 /// node style update progress, called with values 0..100 OnNodeStylesUpdateProgress(int)226 virtual void OnNodeStylesUpdateProgress(int /*percent*/) { } 227 /// document 
formatting started OnFormatStart()228 virtual void OnFormatStart() { } 229 /// document formatting finished OnFormatEnd()230 virtual void OnFormatEnd() { } 231 /// format progress, called with values 0..100 OnFormatProgress(int)232 virtual void OnFormatProgress(int /*percent*/) { } 233 /// document fully loaded and rendered (follows OnFormatEnd(), or OnLoadFileEnd() when loaded from cache) OnDocumentReady()234 virtual void OnDocumentReady() { } 235 /// format progress, called with values 0..100 OnExportProgress(int)236 virtual void OnExportProgress(int /*percent*/) { } 237 /// Override to handle external links OnExternalLink(lString32,ldomNode *)238 virtual void OnExternalLink(lString32 /*url*/, ldomNode * /*node*/) { } 239 /// Called when page images should be invalidated (clearImageCache() called in LVDocView) OnImageCacheClear()240 virtual void OnImageCacheClear() { } 241 /// return true if reload will be processed by external code, false to let internal code process it OnRequestReload()242 virtual bool OnRequestReload() { return false; } 243 /// save cache file started OnSaveCacheFileStart()244 virtual void OnSaveCacheFileStart() { } 245 /// save cache file finished OnSaveCacheFileEnd()246 virtual void OnSaveCacheFileEnd() { } 247 /// save cache file progress, called with values 0..100 OnSaveCacheFileProgress(int)248 virtual void OnSaveCacheFileProgress(int /*percent*/) { } 249 /// destructor ~LVDocViewCallback()250 virtual ~LVDocViewCallback() { } 251 }; 252 253 class CacheLoadingCallback 254 { 255 public: 256 /// called when format of document being loaded from cache became known 257 virtual void OnCacheFileFormatDetected( doc_format_t ) = 0; ~CacheLoadingCallback()258 virtual ~CacheLoadingCallback() { } 259 }; 260 261 262 class ldomTextStorageChunk; 263 class ldomTextStorageChunkBuilder; 264 struct ElementDataStorageItem; 265 class CacheFile; 266 class tinyNodeCollection; 267 268 struct ldomNodeStyleInfo 269 { 270 lUInt16 _fontIndex; 271 lUInt16 _styleIndex; 
272 }; 273 274 class ldomBlobItem; 275 #define BLOB_NAME_PREFIX U"@blob#" 276 #define MOBI_IMAGE_NAME_PREFIX U"mobi_image_" 277 class ldomBlobCache 278 { 279 CacheFile * _cacheFile; 280 LVPtrVector<ldomBlobItem> _list; 281 bool _changed; 282 bool loadIndex(); 283 bool saveIndex(); 284 public: 285 ldomBlobCache(); 286 void setCacheFile( CacheFile * cacheFile ); 287 ContinuousOperationResult saveToCache(CRTimerUtil & timeout); 288 bool addBlob( const lUInt8 * data, int size, lString32 name ); 289 LVStreamRef getBlob( lString32 name ); 290 }; 291 292 class ldomDataStorageManager 293 { 294 friend class ldomTextStorageChunk; 295 protected: 296 tinyNodeCollection * _owner; 297 LVPtrVector<ldomTextStorageChunk> _chunks; 298 ldomTextStorageChunk * _activeChunk; 299 ldomTextStorageChunk * _recentChunk; 300 CacheFile * _cache; 301 lUInt32 _uncompressedSize; 302 lUInt32 _maxUncompressedSize; 303 lUInt32 _chunkSize; 304 char _type; /// type, to show in log 305 bool _maxSizeReachedWarned; 306 ldomTextStorageChunk * getChunk( lUInt32 address ); 307 public: 308 /// type 309 lUInt16 cacheType(); 310 /// saves all unsaved chunks to cache file 311 bool save( CRTimerUtil & maxTime ); 312 /// load chunk index from cache file 313 bool load(); 314 /// sets cache file 315 void setCache( CacheFile * cache ); 316 /// checks buffer sizes, compacts most unused chunks 317 void compact( int reservedSpace , const ldomTextStorageChunk *excludedChunk = NULL ); getUncompressedSize()318 lUInt32 getUncompressedSize() { return _uncompressedSize; } 319 #if BUILD_LITE!=1 320 /// allocates new text node, return its address inside storage 321 lUInt32 allocText( lUInt32 dataIndex, lUInt32 parentIndex, const lString8 & text ); 322 /// allocates storage for new element, returns address address inside storage 323 lUInt32 allocElem( lUInt32 dataIndex, lUInt32 parentIndex, int childCount, int attrCount ); 324 /// get text by address 325 lString8 getText( lUInt32 address ); 326 /// get pointer to text data 327 
TextDataStorageItem * getTextItem( lUInt32 addr ); 328 /// get pointer to element data 329 ElementDataStorageItem * getElem( lUInt32 addr ); 330 /// change node's parent, returns true if modified 331 bool setParent( lUInt32 address, lUInt32 parent ); 332 /// returns node's parent by address 333 lUInt32 getParent( lUInt32 address ); 334 /// free data item 335 void freeNode( lUInt32 addr ); 336 /// call to invalidate chunk if content is modified 337 void modified( lUInt32 addr ); 338 /// return true if some chunks have been allocated hasChunks()339 bool hasChunks() { return _chunks.length() > 0; } 340 #endif 341 342 /// get or allocate space for rect data item 343 void getRendRectData( lUInt32 elemDataIndex, lvdomElementFormatRec * dst ); 344 /// set rect data item 345 void setRendRectData( lUInt32 elemDataIndex, const lvdomElementFormatRec * src ); 346 347 /// get or allocate space for element style data item 348 void getStyleData( lUInt32 elemDataIndex, ldomNodeStyleInfo * dst ); 349 /// set element style data item 350 void setStyleData( lUInt32 elemDataIndex, const ldomNodeStyleInfo * src ); 351 352 ldomDataStorageManager( tinyNodeCollection * owner, char type, lUInt32 maxUnpackedSize, lUInt32 chunkSize ); 353 ~ldomDataStorageManager(); 354 }; 355 356 /// class to store compressed/uncompressed text nodes chunk 357 class ldomTextStorageChunk 358 { 359 friend class ldomDataStorageManager; 360 ldomDataStorageManager * _manager; 361 ldomTextStorageChunk * _nextRecent; 362 ldomTextStorageChunk * _prevRecent; 363 lUInt8 * _buf; /// buffer for uncompressed data 364 lUInt32 _bufsize; /// _buf (uncompressed) area size, bytes 365 lUInt32 _bufpos; /// _buf (uncompressed) data write position (for appending of new data) 366 lUInt16 _index; /// ? 
index of chunk in storage 367 char _type; /// type, to show in log 368 bool _saved; 369 370 void setunpacked( const lUInt8 * buf, int bufsize ); 371 /// pack data, and remove unpacked 372 void compact(); 373 #if BUILD_LITE!=1 374 /// pack data, and remove unpacked, put packed data to cache file 375 bool swapToCache( bool removeFromMemory ); 376 /// read packed data from cache 377 bool restoreFromCache(); 378 #endif 379 /// unpacks chunk, if packed; checks storage space, compact if necessary 380 void ensureUnpacked(); 381 #if BUILD_LITE!=1 382 /// free data item 383 void freeNode( int offset ); 384 /// saves data to cache file, if unsaved 385 bool save(); 386 #endif 387 public: 388 /// call to invalidate chunk if content is modified 389 void modified(); 390 /// returns chunk index inside collection getIndex()391 int getIndex() { return _index; } 392 /// returns free space in buffer 393 int space(); 394 /// adds new text item to buffer, returns offset inside chunk of stored data 395 int addText( lUInt32 dataIndex, lUInt32 parentIndex, const lString8 & text ); 396 /// adds new element item to buffer, returns offset inside chunk of stored data 397 int addElem( lUInt32 dataIndex, lUInt32 parentIndex, int childCount, int attrCount ); 398 /// get text item from buffer by offset 399 lString8 getText( int offset ); 400 /// get node parent by offset 401 lUInt32 getParent( int offset ); 402 /// set node parent by offset 403 bool setParent( int offset, lUInt32 parentIndex ); 404 /// get pointer to element data 405 ElementDataStorageItem * getElem( int offset ); 406 /// get raw data bytes 407 void getRaw( int offset, int size, lUInt8 * buf ); 408 /// set raw data bytes 409 void setRaw( int offset, int size, const lUInt8 * buf ); 410 /// create empty buffer 411 ldomTextStorageChunk(ldomDataStorageManager * manager, lUInt16 index); 412 /// create chunk to be read from cache file 413 ldomTextStorageChunk(ldomDataStorageManager * manager, lUInt16 index, lUInt32 compsize, lUInt32 
uncompsize); 414 /// create with preallocated buffer, for raw access 415 ldomTextStorageChunk(lUInt32 preAllocSize, ldomDataStorageManager * manager, lUInt16 index); 416 ~ldomTextStorageChunk(); 417 }; 418 419 // forward declaration 420 struct ldomNode; 421 422 // About these #define TNC_PART_* : 423 // A ldomNode unique reference is defined by: 424 // struct ldomNodeHandle { /// compact 32bit value for node 425 // unsigned _docIndex:8; // index in ldomNode::_documentInstances[MAX_DOCUMENT_INSTANCE_COUNT]; 426 // unsigned _dataIndex:24; // index of node in document's storage and type 427 // }; 428 // The 24 bits of _dataIndex are used that way: 429 // return &(_elemList[index>>TNC_PART_INDEX_SHIFT][(index>>4)&TNC_PART_MASK]); 430 // #define TNTYPE (_handle._dataIndex&0x0F) 431 // #define TNINDEX (_handle._dataIndex&(~0x0E)) 432 // 24>15 10bits (1024 values) : index in the first-level _elemList[TNC_PART_COUNT] 433 // 14> 5 10bits (1024 values) : sub-index in second-level _elemList[first_index][] 434 // 4> 1 4bits (16 values) : type (bit 1: text | element, bit 2: mutable | permanent) 435 // (bit 3 and 4 are not used, so we could grab 2 more bits from here if needed) 436 // 437 // We can update ldomNodeHandle to: 438 // struct ldomNodeHandle { 439 // unsigned _docIndex:4; // decreasing MAX_DOCUMENT_INSTANCE_COUNT from 256 to 16 440 // unsigned _dataIndex:28; // get 4 more bits that we can distribute to these indexes. 441 // }; 442 // The other #define below (and possibly the code too) assume the same TNC_PART_SHIFT for both indexes, 443 // so let's distribute 2 bits to each: 444 // 28>17 12bits (4096 values) : index in the first-level _elemList[TNC_PART_COUNT] 445 // 16> 5 12bits (4096 values) : sub-index in second-level _elemList[first_index][] 446 // 4> 1 4bits (16 values) 447 // With that, we have increased the max number of text nodes and the max number of 448 // element nodes from 1024x1024 (1M) to 4096x4096 (16M) which allows loading very large books. 
449 450 //#define TNC_PART_COUNT 1024 451 //#define TNC_PART_SHIFT 10 452 #define TNC_PART_COUNT 4096 453 #define TNC_PART_SHIFT 12 454 #define TNC_PART_INDEX_SHIFT (TNC_PART_SHIFT+4) 455 #define TNC_PART_LEN (1<<TNC_PART_SHIFT) 456 #define TNC_PART_MASK (TNC_PART_LEN-1) 457 /// storage of ldomNode 458 class tinyNodeCollection 459 { 460 friend struct ldomNode; 461 friend class tinyElement; 462 friend class ldomDocument; 463 private: 464 int _textCount; 465 lUInt32 _textNextFree; 466 ldomNode * _textList[TNC_PART_COUNT]; 467 int _elemCount; 468 lUInt32 _elemNextFree; 469 ldomNode * _elemList[TNC_PART_COUNT]; 470 LVIndexedRefCache<css_style_ref_t> _styles; 471 LVIndexedRefCache<font_ref_t> _fonts; 472 int _tinyElementCount; 473 int _itemCount; 474 int _docIndex; 475 476 protected: 477 #if BUILD_LITE!=1 478 /// final block cache 479 CVRendBlockCache _renderedBlockCache; 480 CacheFile * _cacheFile; 481 bool _cacheFileStale; 482 bool _cacheFileLeaveAsDirty; 483 bool _mapped; 484 bool _maperror; 485 int _mapSavingStage; 486 487 img_scaling_options_t _imgScalingOptions; 488 int _spaceWidthScalePercent; 489 int _minSpaceCondensingPercent; 490 int _unusedSpaceThresholdPercent; 491 int _maxAddedLetterSpacingPercent; 492 493 lUInt32 _nodeStyleHash; 494 lUInt32 _nodeDisplayStyleHash; 495 lUInt32 _nodeDisplayStyleHashInitial; 496 bool _nodeStylesInvalidIfLoading; 497 498 int calcFinalBlocks(); 499 void dropStyles(); 500 #endif 501 bool _hangingPunctuationEnabled; 502 lUInt32 _renderBlockRenderingFlags; 503 lUInt32 _DOMVersionRequested; 504 int _interlineScaleFactor; 505 506 ldomDataStorageManager _textStorage; // persistent text node data storage 507 ldomDataStorageManager _elemStorage; // persistent element data storage 508 ldomDataStorageManager _rectStorage; // element render rect storage 509 ldomDataStorageManager _styleStorage;// element style storage (font & style indexes ldomNodeStyleInfo) 510 511 CRPropRef _docProps; 512 lUInt32 _docFlags; // document flags 513 514 int 
_styleIndex; 515 516 LVStyleSheet _stylesheet; 517 518 LVHashTable<lUInt16, lUInt16> _fontMap; // style index to font index 519 520 /// checks buffer sizes, compacts most unused chunks 521 ldomBlobCache _blobCache; 522 523 /// uniquie id of file format parsing option (usually 0, but 1 for preformatted text files) 524 int getPersistenceFlags(); 525 526 #if BUILD_LITE!=1 527 bool saveStylesData(); 528 bool loadStylesData(); 529 bool updateLoadedStyles( bool enabled ); 530 lUInt32 calcStyleHash(bool already_rendered); 531 bool saveNodeData(); 532 bool saveNodeData( lUInt16 type, ldomNode ** list, int nodecount ); 533 bool loadNodeData(); 534 bool loadNodeData( lUInt16 type, ldomNode ** list, int nodecount ); 535 hasRenderData()536 bool hasRenderData() { return _rectStorage.hasChunks(); } 537 538 bool openCacheFile(); 539 540 void setNodeStyleIndex( lUInt32 dataIndex, lUInt16 index ); 541 void setNodeFontIndex( lUInt32 dataIndex, lUInt16 index ); 542 lUInt16 getNodeStyleIndex( lUInt32 dataIndex ); 543 lUInt16 getNodeFontIndex( lUInt32 dataIndex ); 544 css_style_ref_t getNodeStyle( lUInt32 dataIndex ); 545 font_ref_t getNodeFont( lUInt32 dataIndex ); 546 void setNodeStyle( lUInt32 dataIndex, css_style_ref_t & v ); 547 void setNodeFont( lUInt32 dataIndex, font_ref_t & v ); 548 void clearNodeStyle( lUInt32 dataIndex ); resetNodeNumberingProps()549 virtual void resetNodeNumberingProps() { } 550 #endif 551 552 /// creates empty collection 553 tinyNodeCollection(); 554 tinyNodeCollection( tinyNodeCollection & v ); 555 556 public: 557 558 #if BUILD_LITE!=1 getSpaceWidthScalePercent()559 int getSpaceWidthScalePercent() { 560 return _spaceWidthScalePercent; 561 } 562 setSpaceWidthScalePercent(int spaceWidthScalePercent)563 bool setSpaceWidthScalePercent(int spaceWidthScalePercent) { 564 if (spaceWidthScalePercent == _spaceWidthScalePercent) 565 return false; 566 _spaceWidthScalePercent = spaceWidthScalePercent; 567 return true; 568 } 569 setMinSpaceCondensingPercent(int 
minSpaceCondensingPercent)570 bool setMinSpaceCondensingPercent(int minSpaceCondensingPercent) { 571 if (minSpaceCondensingPercent == _minSpaceCondensingPercent) 572 return false; 573 _minSpaceCondensingPercent = minSpaceCondensingPercent; 574 return true; 575 } 576 setUnusedSpaceThresholdPercent(int unusedSpaceThresholdPercent)577 bool setUnusedSpaceThresholdPercent(int unusedSpaceThresholdPercent) { 578 if (unusedSpaceThresholdPercent == _unusedSpaceThresholdPercent) 579 return false; 580 _unusedSpaceThresholdPercent = unusedSpaceThresholdPercent; 581 return true; 582 } 583 setMaxAddedLetterSpacingPercent(int maxAddedLetterSpacingPercent)584 bool setMaxAddedLetterSpacingPercent(int maxAddedLetterSpacingPercent) { 585 if (maxAddedLetterSpacingPercent == _maxAddedLetterSpacingPercent) 586 return false; 587 _maxAddedLetterSpacingPercent = maxAddedLetterSpacingPercent; 588 // This does not need to trigger a re-rendering, just 589 // a re-formatting of the final blocks 590 _renderedBlockCache.clear(); 591 return true; 592 } 593 594 /// add named BLOB data to document addBlob(lString32 name,const lUInt8 * data,int size)595 bool addBlob(lString32 name, const lUInt8 * data, int size) { _cacheFileStale = true ; return _blobCache.addBlob(data, size, name); } 596 /// get BLOB by name getBlob(lString32 name)597 LVStreamRef getBlob(lString32 name) { return _blobCache.getBlob(name); } 598 599 /// called on document loading end 600 bool validateDocument(); 601 602 /// swaps to cache file or saves changes, limited by time interval (can be called again to continue after TIMEOUT) 603 virtual ContinuousOperationResult swapToCache(CRTimerUtil & maxTime) = 0; 604 /// try opening from cache file, find by source file name (w/o path) and crc32 605 virtual bool openFromCache( CacheLoadingCallback * formatCallback, LVDocViewCallback * progressCallback=NULL ) = 0; 606 /// saves recent changes to mapped file, with timeout (can be called again to continue after TIMEOUT) 607 virtual 
ContinuousOperationResult updateMap(CRTimerUtil & maxTime, LVDocViewCallback * progressCallback=NULL) = 0; 608 /// saves recent changes to mapped file 609 virtual bool updateMap(LVDocViewCallback * progressCallback=NULL) { 610 CRTimerUtil infinite; 611 return updateMap(infinite, progressCallback)!=CR_ERROR; 612 } 613 614 bool swapToCacheIfNecessary(); 615 616 617 bool createCacheFile(); 618 #endif 619 getHangingPunctiationEnabled()620 bool getHangingPunctiationEnabled() const { 621 return _hangingPunctuationEnabled; 622 } 623 bool setHangingPunctiationEnabled(bool value); 624 getRenderBlockRenderingFlags()625 lUInt32 getRenderBlockRenderingFlags() const { 626 return _renderBlockRenderingFlags; 627 } 628 bool setRenderBlockRenderingFlags(lUInt32 flags); 629 getDOMVersionRequested()630 lUInt32 getDOMVersionRequested() const { 631 return _DOMVersionRequested; 632 } 633 bool setDOMVersionRequested(lUInt32 version); 634 getInterlineScaleFactor()635 int getInterlineScaleFactor() const { 636 return _interlineScaleFactor; 637 } 638 bool setInterlineScaleFactor(int value); 639 getDocFlag(lUInt32 mask)640 inline bool getDocFlag( lUInt32 mask ) 641 { 642 return (_docFlags & mask) != 0; 643 } 644 645 void setDocFlag( lUInt32 mask, bool value ); 646 getDocFlags()647 inline lUInt32 getDocFlags() 648 { 649 return _docFlags; 650 } 651 getDocIndex()652 inline int getDocIndex() 653 { 654 return _docIndex; 655 } 656 getFontContextDocIndex()657 inline int getFontContextDocIndex() 658 { 659 return (_docFlags & DOC_FLAG_ENABLE_DOC_FONTS) && (_docFlags & DOC_FLAG_ENABLE_INTERNAL_STYLES) ? 
_docIndex : -1; 660 } 661 662 void setDocFlags( lUInt32 value ); 663 664 665 /// returns doc properties collection getProps()666 inline CRPropRef getProps() { return _docProps; } 667 /// returns doc properties collection setProps(CRPropRef props)668 void setProps( CRPropRef props ) { _docProps = props; } 669 670 #if BUILD_LITE!=1 671 /// set cache file stale flag setCacheFileStale(bool stale)672 void setCacheFileStale( bool stale ) { _cacheFileStale = stale; } 673 674 /// is built (and cached) DOM possibly invalid (can happen when some nodes have changed display style) isBuiltDomStale()675 bool isBuiltDomStale() { 676 return _nodeDisplayStyleHashInitial != NODE_DISPLAY_STYLE_HASH_UNINITIALIZED && 677 _nodeDisplayStyleHash != _nodeDisplayStyleHashInitial; 678 } setNodeStylesInvalidIfLoading()679 void setNodeStylesInvalidIfLoading() { 680 _nodeStylesInvalidIfLoading = true; 681 } 682 683 /// if a cache file is in use hasCacheFile()684 bool hasCacheFile() { return _cacheFile != NULL; } 685 /// set cache file as dirty, so it's not re-used on next load invalidateCacheFile()686 void invalidateCacheFile() { _cacheFileLeaveAsDirty = true; } 687 /// get cache file full path 688 lString32 getCacheFilePath(); 689 #endif 690 691 /// minimize memory consumption 692 void compact(); 693 /// dumps memory usage statistics to debug log 694 void dumpStatistics(); 695 /// get memory usage statistics 696 lString32 getStatistics(); 697 698 /// get ldomNode instance pointer 699 ldomNode * getTinyNode( lUInt32 index ); 700 /// allocate new ldomNode 701 ldomNode * allocTinyNode( int type ); 702 /// allocate new tinyElement 703 ldomNode * allocTinyElement( ldomNode * parent, lUInt16 nsid, lUInt16 id ); 704 /// recycle ldomNode on node removing 705 void recycleTinyNode( lUInt32 index ); 706 707 708 709 #if BUILD_LITE!=1 710 /// put all object into persistent storage 711 virtual void persist( CRTimerUtil & maxTime ); 712 #endif 713 714 715 /// destroys collection 716 virtual 
~tinyNodeCollection(); 717 }; 718 719 class ldomDocument; 720 class tinyElement; 721 struct lxmlAttribute; 722 723 #if BUILD_LITE!=1 724 class RenderRectAccessor : public lvdomElementFormatRec 725 { 726 ldomNode * _node; 727 bool _modified; 728 bool _dirty; 729 public: 730 //RenderRectAccessor & operator -> () { return *this; } 731 int getX(); 732 int getY(); 733 int getWidth(); 734 int getHeight(); 735 void getRect( lvRect & rc ); 736 void setX( int x ); 737 void setY( int y ); 738 void setWidth( int w ); 739 void setHeight( int h ); 740 741 int getInnerWidth(); 742 int getInnerX(); 743 int getInnerY(); 744 void setInnerX( int x ); 745 void setInnerY( int y ); 746 void setInnerWidth( int w ); 747 748 int getUsableLeftOverflow(); 749 int getUsableRightOverflow(); 750 void setUsableLeftOverflow( int dx ); 751 void setUsableRightOverflow( int dx ); 752 753 int getTopOverflow(); 754 int getBottomOverflow(); 755 void setTopOverflow( int dy ); 756 void setBottomOverflow( int dy ); 757 758 int getBaseline(); 759 void setBaseline( int baseline ); 760 int getListPropNodeIndex(); 761 void setListPropNodeIndex( int idx ); 762 int getLangNodeIndex(); 763 void setLangNodeIndex( int idx ); 764 765 unsigned short getFlags(); 766 void setFlags( unsigned short flags ); 767 768 void getTopRectsExcluded( int & lw, int & lh, int & rw, int & rh ); 769 void setTopRectsExcluded( int lw, int lh, int rw, int rh ); 770 void getNextFloatMinYs( int & left, int & right ); 771 void setNextFloatMinYs( int left, int right ); 772 void getInvolvedFloatIds( int & float_count, lUInt32 * float_ids ); 773 void setInvolvedFloatIds( int float_count, lUInt32 * float_ids ); 774 775 void push(); 776 void clear(); 777 RenderRectAccessor( ldomNode * node ); 778 ~RenderRectAccessor(); 779 }; 780 #endif 781 782 /// compact 32bit value for node 783 struct ldomNodeHandle { 784 // See comment above around #define TNC_PART_COUNT and TNC_PART_SHIFT changes 785 // Original crengine field sizes: 786 // unsigned 
_docIndex:8; 787 // unsigned _dataIndex:24; 788 unsigned _docIndex:4; // index in ldomNode::_documentInstances[MAX_DOCUMENT_INSTANCE_COUNT]; 789 unsigned _dataIndex:28; // index of node in document's storage and type 790 }; 791 792 /// max number which could be stored in ldomNodeHandle._docIndex 793 // #define MAX_DOCUMENT_INSTANCE_COUNT 256 794 #define MAX_DOCUMENT_INSTANCE_COUNT 16 795 796 797 class ldomTextNode; 798 // no vtable, very small size (16 bytes) 799 // optimized for 32 bit systems 800 struct ldomNode 801 { 802 friend class tinyNodeCollection; 803 friend class RenderRectAccessor; 804 friend class NodeImageProxy; 805 friend class ldomDocument; 806 807 private: 808 809 static ldomDocument * _documentInstances[MAX_DOCUMENT_INSTANCE_COUNT]; 810 811 /// adds document to list, returns ID of allocated document, -1 if no space in instance array 812 static int registerDocument( ldomDocument * doc ); 813 /// removes document from list 814 static void unregisterDocument( ldomDocument * doc ); 815 816 // types for _handle._type 817 enum { 818 NT_TEXT=0, // mutable text node 819 NT_ELEMENT=1 // mutable element node 820 #if BUILD_LITE!=1 821 , 822 NT_PTEXT=2, // immutable (persistent) text node 823 NT_PELEMENT=3 // immutable (persistent) element node 824 #endif 825 }; 826 827 /// 0: packed 32bit data field 828 ldomNodeHandle _handle; // _docIndex, _dataIndex, _type 829 830 /// 4: misc data 4 bytes (8 bytes on x64) 831 union { // [8] 8 bytes (16 bytes on x64) 832 ldomTextNode * _text_ptr; // NT_TEXT: mutable text node pointer 833 tinyElement * _elem_ptr; // NT_ELEMENT: mutable element pointer 834 #if BUILD_LITE!=1 835 lUInt32 _pelem_addr; // NT_PELEMENT: element storage address: chunk+offset 836 lUInt32 _ptext_addr; // NT_PTEXT: persistent text storage address: chunk+offset 837 #endif 838 lUInt32 _nextFreeIndex; // NULL for removed items 839 } _data; 840 841 842 /// sets document for node setDocumentIndexldomNode843 inline void setDocumentIndex( int index ) { 
_handle._docIndex = index; } 844 void setStyleIndexInternal( lUInt16 index ); 845 void setFontIndexInternal( lUInt16 index ); 846 847 848 #define TNTYPE (_handle._dataIndex&0x0F) 849 #define TNINDEX (_handle._dataIndex&(~0x0E)) 850 #define TNCHUNK (_addr>>&(~0x0F)) 851 void onCollectionDestroy(); getTinyNodeldomNode852 inline ldomNode * getTinyNode( lUInt32 index ) const { return ((tinyNodeCollection*)getDocument())->getTinyNode(index); } 853 deleteldomNode854 void operator delete(void *) 855 { 856 // Do nothing. Just to disable delete. 857 } 858 859 /// changes parent of item 860 void setParentNode( ldomNode * newParent ); 861 /// add child 862 void addChild( lInt32 childNodeIndex ); 863 864 /// call to invalidate cache if persistent node content is modified 865 void modified(); 866 867 /// returns copy of render data structure 868 void getRenderData( lvdomElementFormatRec & dst); 869 /// sets new value for render data structure 870 void setRenderData( lvdomElementFormatRec & newData); 871 872 void autoboxChildren( int startIndex, int endIndex, bool handleFloating=false ); 873 void removeChildren( int startIndex, int endIndex ); 874 bool cleanIfOnlyEmptyTextInline( bool handleFloating=false ); 875 /// returns true if element has inline content (non empty text, images, <BR>) 876 bool hasNonEmptyInlineContent( bool ignoreFloats=false ); 877 878 public: 879 #if BUILD_LITE!=1 880 // Generic version of autoboxChildren() without any specific inline/block checking, 881 // accepting any element id (from the enum el_*, like el_div, el_tabularBox) as 882 // the wrapping element. 
883 ldomNode * boxWrapChildren( int startIndex, int endIndex, lUInt16 elementId ); 884 885 // Ensure this node has a ::before/::after pseudo element as 886 // child, creating it if needed and possible 887 void ensurePseudoElement( bool is_before ); 888 889 /// if stylesheet file name is set, and file is found, set stylesheet to its value 890 bool applyNodeStylesheet(); 891 892 bool initNodeFont(); 893 void initNodeStyle(); 894 /// init render method for this node only (children should already have rend method set) 895 void initNodeRendMethod(); 896 /// init render method for the whole subtree 897 void initNodeRendMethodRecursive(); 898 /// init render method for the whole subtree 899 void initNodeStyleRecursive( LVDocViewCallback * progressCallback ); 900 #endif 901 902 903 /// remove node, clear resources 904 void destroy(); 905 906 /// returns true for invalid/deleted node ot NULL this pointer isNullldomNode907 inline bool isNull() const { return _handle._dataIndex==0 || getDocument() == NULL; } 908 /// returns true if node is stored in persistent storage isPersistentldomNode909 inline bool isPersistent() const { return (_handle._dataIndex&2)!=0; } 910 /// returns data index of node's registration in document data storage getDataIndexldomNode911 inline lInt32 getDataIndex() const { return TNINDEX; } 912 /// returns pointer to document getDocumentldomNode913 inline ldomDocument * getDocument() const { return _documentInstances[_handle._docIndex]; } 914 /// returns pointer to parent node, NULL if node has no parent 915 ldomNode * getParentNode() const; 916 /// returns node type, either LXML_TEXT_NODE or LXML_ELEMENT_NODE getNodeTypeldomNode917 inline lUInt8 getNodeType() const 918 { 919 return (_handle._dataIndex & 1) ? 
LXML_ELEMENT_NODE : LXML_TEXT_NODE; 920 } 921 /// returns node level, 0 is root node 922 lUInt8 getNodeLevel() const; 923 /// returns dataIndex of node's parent, 0 if no parent 924 int getParentIndex() const; 925 /// returns index of node inside parent's child collection 926 int getNodeIndex() const; 927 /// returns index of child node by dataIndex 928 int getChildIndex( lUInt32 dataIndex ) const; 929 /// returns true if node is document's root 930 bool isRoot() const; 931 /// returns true if node is text isTextldomNode932 inline bool isText() const { return _handle._dataIndex && !(_handle._dataIndex&1); } 933 /// returns true if node is element isElementldomNode934 inline bool isElement() const { return _handle._dataIndex && (_handle._dataIndex&1); } 935 /// returns true if node is and element that has children hasChildrenldomNode936 inline bool hasChildren() { return getChildCount()!=0; } 937 /// returns true if node is element has attributes hasAttributesldomNode938 inline bool hasAttributes() const { return getAttrCount()!=0; } 939 940 /// returns element child count 941 int getChildCount() const; 942 /// returns element attribute count 943 int getAttrCount() const; 944 /// returns attribute value by attribute name id and namespace id 945 const lString32 & getAttributeValue( lUInt16 nsid, lUInt16 id ) const; 946 /// returns attribute value by attribute name getAttributeValueldomNode947 inline const lString32 & getAttributeValue( const lChar32 * attrName ) const 948 { 949 return getAttributeValue( NULL, attrName ); 950 } 951 /// returns attribute value by attribute name getAttributeValueldomNode952 inline const lString32 & getAttributeValue( const lChar8 * attrName ) const 953 { 954 return getAttributeValue( NULL, attrName ); 955 } 956 /// returns attribute value by attribute name and namespace 957 const lString32 & getAttributeValue( const lChar32 * nsName, const lChar32 * attrName ) const; 958 /// returns attribute value by attribute name and namespace 959 
const lString32 & getAttributeValue( const lChar8 * nsName, const lChar8 * attrName ) const; 960 /// returns attribute by index 961 const lxmlAttribute * getAttribute( lUInt32 ) const; 962 /// returns true if element node has attribute with specified name id and namespace id 963 bool hasAttribute( lUInt16 nsId, lUInt16 attrId ) const; 964 /// returns attribute name by index 965 const lString32 & getAttributeName( lUInt32 ) const; 966 /// sets attribute value 967 void setAttributeValue( lUInt16 , lUInt16 , const lChar32 * ); 968 /// returns attribute value by attribute name id getAttributeValueldomNode969 inline const lString32 & getAttributeValue( lUInt16 id ) const { return getAttributeValue( LXML_NS_ANY, id ); } 970 /// returns true if element node has attribute with specified name id hasAttributeldomNode971 inline bool hasAttribute( lUInt16 id ) const { return hasAttribute( LXML_NS_ANY, id ); } 972 973 /// returns attribute value by attribute name id, looking at children if needed 974 const lString32 & getFirstInnerAttributeValue( lUInt16 nsid, lUInt16 id ) const; getFirstInnerAttributeValueldomNode975 const lString32 & getFirstInnerAttributeValue( lUInt16 id ) const { return getFirstInnerAttributeValue( LXML_NS_ANY, id ); } 976 977 /// returns element type structure pointer if it was set in document for this element name 978 const css_elem_def_props_t * getElementTypePtr(); 979 /// returns element name id 980 lUInt16 getNodeId() const; 981 /// returns element namespace id 982 lUInt16 getNodeNsId() const; 983 /// replace element name id with another value 984 void setNodeId( lUInt16 ); 985 /// returns element name 986 const lString32 & getNodeName() const; 987 /// compares node name with value, returns true if matches 988 bool isNodeName(const char * name) const; 989 /// returns element namespace name 990 const lString32 & getNodeNsName() const; 991 992 /// returns child node by index 993 ldomNode * getChildNode( lUInt32 index ) const; 994 /// returns true child 
node is element 995 bool isChildNodeElement( lUInt32 index ) const; 996 /// returns true child node is text 997 bool isChildNodeText( lUInt32 index ) const; 998 /// returns child node by index, NULL if node with this index is not element or nodeId!=0 and element node id!=nodeId 999 ldomNode * getChildElementNode( lUInt32 index, lUInt16 nodeId=0 ) const; 1000 /// returns child node by index, NULL if node with this index is not element or nodeTag!=0 and element node name!=nodeTag 1001 ldomNode * getChildElementNode( lUInt32 index, const lChar32 * nodeTag ) const; 1002 1003 /// returns text node text as wide string 1004 lString32 getText( lChar32 blockDelimiter = 0, int maxSize=0 ) const; 1005 /// returns text node text as utf8 string 1006 lString8 getText8( lChar8 blockDelimiter = 0, int maxSize=0 ) const; 1007 /// sets text node text as wide string 1008 void setText( lString32 ); 1009 /// sets text node text as utf8 string 1010 void setText8( lString8 ); 1011 1012 1013 /// returns node absolute rectangle (with inner=true, for erm_final, additionally 1014 // shifted by the inner paddings (exluding padding bottom) to get the absolute rect 1015 // of the inner LFormattedText. 
1016 void getAbsRect( lvRect & rect, bool inner=false ); 1017 /// sets node rendering structure pointer 1018 void clearRenderData(); 1019 /// reset node rendering structure pointer for sub-tree 1020 void clearRenderDataRecursive(); 1021 /// calls specified function recursively for all elements of DOM tree 1022 void recurseElements( void (*pFun)( ldomNode * node ) ); 1023 /// calls specified function recursively for all elements of DOM tree matched by matchFun 1024 void recurseMatchingElements( void (*pFun)( ldomNode * node ), bool (*matchFun)( ldomNode * node ) ); 1025 /// calls specified function recursively for all elements of DOM tree, children before parent 1026 void recurseElementsDeepFirst( void (*pFun)( ldomNode * node ) ); 1027 /// calls specified function recursively for all nodes of DOM tree 1028 void recurseNodes( void (*pFun)( ldomNode * node ) ); 1029 1030 1031 /// returns first text child element 1032 ldomNode * getFirstTextChild( bool skipEmpty=false ); 1033 /// returns last text child element 1034 ldomNode * getLastTextChild(); 1035 1036 #if BUILD_LITE!=1 1037 /// find node by coordinates of point in formatted document 1038 ldomNode * elementFromPoint( lvPoint pt, int direction, bool strict_bounds_checking=false ); 1039 /// find final node by coordinates of point in formatted document 1040 ldomNode * finalBlockFromPoint( lvPoint pt ); 1041 #endif 1042 1043 // rich interface stubs for supporting Element operations 1044 /// returns rendering method 1045 lvdom_element_render_method getRendMethod(); 1046 /// sets rendering method 1047 void setRendMethod( lvdom_element_render_method ); 1048 #if BUILD_LITE!=1 1049 /// returns element style record 1050 css_style_ref_t getStyle() const; 1051 /// returns element font 1052 font_ref_t getFont(); 1053 /// sets element font 1054 void setFont( font_ref_t ); 1055 /// sets element style record 1056 void setStyle( css_style_ref_t & ); 1057 #endif 1058 /// returns first child node 1059 ldomNode * getFirstChild() 
const; 1060 /// returns last child node 1061 ldomNode * getLastChild() const; 1062 /// removes and deletes last child element 1063 void removeLastChild(); 1064 /// move range of children startChildIndex to endChildIndex inclusively to specified element 1065 void moveItemsTo( ldomNode *, int , int ); 1066 /// find child element by tag id 1067 ldomNode * findChildElement( lUInt16 nsid, lUInt16 id, int index ); 1068 /// find child element by id path 1069 ldomNode * findChildElement( lUInt16 idPath[] ); 1070 /// inserts child element 1071 ldomNode * insertChildElement( lUInt32 index, lUInt16 nsid, lUInt16 id ); 1072 /// inserts child element 1073 ldomNode * insertChildElement( lUInt16 id ); 1074 /// inserts child text 1075 ldomNode * insertChildText( lUInt32 index, const lString32 & value ); 1076 /// inserts child text 1077 ldomNode * insertChildText( const lString32 & value ); 1078 /// inserts child text 1079 ldomNode * insertChildText(const lString8 & value, bool before_last_child=false); 1080 /// remove child 1081 ldomNode * removeChild( lUInt32 index ); 1082 1083 /// returns XPath segment for this element relative to parent element (e.g. 
"p[10]") 1084 lString32 getXPathSegment(); 1085 1086 /// creates stream to read base64 encoded data from element 1087 LVStreamRef createBase64Stream(); 1088 #if BUILD_LITE!=1 1089 /// returns object image source 1090 LVImageSourceRef getObjectImageSource(); 1091 /// returns object image ref name 1092 lString32 getObjectImageRefName( bool percentDecode=true ); 1093 /// returns object image stream 1094 LVStreamRef getObjectImageStream(); 1095 /// returns the sum of this node and its parents' top and bottom margins, borders and paddings 1096 int getSurroundingAddedHeight(); 1097 /// formats final block 1098 int renderFinalBlock( LFormattedTextRef & frmtext, RenderRectAccessor * fmt, int width, 1099 BlockFloatFootprint * float_footprint=NULL ); 1100 /// formats final block again after change, returns true if size of block is changed 1101 bool refreshFinalBlock(); 1102 #endif 1103 /// replace node with r/o persistent implementation 1104 ldomNode * persist(); 1105 /// replace node with r/w implementation 1106 ldomNode * modify(); 1107 1108 /// for display:list-item node, get marker 1109 bool getNodeListMarker( int & counterValue, lString32 & marker, int & markerWidth ); 1110 /// is node a floating floatBox 1111 bool isFloatingBox() const; 1112 /// is node an inlineBox that has not been re-inlined by having 1113 /// its child no more inline-block/inline-table 1114 bool isBoxingInlineBox() const; 1115 /// is node an inlineBox that wraps a bogus embedded block (not inline-block/inline-table) 1116 /// can be called with inline_box_checks_done=true when isBoxingInlineBox() has already 1117 /// been called to avoid rechecking what is known 1118 bool isEmbeddedBlockBoxingInlineBox(bool inline_box_checks_done=false) const; 1119 1120 /// is node any of our internal boxing element (or, optionally, our pseudoElem) 1121 bool isBoxingNode( bool orPseudoElem=false ) const; 1122 1123 /// return real (as in the original HTML) parent/siblings by skipping any internal 1124 /// boxing 
element up or down (returns NULL when no more sibling) 1125 ldomNode * getUnboxedParent() const; 1126 ldomNode * getUnboxedFirstChild( bool skip_text_nodes=false ) const; 1127 ldomNode * getUnboxedLastChild( bool skip_text_nodes=false ) const; 1128 ldomNode * getUnboxedPrevSibling( bool skip_text_nodes=false ) const; 1129 ldomNode * getUnboxedNextSibling( bool skip_text_nodes=false ) const; 1130 }; 1131 1132 1133 // default: 512K 1134 #define DEF_DOC_DATA_BUFFER_SIZE 0x80000 1135 1136 /// Base class for XML DOM documents 1137 /** 1138 Helps to decrease memory usage and increase performance for DOM implementations. 1139 Maintains Name<->Id maps for element names, namespaces and attributes. 1140 It allows to use short IDs instead of strings in DOM internals, 1141 and avoid duplication of string values. 1142 1143 Manages data storage. 1144 */ 1145 class lxmlDocBase : public tinyNodeCollection 1146 { 1147 friend struct ldomNode; 1148 friend class ldomXPointer; 1149 protected: 1150 1151 1152 /// Default constructor 1153 lxmlDocBase(int dataBufSize = DEF_DOC_DATA_BUFFER_SIZE); 1154 /// Copy constructor - copies ID tables contents 1155 lxmlDocBase( lxmlDocBase & doc ); 1156 public: 1157 /// Destructor 1158 virtual ~lxmlDocBase(); 1159 1160 #if BUILD_LITE!=1 1161 /// serialize to byte array (pointer will be incremented by number of bytes written) 1162 void serializeMaps( SerialBuf & buf ); 1163 /// deserialize from byte array (pointer will be incremented by number of bytes read) 1164 bool deserializeMaps( SerialBuf & buf ); 1165 1166 #endif 1167 1168 //====================================================================== 1169 // Name <-> Id maps functions 1170 1171 /// Get namespace name by id 1172 /** 1173 \param id is numeric value of namespace 1174 \return string value of namespace 1175 */ getNsName(lUInt16 id)1176 inline const lString32 & getNsName( lUInt16 id ) 1177 { 1178 return _nsNameTable.nameById( id ); 1179 } 1180 1181 /// Get namespace id by name 1182 /** 1183 
\param name is string value of namespace 1184 \return id of namespace 1185 */ 1186 lUInt16 getNsNameIndex( const lChar32 * name ); 1187 1188 /// Get namespace id by name 1189 /** 1190 \param name is string value of namespace (ASCII only) 1191 \return id of namespace 1192 */ 1193 lUInt16 getNsNameIndex( const lChar8 * name ); 1194 1195 /// Get attribute name by id 1196 /** 1197 \param id is numeric value of attribute 1198 \return string value of attribute 1199 */ getAttrName(lUInt16 id)1200 inline const lString32 & getAttrName( lUInt16 id ) 1201 { 1202 return _attrNameTable.nameById( id ); 1203 } 1204 1205 /// Get attribute id by name 1206 /** 1207 \param name is string value of attribute 1208 \return id of attribute 1209 */ 1210 lUInt16 getAttrNameIndex( const lChar32 * name ); 1211 1212 /// Get attribute id by name 1213 /** 1214 \param name is string value of attribute (8bit ASCII only) 1215 \return id of attribute 1216 */ 1217 lUInt16 getAttrNameIndex( const lChar8 * name ); 1218 1219 /// helper: returns attribute value getAttrValue(lUInt32 index)1220 inline const lString32 & getAttrValue( lUInt32 index ) const 1221 { 1222 return _attrValueTable[index]; 1223 } 1224 1225 /// helper: returns attribute value index getAttrValueIndex(const lChar32 * value)1226 inline lUInt32 getAttrValueIndex( const lChar32 * value ) 1227 { 1228 return (lUInt32)_attrValueTable.add( value ); 1229 } 1230 1231 /// helper: returns attribute value index, 0xffffffff if not found findAttrValueIndex(const lChar32 * value)1232 inline lUInt32 findAttrValueIndex( const lChar32 * value ) 1233 { 1234 return (lUInt32)_attrValueTable.find( value ); 1235 } 1236 1237 /// Get element name by id 1238 /** 1239 \param id is numeric value of element name 1240 \return string value of element name 1241 */ getElementName(lUInt16 id)1242 inline const lString32 & getElementName( lUInt16 id ) 1243 { 1244 return _elementNameTable.nameById( id ); 1245 } 1246 1247 /// Get element id by name 1248 /** 1249 \param 
name is string value of element name 1250 \return id of element 1251 */ 1252 lUInt16 getElementNameIndex( const lChar32 * name ); 1253 1254 /// Get element id by name 1255 /** 1256 \param name is string value of element name (8bit ASCII only) 1257 \return id of element, allocates new ID if not found 1258 */ 1259 lUInt16 getElementNameIndex( const lChar8 * name ); 1260 1261 /// Get element id by name 1262 /** 1263 \param name is string value of element name (8bit ASCII only) 1264 \return id of element, 0 if not found 1265 */ 1266 lUInt16 findElementNameIndex( const lChar8 * name ); 1267 1268 /// Get element type properties structure by id 1269 /** 1270 \param id is element id 1271 \return pointer to elem_def_t structure containing type properties 1272 \sa elem_def_t 1273 */ getElementTypePtr(lUInt16 id)1274 inline const css_elem_def_props_t * getElementTypePtr( lUInt16 id ) 1275 { 1276 return _elementNameTable.dataById( id ); 1277 } 1278 1279 // set node types from table 1280 void setNodeTypes( const elem_def_t * node_scheme ); 1281 // set attribute types from table 1282 void setAttributeTypes( const attr_def_t * attr_scheme ); 1283 // set namespace types from table 1284 void setNameSpaceTypes( const ns_def_t * ns_scheme ); 1285 1286 // debug dump 1287 void dumpUnknownEntities( const char * fname ); 1288 lString32Collection getUnknownEntities(); 1289 1290 /// garbage collector gc()1291 virtual void gc() 1292 { 1293 #if BUILD_LITE!=1 1294 fontMan->gc(); 1295 #endif 1296 } 1297 getStyleSheet()1298 inline LVStyleSheet * getStyleSheet() { return &_stylesheet; } 1299 /// sets style sheet, clears old content of css if arg replace is true 1300 void setStyleSheet( const char * css, bool replace ); 1301 1302 #if BUILD_LITE!=1 1303 /// apply document's stylesheet to element node applyStyle(ldomNode * element,css_style_rec_t * pstyle)1304 inline void applyStyle( ldomNode * element, css_style_rec_t * pstyle) 1305 { 1306 _stylesheet.apply( element, pstyle ); 1307 } 1308 #endif 
1309 1310 void onAttributeSet( lUInt16 attrId, lUInt32 valueId, ldomNode * node ); 1311 1312 /// get element by id attribute value code getNodeById(lUInt32 attrValueId)1313 inline ldomNode * getNodeById( lUInt32 attrValueId ) 1314 { 1315 return getTinyNode( _idNodeMap.get( attrValueId ) ); 1316 } 1317 1318 /// get element by id attribute value getElementById(const lChar32 * id)1319 inline ldomNode * getElementById( const lChar32 * id ) 1320 { 1321 lUInt32 attrValueId = getAttrValueIndex( id ); 1322 ldomNode * node = getNodeById( attrValueId ); 1323 return node; 1324 } 1325 /// returns root element 1326 ldomNode * getRootNode(); 1327 1328 /// returns code base path relative to document container getCodeBase()1329 inline lString32 getCodeBase() { return getProps()->getStringDef(DOC_PROP_CODE_BASE, ""); } 1330 /// sets code base path relative to document container setCodeBase(const lString32 & codeBase)1331 inline void setCodeBase(const lString32 & codeBase) { getProps()->setStringDef(DOC_PROP_CODE_BASE, codeBase); } 1332 1333 #ifdef _DEBUG 1334 #if BUILD_LITE!=1 1335 ///debug method, for DOM tree consistency check, returns false if failed 1336 bool checkConsistency( bool requirePersistent ); 1337 #endif 1338 #endif 1339 1340 1341 /// create formatted text object with options set 1342 LFormattedText * createFormattedText(); 1343 1344 #if BUILD_LITE!=1 setHightlightOptions(text_highlight_options_t & options)1345 void setHightlightOptions(text_highlight_options_t & options) { 1346 _highlightOptions = options; 1347 } 1348 #endif 1349 1350 protected: 1351 #if BUILD_LITE!=1 1352 struct DocFileHeader { 1353 lUInt32 render_dx; 1354 lUInt32 render_dy; 1355 lUInt32 render_docflags; 1356 lUInt32 render_style_hash; 1357 lUInt32 stylesheet_hash; 1358 lUInt32 node_displaystyle_hash; 1359 bool serialize( SerialBuf & buf ); 1360 bool deserialize( SerialBuf & buf ); DocFileHeaderDocFileHeader1361 DocFileHeader() 1362 : render_dx(0), render_dy(0), render_docflags(0), 
render_style_hash(0), stylesheet_hash(0), 1363 node_displaystyle_hash(NODE_DISPLAY_STYLE_HASH_UNINITIALIZED) 1364 { 1365 } 1366 }; 1367 DocFileHeader _hdr; 1368 text_highlight_options_t _highlightOptions; 1369 #endif 1370 1371 LDOMNameIdMap _elementNameTable; // Element Name<->Id map 1372 LDOMNameIdMap _attrNameTable; // Attribute Name<->Id map 1373 LDOMNameIdMap _nsNameTable; // Namespace Name<->Id map 1374 lUInt16 _nextUnknownElementId; // Next Id for unknown element 1375 lUInt16 _nextUnknownAttrId; // Next Id for unknown attribute 1376 lUInt16 _nextUnknownNsId; // Next Id for unknown namespace 1377 lString32HashedCollection _attrValueTable; 1378 LVHashTable<lUInt32,lInt32> _idNodeMap; // id to data index map 1379 LVHashTable<lString32,LVImageSourceRef> _urlImageMap; // url to image source map 1380 lUInt16 _idAttrId; // Id for "id" attribute name 1381 lUInt16 _nameAttrId; // Id for "name" attribute name 1382 1383 #if BUILD_LITE!=1 1384 SerialBuf _pagesData; 1385 #endif 1386 1387 }; 1388 1389 /* 1390 struct lxmlNode 1391 { 1392 lUInt32 parent; 1393 lUInt8 nodeType; 1394 lUInt8 nodeLevel; 1395 }; 1396 */ 1397 1398 struct lxmlAttribute 1399 { 1400 // 1401 lUInt16 nsid; 1402 lUInt16 id; 1403 lUInt32 index; comparelxmlAttribute1404 inline bool compare( lUInt16 nsId, lUInt16 attrId ) 1405 { 1406 return (nsId == nsid || nsId == LXML_NS_ANY) && (id == attrId); 1407 } setDatalxmlAttribute1408 inline void setData( lUInt16 nsId, lUInt16 attrId, lUInt32 valueIndex ) 1409 { 1410 nsid = nsId; 1411 id = attrId; 1412 index = valueIndex; 1413 } 1414 }; 1415 1416 class ldomDocument; 1417 1418 1419 #define LDOM_ALLOW_NODE_INDEX 0 1420 1421 1422 class ldomDocument; 1423 1424 /** 1425 * @brief XPointer/XPath object with reference counting. 
1426 * 1427 */ 1428 class ldomXPointer 1429 { 1430 protected: 1431 friend class ldomXPointerEx; 1432 struct XPointerData { 1433 protected: 1434 ldomDocument * _doc; 1435 lInt32 _dataIndex; 1436 int _offset; 1437 int _refCount; 1438 public: addRefXPointerData1439 inline void addRef() { _refCount++; } decRefXPointerData1440 inline int decRef() { return --_refCount; } 1441 // create empty XPointerDataXPointerData1442 XPointerData() : _doc(NULL), _dataIndex(0), _offset(0), _refCount(1) { } 1443 // create instance XPointerDataXPointerData1444 XPointerData( ldomNode * node, int offset ) 1445 : _doc(node?node->getDocument():NULL) 1446 , _dataIndex(node?node->getDataIndex():0) 1447 , _offset( offset ) 1448 , _refCount( 1 ) 1449 { } 1450 // clone XPointerDataXPointerData1451 XPointerData( const XPointerData & v ) : _doc(v._doc), _dataIndex(v._dataIndex), _offset(v._offset), _refCount(1) { } getDocumentXPointerData1452 inline ldomDocument * getDocument() { return _doc; } 1453 inline bool operator == (const XPointerData & v) const 1454 { 1455 return _doc==v._doc && _dataIndex == v._dataIndex && _offset == v._offset; 1456 } 1457 inline bool operator != (const XPointerData & v) const 1458 { 1459 return _doc!=v._doc || _dataIndex != v._dataIndex || _offset != v._offset; 1460 } isNullXPointerData1461 inline bool isNull() { return _dataIndex==0 || _doc==NULL; } getNodeXPointerData1462 inline ldomNode * getNode() { return _dataIndex>0 ? 
((lxmlDocBase*)_doc)->getTinyNode( _dataIndex ) : NULL; } getOffsetXPointerData1463 inline int getOffset() { return _offset; } setNodeXPointerData1464 inline void setNode( ldomNode * node ) 1465 { 1466 if ( node ) { 1467 _doc = node->getDocument(); 1468 _dataIndex = node->getDataIndex(); 1469 } else { 1470 _doc = NULL; 1471 _dataIndex = 0; 1472 } 1473 } setOffsetXPointerData1474 inline void setOffset( int offset ) { _offset = offset; } addOffsetXPointerData1475 inline void addOffset( int offset ) { _offset+=offset; } ~XPointerDataXPointerData1476 ~XPointerData() { } 1477 }; 1478 XPointerData * _data; 1479 /// node pointer 1480 //ldomNode * _node; 1481 /// offset within node for pointer, -1 for xpath 1482 //int _offset; 1483 // cloning constructor ldomXPointer(const XPointerData * data)1484 ldomXPointer( const XPointerData * data ) 1485 : _data( new XPointerData( *data ) ) 1486 { 1487 } 1488 public: 1489 /// clear pointer (make null) clear()1490 void clear() 1491 { 1492 if (_data->decRef() == 0) 1493 delete _data; 1494 _data = new XPointerData(); 1495 } 1496 /// return document getDocument()1497 inline ldomDocument * getDocument() { return _data->getDocument(); } 1498 /// returns node pointer getNode()1499 inline ldomNode * getNode() const { return _data->getNode(); } 1500 #if BUILD_LITE!=1 1501 /// return parent final node, if found 1502 ldomNode * getFinalNode() const; 1503 /// return true is this node is a final node 1504 bool isFinalNode() const; 1505 #endif 1506 /// returns offset within node getOffset()1507 inline int getOffset() const { return _data->getOffset(); } 1508 /// set pointer node setNode(ldomNode * node)1509 inline void setNode( ldomNode * node ) { _data->setNode( node ); } 1510 /// set pointer offset within node setOffset(int offset)1511 inline void setOffset( int offset ) { _data->setOffset( offset ); } 1512 /// default constructor makes NULL pointer ldomXPointer()1513 ldomXPointer() 1514 : _data( new XPointerData() ) 1515 { 1516 } 1517 /// 
remove reference ~ldomXPointer()1518 ~ldomXPointer() 1519 { 1520 if (_data->decRef() == 0) 1521 delete _data; 1522 } 1523 /// copy constructor ldomXPointer(const ldomXPointer & v)1524 ldomXPointer( const ldomXPointer& v ) 1525 : _data(v._data) 1526 { 1527 _data->addRef(); 1528 } 1529 /// assignment operator 1530 ldomXPointer & operator =( const ldomXPointer& v ) 1531 { 1532 if ( _data==v._data ) 1533 return *this; 1534 if (_data->decRef() == 0) 1535 delete _data; 1536 _data = v._data; 1537 _data->addRef(); 1538 return *this; 1539 } 1540 /// constructor ldomXPointer(ldomNode * node,int offset)1541 ldomXPointer( ldomNode * node, int offset ) 1542 : _data( new XPointerData( node, offset ) ) 1543 { 1544 } 1545 /// get pointer for relative path 1546 ldomXPointer relative( lString32 relativePath ); 1547 /// get pointer for relative path relative(const lChar32 * relativePath)1548 ldomXPointer relative( const lChar32 * relativePath ) 1549 { 1550 return relative( lString32(relativePath) ); 1551 } 1552 1553 /// returns true for NULL pointer isNull()1554 bool isNull() const 1555 { 1556 return !_data || _data->isNull(); 1557 } 1558 /// returns true if object is pointer isPointer()1559 bool isPointer() const 1560 { 1561 return !_data->isNull() && getOffset()>=0; 1562 } 1563 /// returns true if object is path (no offset specified) isPath()1564 bool isPath() const 1565 { 1566 return !_data->isNull() && getOffset()==-1; 1567 } 1568 /// returns true if pointer is NULL 1569 bool operator !() const 1570 { 1571 return _data->isNull(); 1572 } 1573 /// returns true if pointers are equal 1574 bool operator == (const ldomXPointer & v) const 1575 { 1576 return *_data == *v._data; 1577 } 1578 /// returns true if pointers are not equal 1579 bool operator != (const ldomXPointer & v) const 1580 { 1581 return *_data != *v._data; 1582 } 1583 //#if BUILD_LITE!=1 1584 /// returns caret rectangle for pointer inside formatted document 1585 bool getRect(lvRect & rect, bool extended=false, bool 
adjusted=false) const; 1586 /// returns glyph rectangle for pointer inside formatted document considering paddings and borders 1587 /// (with adjusted=true, adjust for left and right side bearing of the glyph, for cleaner highlighting) 1588 bool getRectEx(lvRect & rect, bool adjusted=false) const { return getRect(rect, true, adjusted); } 1589 /// returns coordinates of pointer inside formatted document 1590 lvPoint toPoint( bool extended=false ) const; 1591 //#endif 1592 /// converts to string 1593 lString32 toString( XPointerMode mode = XPATH_USE_NAMES) { 1594 if( XPATH_USE_NAMES==mode ) { 1595 tinyNodeCollection* doc = (tinyNodeCollection*)_data->getDocument(); 1596 if ( doc != NULL && doc->getDOMVersionRequested() >= DOM_VERSION_WITH_NORMALIZED_XPOINTERS ) 1597 return toStringV2(); 1598 return toStringV1(); 1599 } 1600 return toStringV2AsIndexes(); 1601 } 1602 lString32 toStringV1(); // Using names, old, with boxing elements (non-normalized) 1603 lString32 toStringV2(); // Using names, new, without boxing elements, so: normalized 1604 lString32 toStringV2AsIndexes(); // Without element names, normalized (not used) 1605 1606 /// returns XPath node text 1607 lString32 getText( lChar32 blockDelimiter=0 ) 1608 { 1609 ldomNode * node = getNode(); 1610 if ( !node ) 1611 return lString32::empty_str; 1612 return node->getText( blockDelimiter ); 1613 } 1614 /// returns href attribute of <A> element, null string if not found 1615 lString32 getHRef(); 1616 /// returns href attribute of <A> element, plus xpointer of <A> element itself 1617 lString32 getHRef(ldomXPointer & a_xpointer); 1618 /// create a copy of pointer data clone()1619 ldomXPointer * clone() 1620 { 1621 return new ldomXPointer( _data ); 1622 } 1623 /// returns true if current node is element isElement()1624 inline bool isElement() const { return !isNull() && getNode()->isElement(); } 1625 /// returns true if current node is element isText()1626 inline bool isText() const { return !isNull() && 
getNode()->isText(); } 1627 /// returns HTML (serialized from the DOM, may be different from the source HTML) 1628 lString8 getHtml( lString32Collection & cssFiles, int wflags=0 ); 1629 lString8 getHtml( int wflags=0 ) { 1630 lString32Collection cssFiles; return getHtml(cssFiles, wflags); 1631 } 1632 }; 1633 1634 #define MAX_DOM_LEVEL 64 1635 /// Xpointer optimized to iterate through DOM tree 1636 class ldomXPointerEx : public ldomXPointer 1637 { 1638 protected: 1639 int _indexes[MAX_DOM_LEVEL]; 1640 int _level; 1641 void initIndex(); 1642 public: 1643 /// returns bottom level index getIndex()1644 int getIndex() { return _indexes[_level-1]; } 1645 /// returns node level getLevel()1646 int getLevel() { return _level; } 1647 /// default constructor ldomXPointerEx()1648 ldomXPointerEx() 1649 : ldomXPointer() 1650 { 1651 initIndex(); 1652 } 1653 /// constructor by node pointer and offset ldomXPointerEx(ldomNode * node,int offset)1654 ldomXPointerEx( ldomNode * node, int offset ) 1655 : ldomXPointer( node, offset ) 1656 { 1657 initIndex(); 1658 } 1659 /// copy constructor ldomXPointerEx(const ldomXPointer & v)1660 ldomXPointerEx( const ldomXPointer& v ) 1661 : ldomXPointer( v._data ) 1662 { 1663 initIndex(); 1664 } 1665 /// copy constructor ldomXPointerEx(const ldomXPointerEx & v)1666 ldomXPointerEx( const ldomXPointerEx& v ) 1667 : ldomXPointer( v._data ) 1668 { 1669 _level = v._level; 1670 for ( int i=0; i<_level; i++ ) 1671 _indexes[ i ] = v._indexes[i]; 1672 } 1673 /// assignment operator 1674 ldomXPointerEx & operator =( const ldomXPointer& v ) 1675 { 1676 if ( _data==v._data ) 1677 return *this; 1678 if (_data->decRef() == 0) 1679 delete _data; 1680 _data = new XPointerData( *v._data ); 1681 initIndex(); 1682 return *this; 1683 } 1684 /// assignment operator 1685 ldomXPointerEx & operator =( const ldomXPointerEx& v ) 1686 { 1687 if ( _data==v._data ) 1688 return *this; 1689 if (_data->decRef() == 0) 1690 delete _data; 1691 _data = new XPointerData( *v._data ); 
1692 _level = v._level; 1693 for ( int i=0; i<_level; i++ ) 1694 _indexes[ i ] = v._indexes[i]; 1695 return *this; 1696 } 1697 /// returns true if ranges are equal 1698 bool operator == ( const ldomXPointerEx & v ) const 1699 { 1700 return _data->getDocument()==v._data->getDocument() && _data->getNode()==v._data->getNode() && _data->getOffset()==v._data->getOffset(); 1701 } 1702 /// searches path for element with specific id, returns level at which element is founs, 0 if not found 1703 int findElementInPath( lUInt16 id ); 1704 /// compare two pointers, returns -1, 0, +1 1705 int compare( const ldomXPointerEx& v ) const; 1706 /// move to next sibling 1707 bool nextSibling(); 1708 /// move to previous sibling 1709 bool prevSibling(); 1710 /// move to next sibling element 1711 bool nextSiblingElement(); 1712 /// move to previous sibling element 1713 bool prevSiblingElement(); 1714 /// move to parent 1715 bool parent(); 1716 /// move to first child of current node 1717 bool firstChild(); 1718 /// move to last child of current node 1719 bool lastChild(); 1720 /// move to first element child of current node 1721 bool firstElementChild(); 1722 /// move to last element child of current node 1723 bool lastElementChild(); 1724 /// move to child # 1725 bool child( int index ); 1726 /// move to sibling # 1727 bool sibling( int index ); 1728 /// ensure that current node is element (move to parent, if not - from text node to element) 1729 bool ensureElement(); 1730 /// moves pointer to parent element with FINAL render method, returns true if success 1731 bool ensureFinal(); 1732 /// returns true if current node is visible element with render method == erm_final 1733 bool isVisibleFinal(); 1734 /// move to next final visible node (~paragraph) 1735 bool nextVisibleFinal(); 1736 /// move to previous final visible node (~paragraph) 1737 bool prevVisibleFinal(); 1738 /// returns true if current node is visible element or text 1739 bool isVisible(); 1740 // returns true if text node 
char at offset is part of a word 1741 bool isVisibleWordChar(); 1742 /// move to next text node 1743 bool nextText( bool thisBlockOnly = false ); 1744 /// move to previous text node 1745 bool prevText( bool thisBlockOnly = false ); 1746 /// move to next visible text node 1747 bool nextVisibleText( bool thisBlockOnly = false ); 1748 /// move to previous visible text node 1749 bool prevVisibleText( bool thisBlockOnly = false ); 1750 1751 /// move to prev visible char 1752 bool prevVisibleChar( bool thisBlockOnly = false ); 1753 /// move to next visible char 1754 bool nextVisibleChar( bool thisBlockOnly = false ); 1755 1756 /// move to previous visible word beginning 1757 bool prevVisibleWordStart( bool thisBlockOnly = false ); 1758 /// move to previous visible word end 1759 bool prevVisibleWordEnd( bool thisBlockOnly = false ); 1760 /// move to next visible word beginning 1761 bool nextVisibleWordStart( bool thisBlockOnly = false ); 1762 /// move to end of current word 1763 bool thisVisibleWordEnd( bool thisBlockOnly = false ); 1764 /// move to next visible word end 1765 bool nextVisibleWordEnd( bool thisBlockOnly = false ); 1766 1767 /// move to previous visible word beginning (in sentence) 1768 bool prevVisibleWordStartInSentence(); 1769 /// move to previous visible word end (in sentence) 1770 bool prevVisibleWordEndInSentence(); 1771 /// move to next visible word beginning (in sentence) 1772 bool nextVisibleWordStartInSentence(); 1773 /// move to end of current word (in sentence) 1774 bool thisVisibleWordEndInSentence(); 1775 /// move to next visible word end (in sentence) 1776 bool nextVisibleWordEndInSentence(); 1777 1778 /// move to beginning of current visible text sentence 1779 bool thisSentenceStart(); 1780 /// move to end of current visible text sentence 1781 bool thisSentenceEnd(); 1782 /// move to beginning of next visible text sentence 1783 bool nextSentenceStart(); 1784 /// move to beginning of next visible text sentence 1785 bool prevSentenceStart(); 
1786 /// move to end of next visible text sentence 1787 bool nextSentenceEnd(); 1788 /// move to end of prev visible text sentence 1789 bool prevSentenceEnd(); 1790 /// returns true if points to beginning of sentence 1791 bool isSentenceStart(); 1792 /// returns true if points to end of sentence 1793 bool isSentenceEnd(); 1794 1795 /// returns true if points to last visible text inside block element 1796 bool isLastVisibleTextInBlock(); 1797 /// returns true if points to first visible text inside block element 1798 bool isFirstVisibleTextInBlock(); 1799 1800 /// returns block owner node of current node (or current node if it's block) 1801 ldomNode * getThisBlockNode(); 1802 1803 /// returns true if current position is visible word beginning 1804 bool isVisibleWordStart(); 1805 /// returns true if current position is visible word end 1806 bool isVisibleWordEnd(); 1807 /// forward iteration by elements of DOM three 1808 bool nextElement(); 1809 /// backward iteration by elements of DOM three 1810 bool prevElement(); 1811 /// calls specified function recursively for all elements of DOM tree 1812 void recurseElements( void (*pFun)( ldomXPointerEx & node ) ); 1813 /// calls specified function recursively for all nodes of DOM tree 1814 void recurseNodes( void (*pFun)( ldomXPointerEx & node ) ); 1815 1816 /// move to next sibling or parent's next sibling 1817 bool nextOuterElement(); 1818 /// move to (end of) last and deepest child node descendant of current node 1819 bool lastInnerNode( bool toTextEnd=false ); 1820 /// move to (end of) last and deepest child text node descendant of current node 1821 bool lastInnerTextNode( bool toTextEnd=false ); 1822 }; 1823 1824 class ldomXRange; 1825 1826 /// callback for DOM tree iteration interface 1827 class ldomNodeCallback { 1828 public: 1829 /// destructor ~ldomNodeCallback()1830 virtual ~ldomNodeCallback() { } 1831 /// called for each found text fragment in range 1832 virtual void onText( ldomXRange * ) = 0; 1833 /// called for 
each found node in range 1834 virtual bool onElement( ldomXPointerEx * ) = 0; 1835 }; 1836 1837 /// range for word inside text node 1838 class ldomWord 1839 { 1840 ldomNode * _node; 1841 int _start; 1842 int _end; 1843 public: ldomWord()1844 ldomWord( ) 1845 : _node(NULL), _start(0), _end(0) 1846 { } ldomWord(ldomNode * node,int start,int end)1847 ldomWord( ldomNode * node, int start, int end ) 1848 : _node(node), _start(start), _end(end) 1849 { } ldomWord(const ldomWord & v)1850 ldomWord( const ldomWord & v ) 1851 : _node(v._node), _start(v._start), _end(v._end) 1852 { } 1853 ldomWord & operator = ( const ldomWord & v ) 1854 { 1855 _node = v._node; 1856 _start = v._start; 1857 _end = v._end; 1858 return *this; 1859 } 1860 /// returns true if object doesn't point valid word isNull()1861 bool isNull() { return _node==NULL || _start<0 || _end<=_start; } 1862 /// get word text node pointer getNode()1863 ldomNode * getNode() const { return _node; } 1864 /// get word start offset getStart()1865 int getStart() const { return _start; } 1866 /// get word end offset getEnd()1867 int getEnd() const { return _end; } 1868 /// get word start XPointer getStartXPointer()1869 ldomXPointer getStartXPointer() const { return ldomXPointer( _node, _start ); } 1870 /// get word start XPointer getEndXPointer()1871 ldomXPointer getEndXPointer() const { return ldomXPointer( _node, _end ); } 1872 /// get word text getText()1873 lString32 getText() 1874 { 1875 if ( isNull() ) 1876 return lString32::empty_str; 1877 lString32 txt = _node->getText(); 1878 return txt.substr( _start, _end-_start ); 1879 } 1880 }; 1881 1882 /// DOM range 1883 class ldomXRange { 1884 ldomXPointerEx _start; 1885 ldomXPointerEx _end; 1886 /// _flags, only used by ldomXRangeList.getRanges() when making a ldomMarkedRangeList (for native 1887 // highlighting of a text selection being made, and for crengine internal bookmarks): 1888 // 0: not shown (filtered out in LVDocView::updateSelections() by ldomXRangeList 
ranges(..., true)) 1889 // 1,2,3: legacy drawing (will make a single ldomMarkedRange spanning multiple lines, assuming 1890 // full width LTR paragraphs) (2 & 3 might be used for crengine internal bookmarks, 1891 // see hist.h for enum bmk_type) 1892 // 0x11, 0x12, 0x13: enhanced drawing (will make multiple segmented ldomMarkedRange, 1893 // each spanning a single line) 1894 lUInt32 _flags; 1895 public: ldomXRange()1896 ldomXRange() 1897 : _flags(0) 1898 { 1899 } 1900 ldomXRange( const ldomXPointerEx & start, const ldomXPointerEx & end, lUInt32 flags=0 ) _start(start)1901 : _start( start ), _end( end ), _flags(flags) 1902 { 1903 } ldomXRange(const ldomXPointer & start,const ldomXPointer & end)1904 ldomXRange( const ldomXPointer & start, const ldomXPointer & end ) 1905 : _start( start ), _end( end ), _flags(0) 1906 { 1907 } 1908 /// copy constructor ldomXRange(const ldomXRange & v)1909 ldomXRange( const ldomXRange & v ) 1910 : _start( v._start ), _end( v._end ), _flags(v._flags) 1911 { 1912 } ldomXRange(const ldomWord & word)1913 ldomXRange( const ldomWord & word ) 1914 : _start( word.getStartXPointer() ), _end( word.getEndXPointer() ), _flags(1) 1915 { 1916 } 1917 /// if start is after end, swap start and end 1918 void sort(); 1919 /// create intersection of two ranges 1920 ldomXRange( const ldomXRange & v1, const ldomXRange & v2 ); 1921 /// copy constructor of full node range 1922 ldomXRange( ldomNode * p, bool fitEndToLastInnerChild=false ); 1923 /// copy assignment 1924 ldomXRange & operator = ( const ldomXRange & v ) 1925 { 1926 _start = v._start; 1927 _end = v._end; 1928 return *this; 1929 } 1930 /// returns true if ranges are equal 1931 bool operator == ( const ldomXRange & v ) const 1932 { 1933 return _start == v._start && _end == v._end && _flags==v._flags; 1934 } 1935 /// returns true if interval is invalid or empty isNull()1936 bool isNull() 1937 { 1938 if ( _start.isNull() || _end.isNull() ) 1939 return true; 1940 if ( _start.compare( _end ) > 0 ) 1941 
return true; 1942 return false; 1943 } 1944 /// makes range empty clear()1945 void clear() 1946 { 1947 _start.clear(); 1948 _end.clear(); 1949 _flags = 0; 1950 } 1951 /// returns true if pointer position is inside range isInside(const ldomXPointerEx & p)1952 bool isInside( const ldomXPointerEx & p ) const 1953 { 1954 return ( _start.compare( p ) <= 0 && _end.compare( p ) >= 0 ); 1955 } 1956 /// returns interval start point getStart()1957 ldomXPointerEx & getStart() { return _start; } 1958 /// returns interval end point getEnd()1959 ldomXPointerEx & getEnd() { return _end; } 1960 /// sets interval start point setStart(ldomXPointerEx & start)1961 void setStart( ldomXPointerEx & start ) { _start = start; } 1962 /// sets interval end point setEnd(ldomXPointerEx & end)1963 void setEnd( ldomXPointerEx & end ) { _end = end; } 1964 /// returns flags value getFlags()1965 lUInt32 getFlags() { return _flags; } 1966 /// sets new flags value setFlags(lUInt32 flags)1967 void setFlags( lUInt32 flags ) { _flags = flags; } 1968 /// returns true if this interval intersects specified interval 1969 bool checkIntersection( ldomXRange & v ); 1970 /// returns text between two XPointer positions 1971 lString32 getRangeText( lChar32 blockDelimiter='\n', int maxTextLen=0 ); 1972 /// get all words from specified range 1973 void getRangeWords( LVArray<ldomWord> & list ); 1974 /// returns href attribute of <A> element, null string if not found 1975 lString32 getHRef(); 1976 /// returns href attribute of <A> element, plus xpointer of <A> element itself 1977 lString32 getHRef(ldomXPointer & a_xpointer); 1978 /// sets range to nearest word bounds, returns true if success 1979 static bool getWordRange( ldomXRange & range, ldomXPointer & p ); 1980 /// run callback for each node in range 1981 void forEach( ldomNodeCallback * callback ); 1982 #if BUILD_LITE!=1 1983 /// returns rectangle (in doc coordinates) for range. Returns true if found. 
1984 bool getRectEx( lvRect & rect, bool & isSingleLine ); getRectEx(lvRect & rect)1985 bool getRectEx( lvRect & rect ) { 1986 bool isSingleLine; return getRectEx(rect, isSingleLine); 1987 }; 1988 // returns multiple segments rects (one for each text line) 1989 // that the ldomXRange spans on the page. 1990 void getSegmentRects( LVArray<lvRect> & rects ); 1991 #endif 1992 /// returns nearest common element for start and end points 1993 ldomNode * getNearestCommonParent(); 1994 /// returns HTML (serialized from the DOM, may be different from the source HTML) 1995 lString8 getHtml( lString32Collection & cssFiles, int wflags=0, bool fromRootNode=false ); 1996 lString8 getHtml( int wflags=0, bool fromRootNode=false ) { 1997 lString32Collection cssFiles; return getHtml(cssFiles, wflags, fromRootNode); 1998 }; 1999 2000 /// searches for specified text inside range 2001 bool findText( lString32 pattern, bool caseInsensitive, bool reverse, LVArray<ldomWord> & words, int maxCount, int maxHeight, int maxHeightCheckStartY = -1, bool checkMaxFromStart = false ); 2002 }; 2003 2004 class ldomMarkedText 2005 { 2006 public: 2007 lString32 text; 2008 lUInt32 flags; 2009 int offset; ldomMarkedText(lString32 s,lUInt32 flg,int offs)2010 ldomMarkedText( lString32 s, lUInt32 flg, int offs ) 2011 : text(s), flags(flg), offset(offs) 2012 { 2013 } ldomMarkedText(const ldomMarkedText & v)2014 ldomMarkedText( const ldomMarkedText & v ) 2015 : text(v.text), flags(v.flags) 2016 { 2017 } 2018 }; 2019 2020 typedef LVPtrVector<ldomMarkedText> ldomMarkedTextList; 2021 2022 enum MoveDirection { 2023 DIR_ANY, 2024 DIR_LEFT, 2025 DIR_RIGHT, 2026 DIR_UP, 2027 DIR_DOWN 2028 }; 2029 2030 /// range in document, marked with specified flags 2031 class ldomMarkedRange 2032 { 2033 public: 2034 /// start document point 2035 lvPoint start; 2036 /// end document point 2037 lvPoint end; 2038 /// flags: 2039 // 0: not shown 2040 // 1,2,3: legacy drawing (a single mark may spans multiple lines, assuming full width 
2041 // LTR paragraphs) (2 & 3 might be used for crengine internal bookmarks, 2042 // see hist.h for enum bmk_type) 2043 // 0x11, 0x12, 0x13: enhanced drawing (segmented mark, spanning a single line) 2044 lUInt32 flags; empty()2045 bool empty() 2046 { 2047 return ( start.y>end.y || ( start.y == end.y && start.x >= end.x ) ); 2048 } 2049 /// returns mark middle point for single line mark, or start point for multiline mark 2050 lvPoint getMiddlePoint(); 2051 /// returns distance (dx+dy) from specified point to middle point 2052 int calcDistance( int x, int y, MoveDirection dir ); 2053 /// returns true if intersects specified line rectangle 2054 bool intersects( lvRect & rc, lvRect & intersection ); 2055 /// constructor ldomMarkedRange(lvPoint _start,lvPoint _end,lUInt32 _flags)2056 ldomMarkedRange( lvPoint _start, lvPoint _end, lUInt32 _flags ) 2057 : start(_start), end(_end), flags(_flags) 2058 { 2059 } 2060 /// constructor ldomMarkedRange(ldomWord & word)2061 ldomMarkedRange( ldomWord & word ) { 2062 ldomXPointer startPos(word.getNode(), word.getStart() ); 2063 ldomXPointer endPos(word.getNode(), word.getEnd() ); 2064 start = startPos.toPoint(); 2065 end = endPos.toPoint(); 2066 } 2067 /// copy constructor ldomMarkedRange(const ldomMarkedRange & v)2068 ldomMarkedRange( const ldomMarkedRange & v ) 2069 : start(v.start), end(v.end), flags(v.flags) 2070 { 2071 } 2072 }; 2073 2074 class ldomWordEx : public ldomWord 2075 { 2076 ldomWord _word; 2077 ldomMarkedRange _mark; 2078 ldomXRange _range; 2079 lString32 _text; 2080 public: ldomWordEx(ldomWord & word)2081 ldomWordEx( ldomWord & word ) 2082 : _word(word), _mark(word), _range(word) 2083 { 2084 _text = removeSoftHyphens( _word.getText() ); 2085 } getWord()2086 ldomWord & getWord() { return _word; } getRange()2087 ldomXRange & getRange() { return _range; } getMark()2088 ldomMarkedRange & getMark() { return _mark; } getText()2089 lString32 & getText() { return _text; } 2090 }; 2091 2092 /// list of extended words 2093 
class ldomWordExList : public LVPtrVector<ldomWordEx> 2094 { 2095 int minx; 2096 int maxx; 2097 int miny; 2098 int maxy; 2099 int x; 2100 int y; 2101 ldomWordEx * selWord; 2102 lString32Collection pattern; 2103 void init(); 2104 ldomWordEx * findWordByPattern(); 2105 public: ldomWordExList()2106 ldomWordExList() 2107 : minx(-1), maxx(-1), miny(-1), maxy(-1), x(-1), y(-1), selWord(NULL) 2108 { 2109 } 2110 /// adds all visible words from range, returns number of added words 2111 int addRangeWords( ldomXRange & range, bool trimPunctuation ); 2112 /// find word nearest to specified point 2113 ldomWordEx * findNearestWord( int x, int y, MoveDirection dir ); 2114 /// select word 2115 void selectWord( ldomWordEx * word, MoveDirection dir ); 2116 /// select next word in specified direction 2117 ldomWordEx * selectNextWord( MoveDirection dir, int moveBy = 1 ); 2118 /// select middle word in range 2119 ldomWordEx * selectMiddleWord(); 2120 /// get selected word getSelWord()2121 ldomWordEx * getSelWord() { return selWord; } 2122 /// try append search pattern and find word 2123 ldomWordEx * appendPattern(lString32 chars); 2124 /// remove last character from pattern and try to search 2125 ldomWordEx * reducePattern(); 2126 }; 2127 2128 2129 /// list of marked ranges 2130 class ldomMarkedRangeList : public LVPtrVector<ldomMarkedRange> 2131 { 2132 public: ldomMarkedRangeList()2133 ldomMarkedRangeList() 2134 { 2135 } 2136 /// create bounded by RC list, with (0,0) coordinates at left top corner 2137 // crop/discard elements outside of rc (or outside of crop_rc instead if provided) 2138 ldomMarkedRangeList( const ldomMarkedRangeList * list, lvRect & rc, lvRect * crop_rc=NULL ); 2139 }; 2140 2141 class ldomXRangeList : public LVPtrVector<ldomXRange> 2142 { 2143 public: 2144 /// add ranges for words addWords(const LVArray<ldomWord> & words)2145 void addWords( const LVArray<ldomWord> & words ) 2146 { 2147 for ( int i=0; i<words.length(); i++ ) 2148 LVPtrVector<ldomXRange>::add( new 
ldomXRange( words[i] ) ); 2149 } ldomXRangeList(const LVArray<ldomWord> & words)2150 ldomXRangeList( const LVArray<ldomWord> & words ) 2151 { 2152 addWords( words ); 2153 } 2154 /// create list splittiny existing list into non-overlapping ranges 2155 ldomXRangeList( ldomXRangeList & srcList, bool splitIntersections ); 2156 /// create list by filtering existing list, to get only values which intersect filter range 2157 ldomXRangeList( ldomXRangeList & srcList, ldomXRange & filter ); 2158 #if BUILD_LITE!=1 2159 /// fill text selection list by splitting text into monotonic flags ranges 2160 void splitText( ldomMarkedTextList &dst, ldomNode * textNodeToSplit ); 2161 /// fill marked ranges list 2162 void getRanges( ldomMarkedRangeList &dst ); 2163 #endif 2164 /// split into subranges using intersection 2165 void split( ldomXRange * r ); 2166 /// default constructor for empty list ldomXRangeList()2167 ldomXRangeList() {}; 2168 }; 2169 2170 class LVTocItem; 2171 class LVDocView; 2172 2173 /// TOC item 2174 class LVTocItem 2175 { 2176 friend class LVDocView; 2177 private: 2178 LVTocItem * _parent; 2179 ldomDocument * _doc; 2180 lInt32 _level; 2181 lInt32 _index; 2182 lInt32 _page; 2183 lInt32 _percent; 2184 lString32 _name; 2185 lString32 _path; 2186 ldomXPointer _position; 2187 LVPtrVector<LVTocItem> _children; 2188 //==================================================== 2189 //LVTocItem( ldomXPointer pos, const lString32 & name ) : _parent(NULL), _level(0), _index(0), _page(0), _percent(0), _name(name), _path(pos.toString()), _position(pos) { } LVTocItem(ldomXPointer pos,lString32 path,const lString32 & name)2190 LVTocItem( ldomXPointer pos, lString32 path, const lString32 & name ) : _parent(NULL), _level(0), _index(0), _page(0), _percent(0), _name(name), _path(path), _position(pos) { } addChild(LVTocItem * item)2191 void addChild( LVTocItem * item ) { item->_level=_level+1; item->_parent=this; item->_index=_children.length(), item->_doc=_doc; _children.add(item); } 2192 
//==================================================== setPage(int n)2193 void setPage( int n ) { _page = n; } setPercent(int n)2194 void setPercent( int n ) { _percent = n; } 2195 public: 2196 /// serialize to byte array (pointer will be incremented by number of bytes written) 2197 bool serialize( SerialBuf & buf ); 2198 /// deserialize from byte array (pointer will be incremented by number of bytes read) 2199 bool deserialize( ldomDocument * doc, SerialBuf & buf ); 2200 /// get page number getPage()2201 int getPage() { return _page; } 2202 /// get position percent * 100 getPercent()2203 int getPercent() { return _percent; } 2204 /// returns parent node pointer getParent()2205 LVTocItem * getParent() const { return _parent; } 2206 /// returns node level (0==root node, 1==top level) getLevel()2207 int getLevel() const { return _level; } 2208 /// returns node index getIndex()2209 int getIndex() const { return _index; } 2210 /// returns section title getName()2211 lString32 getName() const { return _name; } 2212 /// returns position pointer 2213 ldomXPointer getXPointer(); 2214 /// set position pointer (for cases where we need to create a LVTocItem as a container, but 2215 /// we'll know the xpointer only later, mostly always the same xpointer as its first child) setXPointer(ldomXPointer xp)2216 void setXPointer(ldomXPointer xp) { _position = xp; } 2217 /// returns position path 2218 lString32 getPath(); 2219 /// returns Y position 2220 int getY(); 2221 /// returns page number 2222 //int getPageNum( LVRendPageList & pages ); 2223 /// returns child node count getChildCount()2224 int getChildCount() const { return _children.length(); } 2225 /// returns child node by index getChild(int index)2226 LVTocItem * getChild( int index ) const { return _children[index]; } 2227 /// add child TOC node addChild(const lString32 & name,ldomXPointer ptr,lString32 path)2228 LVTocItem * addChild( const lString32 & name, ldomXPointer ptr, lString32 path ) 2229 { 2230 LVTocItem * item = 
new LVTocItem( ptr, path, name ); 2231 addChild( item ); 2232 return item; 2233 } clear()2234 void clear() { _children.clear(); } 2235 // root node constructor LVTocItem(ldomDocument * doc)2236 LVTocItem( ldomDocument * doc ) : _parent(NULL), _doc(doc), _level(0), _index(0), _page(0) { } ~LVTocItem()2237 ~LVTocItem() { clear(); } 2238 2239 /// For use on the root toc item only (_page, otherwise unused, can be used to store this flag) setAlternativeTocFlag()2240 void setAlternativeTocFlag() { if (_level==0) _page = 1; } hasAlternativeTocFlag()2241 bool hasAlternativeTocFlag() { return _level==0 && _page==1; } 2242 2243 /// When page numbers have been calculated, LVDocView::updatePageNumbers() 2244 /// sets the root toc item _percent to -1. So let's use it to know that fact. hasValidPageNumbers()2245 bool hasValidPageNumbers() { return _level==0 && _percent == -1; } invalidatePageNumbers()2246 void invalidatePageNumbers() { if (_level==0) _percent = 0; } 2247 }; 2248 2249 /// PageMapItem 2250 class LVPageMapItem 2251 { 2252 friend class LVDocView; 2253 friend class LVPageMap; 2254 private: 2255 ldomDocument * _doc; 2256 lInt32 _index; 2257 lInt32 _page; 2258 lInt32 _doc_y; 2259 lString32 _label; 2260 lString32 _path; 2261 ldomXPointer _position; LVPageMapItem(ldomXPointer pos,lString32 path,const lString32 & label)2262 LVPageMapItem( ldomXPointer pos, lString32 path, const lString32 & label ) 2263 : _index(0), _page(0), _doc_y(-1), _label(label), _path(path), _position(pos) 2264 { } setPage(int n)2265 void setPage( int n ) { _page = n; } setDocY(int y)2266 void setDocY( int y ) { _doc_y = y; } 2267 public: 2268 /// serialize to byte array (pointer will be incremented by number of bytes written) 2269 bool serialize( SerialBuf & buf ); 2270 /// deserialize from byte array (pointer will be incremented by number of bytes read) 2271 bool deserialize( ldomDocument * doc, SerialBuf & buf ); 2272 /// get rendered page number getPage()2273 int getPage() { return _page; } 2274 
/// returns node index getIndex()2275 int getIndex() const { return _index; } 2276 /// returns page label getLabel()2277 lString32 getLabel() const { return _label; } 2278 /// returns position pointer 2279 ldomXPointer getXPointer(); 2280 /// returns position path 2281 lString32 getPath(); 2282 /// returns Y position 2283 int getDocY(bool refresh=false); LVPageMapItem(ldomDocument * doc)2284 LVPageMapItem( ldomDocument * doc ) : _doc(doc), _index(0), _page(0), _doc_y(-1) { } 2285 }; 2286 2287 /// PageMapItems container 2288 class LVPageMap 2289 { 2290 friend class LVDocView; 2291 private: 2292 ldomDocument * _doc; 2293 bool _page_info_valid; 2294 lString32 _source; 2295 LVPtrVector<LVPageMapItem> _children; addPage(LVPageMapItem * item)2296 void addPage( LVPageMapItem * item ) { 2297 item->_doc = _doc; 2298 item->_index = _children.length(); 2299 _children.add(item); 2300 } 2301 public: 2302 /// serialize to byte array (pointer will be incremented by number of bytes written) 2303 bool serialize( SerialBuf & buf ); 2304 /// deserialize from byte array (pointer will be incremented by number of bytes read) 2305 bool deserialize( ldomDocument * doc, SerialBuf & buf ); 2306 /// returns child node count getChildCount()2307 int getChildCount() const { return _children.length(); } 2308 /// returns child node by index getChild(int index)2309 LVPageMapItem * getChild( int index ) const { return _children[index]; } 2310 /// add page item addPage(const lString32 & label,ldomXPointer ptr,lString32 path)2311 LVPageMapItem * addPage( const lString32 & label, ldomXPointer ptr, lString32 path ) 2312 { 2313 LVPageMapItem * item = new LVPageMapItem( ptr, path, label ); 2314 addPage( item ); 2315 return item; 2316 } clear()2317 void clear() { _children.clear(); } hasValidPageInfo()2318 bool hasValidPageInfo() { return _page_info_valid; } invalidatePageInfo()2319 void invalidatePageInfo() { _page_info_valid = false; } 2320 // Page source (info about the book paper version the page 
labels reference) setSource(lString32 source)2321 void setSource( lString32 source ) { _source = source; } getSource()2322 lString32 getSource() const { return _source; } 2323 // root node constructor LVPageMap(ldomDocument * doc)2324 LVPageMap( ldomDocument * doc ) 2325 : _doc(doc), _page_info_valid(false) { } ~LVPageMap()2326 ~LVPageMap() { clear(); } 2327 }; 2328 2329 2330 class ldomNavigationHistory 2331 { 2332 private: 2333 lString32Collection _links; 2334 int _pos; clearTail()2335 void clearTail() 2336 { 2337 if (_links.length() > _pos) 2338 _links.erase(_pos, _links.length() - _pos); 2339 } 2340 public: clear()2341 void clear() 2342 { 2343 _links.clear(); 2344 _pos = 0; 2345 } save(lString32 link)2346 bool save( lString32 link ) 2347 { 2348 if (_pos==(int)_links.length() && _pos>0 && _links[_pos-1]==link ) 2349 return false; 2350 if ( _pos>=(int)_links.length() || _links[_pos]!=link ) { 2351 clearTail(); 2352 _links.add( link ); 2353 _pos = _links.length(); 2354 return true; 2355 } else if (_links[_pos]==link) { 2356 _pos++; 2357 return true; 2358 } 2359 return false; 2360 } back()2361 lString32 back() 2362 { 2363 if (_pos==0) 2364 return lString32::empty_str; 2365 return _links[--_pos]; 2366 } forward()2367 lString32 forward() 2368 { 2369 if (_pos>=(int)_links.length()-1) 2370 return lString32::empty_str; 2371 return _links[++_pos]; 2372 } backCount()2373 int backCount() 2374 { 2375 return _pos; 2376 } forwardCount()2377 int forwardCount() 2378 { 2379 return _links.length() - _pos; 2380 } 2381 }; 2382 2383 class ListNumberingProps 2384 { 2385 public: 2386 int maxCounter; 2387 int maxWidth; ListNumberingProps(int c,int w)2388 ListNumberingProps( int c, int w ) 2389 : maxCounter(c), maxWidth(w) 2390 { 2391 } 2392 }; 2393 typedef LVRef<ListNumberingProps> ListNumberingPropsRef; 2394 2395 class ldomDocument : public lxmlDocBase 2396 { 2397 friend class ldomDocumentWriter; 2398 friend class ldomDocumentWriterFilter; 2399 private: 2400 LVTocItem m_toc; 2401 
LVPageMap m_pagemap; 2402 #if BUILD_LITE!=1 2403 font_ref_t _def_font; // default font 2404 css_style_ref_t _def_style; 2405 lUInt32 _last_docflags; 2406 int _page_height; 2407 int _page_width; 2408 bool _rendered; 2409 bool _just_rendered_from_cache; 2410 bool _toc_from_cache_valid; 2411 lUInt32 _warnings_seen_bitmap; 2412 ldomXRangeList _selections; 2413 #endif 2414 2415 lString32 _docStylesheetFileName; 2416 2417 LVContainerRef _container; 2418 2419 LVHashTable<lUInt32, ListNumberingPropsRef> lists; 2420 2421 LVEmbeddedFontList _fontList; 2422 2423 2424 #if BUILD_LITE!=1 2425 /// load document cache file content 2426 bool loadCacheFileContent(CacheLoadingCallback * formatCallback, LVDocViewCallback * progressCallback=NULL); 2427 2428 /// save changes to cache file 2429 bool saveChanges(); 2430 /// saves changes to cache file, limited by time interval (can be called again to continue after TIMEOUT) 2431 virtual ContinuousOperationResult saveChanges( CRTimerUtil & maxTime, LVDocViewCallback * progressCallback=NULL ); 2432 #endif 2433 2434 /// create XPointer from a non-normalized string made by toStringV1() 2435 ldomXPointer createXPointerV1( ldomNode * baseNode, const lString32 & xPointerStr ); 2436 /// create XPointer from a normalized string made by toStringV2() 2437 ldomXPointer createXPointerV2( ldomNode * baseNode, const lString32 & xPointerStr ); 2438 protected: 2439 2440 #if BUILD_LITE!=1 2441 void applyDocumentStyleSheet(); 2442 #endif 2443 2444 public: 2445 2446 #if BUILD_LITE!=1 forceReinitStyles()2447 void forceReinitStyles() { 2448 dropStyles(); 2449 _hdr.render_style_hash = 0; 2450 _rendered = false; 2451 } 2452 2453 ListNumberingPropsRef getNodeNumberingProps( lUInt32 nodeDataIndex ); 2454 void setNodeNumberingProps( lUInt32 nodeDataIndex, ListNumberingPropsRef v ); 2455 void resetNodeNumberingProps(); 2456 #endif 2457 2458 #if BUILD_LITE!=1 2459 /// returns object image stream 2460 LVStreamRef getObjectImageStream( lString32 refName ); 2461 /// 
returns object image source 2462 LVImageSourceRef getObjectImageSource( lString32 refName ); 2463 isDefStyleSet()2464 bool isDefStyleSet() 2465 { 2466 return !_def_style.isNull(); 2467 } 2468 2469 /// return document's embedded font list getEmbeddedFontList()2470 LVEmbeddedFontList & getEmbeddedFontList() { return _fontList; } 2471 /// register embedded document fonts in font manager, if any exist in document 2472 void registerEmbeddedFonts(); 2473 /// unregister embedded document fonts in font manager, if any exist in document 2474 void unregisterEmbeddedFonts(); 2475 #endif 2476 2477 /// returns pointer to TOC root node getToc()2478 LVTocItem * getToc() { return &m_toc; } 2479 /// build alternative TOC from document heading elements (H1 to H6) and cr-hints, or docFragments 2480 void buildAlternativeToc(); isTocAlternativeToc()2481 bool isTocAlternativeToc() { return m_toc.hasAlternativeTocFlag(); } 2482 /// build TOC from headings 2483 void buildTocFromHeadings(); 2484 2485 /// returns pointer to PageMapItems container getPageMap()2486 LVPageMap * getPageMap() { return &m_pagemap; } 2487 2488 #if BUILD_LITE!=1 isTocFromCacheValid()2489 bool isTocFromCacheValid() { return _toc_from_cache_valid; } 2490 2491 /// save document formatting parameters after render 2492 void updateRenderContext(); 2493 /// check document formatting parameters before render - whether we need to reformat; returns false if render is necessary 2494 bool checkRenderContext(); 2495 #endif 2496 2497 #if BUILD_LITE!=1 2498 /// try opening from cache file, find by source file name (w/o path) and crc32 2499 virtual bool openFromCache( CacheLoadingCallback * formatCallback, LVDocViewCallback * progressCallback=NULL ); 2500 /// saves recent changes to mapped file 2501 virtual ContinuousOperationResult updateMap(CRTimerUtil & maxTime, LVDocViewCallback * progressCallback=NULL); 2502 /// swaps to cache file or saves changes, limited by time interval 2503 virtual ContinuousOperationResult swapToCache( 
CRTimerUtil & maxTime ); 2504 /// saves recent changes to mapped file 2505 virtual bool updateMap(LVDocViewCallback * progressCallback=NULL) { 2506 CRTimerUtil infinite; 2507 return updateMap(infinite, progressCallback)!=CR_ERROR; // NOLINT: Call to virtual function during destruction 2508 } 2509 #endif 2510 2511 getContainer()2512 LVContainerRef getContainer() { return _container; } setContainer(LVContainerRef cont)2513 void setContainer( LVContainerRef cont ) { _container = cont; } 2514 2515 #if BUILD_LITE!=1 clearRendBlockCache()2516 void clearRendBlockCache() { _renderedBlockCache.clear(); } 2517 #endif 2518 void clear(); getDocStylesheetFileName()2519 lString32 getDocStylesheetFileName() { return _docStylesheetFileName; } setDocStylesheetFileName(lString32 fileName)2520 void setDocStylesheetFileName(lString32 fileName) { _docStylesheetFileName = fileName; } 2521 2522 ldomDocument(); 2523 /// creates empty document which is ready to be copy target of doc partial contents 2524 ldomDocument( ldomDocument & doc ); 2525 2526 #if BUILD_LITE!=1 2527 /// return selections collection getSelections()2528 ldomXRangeList & getSelections() { return _selections; } 2529 2530 /// get full document height 2531 int getFullHeight(); 2532 /// returns page height setting getPageHeight()2533 int getPageHeight() { return _page_height; } 2534 /// returns page width setting getPageWidth()2535 int getPageWidth() { return _page_width; } 2536 #endif 2537 /// saves document contents as XML to stream with specified encoding 2538 bool saveToStream( LVStreamRef stream, const char * codepage, bool treeLayout=false ); 2539 /// print a warning message (only once if warning_id provided, between 1 and 32) 2540 void printWarning(const char * msg, int warning_id=0); 2541 #if BUILD_LITE!=1 2542 /// get default font reference getDefaultFont()2543 font_ref_t getDefaultFont() { return _def_font; } 2544 /// get default style reference getDefaultStyle()2545 css_style_ref_t getDefaultStyle() { return 
_def_style; } 2546 2547 inline bool parseStyleSheet(lString32 codeBase, lString32 css); 2548 inline bool parseStyleSheet(lString32 cssFile); 2549 #endif 2550 /// destructor 2551 virtual ~ldomDocument(); 2552 #if BUILD_LITE!=1 isRendered()2553 bool isRendered() { return _rendered; } 2554 /// renders (formats) document in memory: returns true if re-rendering needed, false if not 2555 virtual bool render( LVRendPageList * pages, LVDocViewCallback * callback, int width, int dy, 2556 bool showCover, int y0, font_ref_t def_font, int def_interline_space, 2557 CRPropRef props, int usable_left_overflow=0, int usable_right_overflow=0 ); 2558 /// set global rendering properties 2559 virtual bool setRenderProps( int width, int dy, bool showCover, int y0, font_ref_t def_font, 2560 int def_interline_space, CRPropRef props ); 2561 #endif 2562 /// create xpointer from pointer string 2563 ldomXPointer createXPointer( const lString32 & xPointerStr ); 2564 /// create xpointer from pointer string nodeFromXPath(const lString32 & xPointerStr)2565 ldomNode * nodeFromXPath( const lString32 & xPointerStr ) 2566 { 2567 return createXPointer( xPointerStr ).getNode(); 2568 } 2569 /// get element text by pointer string textFromXPath(const lString32 & xPointerStr)2570 lString32 textFromXPath( const lString32 & xPointerStr ) 2571 { 2572 ldomNode * node = nodeFromXPath( xPointerStr ); 2573 if ( !node ) 2574 return lString32::empty_str; 2575 return node->getText(); 2576 } 2577 2578 /// create xpointer from relative pointer string createXPointer(ldomNode * baseNode,const lString32 & xPointerStr)2579 ldomXPointer createXPointer( ldomNode * baseNode, const lString32 & xPointerStr ) 2580 { 2581 if( _DOMVersionRequested >= DOM_VERSION_WITH_NORMALIZED_XPOINTERS) 2582 return createXPointerV2(baseNode, xPointerStr); 2583 return createXPointerV1(baseNode, xPointerStr); 2584 } 2585 2586 #if BUILD_LITE!=1 2587 /// create xpointer from doc point 2588 ldomXPointer createXPointer( lvPoint pt, int 
direction=PT_DIR_EXACT, bool strictBounds=false, ldomNode * from_node=NULL ); 2589 /// get rendered block cache object getRendBlockCache()2590 CVRendBlockCache & getRendBlockCache() { return _renderedBlockCache; } 2591 2592 bool findText( lString32 pattern, bool caseInsensitive, bool reverse, int minY, int maxY, LVArray<ldomWord> & words, int maxCount, int maxHeight, int maxHeightCheckStartY = -1 ); 2593 #endif 2594 }; 2595 2596 2597 class ldomDocumentWriter; 2598 2599 class ldomElementWriter 2600 { 2601 ldomElementWriter * _parent; 2602 ldomDocument * _document; 2603 2604 ldomNode * _element; 2605 LVTocItem * _tocItem; 2606 lString32 _path; 2607 const css_elem_def_props_t * _typeDef; 2608 bool _allowText; 2609 bool _isBlock; 2610 bool _isSection; 2611 bool _stylesheetIsSet; 2612 bool _bodyEnterCalled; 2613 int _pseudoElementAfterChildIndex; 2614 lUInt32 _flags; 2615 lUInt32 getFlags(); 2616 void updateTocItem(); 2617 void onBodyEnter(); 2618 void onBodyExit(); getElement()2619 ldomNode * getElement() 2620 { 2621 return _element; 2622 } 2623 lString32 getPath(); 2624 void onText( const lChar32 * text, int len, lUInt32 flags, bool insert_before_last_child=false ); 2625 void addAttribute( lUInt16 nsid, lUInt16 id, const lChar32 * value ); 2626 //lxmlElementWriter * pop( lUInt16 id ); 2627 2628 ldomElementWriter(ldomDocument * document, lUInt16 nsid, lUInt16 id, ldomElementWriter * parent, bool insert_before_last_child=false); 2629 ~ldomElementWriter(); 2630 2631 friend class ldomDocumentWriter; 2632 friend class ldomDocumentWriterFilter; 2633 //friend ldomElementWriter * pop( ldomElementWriter * obj, lUInt16 id ); 2634 }; 2635 2636 /** \brief callback object to fill DOM tree 2637 2638 To be used with XML parser as callback object. 2639 2640 Creates document according to incoming events. 
2641 */ 2642 class ldomDocumentWriter : public LVXMLParserCallback 2643 { 2644 protected: 2645 //============================ 2646 ldomDocument * _document; 2647 //ldomElement * _currNode; 2648 ldomElementWriter * _currNode; 2649 bool _errFlag; 2650 bool _headerOnly; 2651 bool _popStyleOnFinish; 2652 lUInt16 _stopTagId; 2653 //============================ 2654 lUInt32 _flags; 2655 bool _inHeadStyle; 2656 lString32 _headStyleText; 2657 lString32Collection _stylesheetLinks; ElementCloseHandler(ldomNode * node)2658 virtual void ElementCloseHandler( ldomNode * node ) { node->persist(); } 2659 public: 2660 /// returns flags getFlags()2661 virtual lUInt32 getFlags() { return _flags; } 2662 /// sets flags setFlags(lUInt32 flags)2663 virtual void setFlags( lUInt32 flags ) { _flags = flags; } 2664 // overrides 2665 /// called when encoding directive found in document 2666 virtual void OnEncoding( const lChar32 * name, const lChar32 * table ); 2667 /// called on parsing start 2668 virtual void OnStart(LVFileFormatParser * parser); 2669 /// called on parsing end 2670 virtual void OnStop(); 2671 /// called on opening tag 2672 virtual ldomNode * OnTagOpen( const lChar32 * nsname, const lChar32 * tagname ); 2673 /// called after > of opening tag (when entering tag body) 2674 virtual void OnTagBody(); 2675 /// called on closing tag 2676 virtual void OnTagClose( const lChar32 * nsname, const lChar32 * tagname, bool self_closing_tag=false ); 2677 /// called on attribute 2678 virtual void OnAttribute( const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue ); 2679 /// close tags 2680 ldomElementWriter * pop( ldomElementWriter * obj, lUInt16 id ); 2681 /// called on text 2682 virtual void OnText( const lChar32 * text, int len, lUInt32 flags ); 2683 /// add named BLOB data to document OnBlob(lString32 name,const lUInt8 * data,int size)2684 virtual bool OnBlob(lString32 name, const lUInt8 * data, int size) { 2685 #if BUILD_LITE!=1 2686 return 
_document->addBlob(name, data, size); 2687 #else 2688 return false; 2689 #endif 2690 } 2691 /// set document property OnDocProperty(const char * name,lString8 value)2692 virtual void OnDocProperty(const char * name, lString8 value) { _document->getProps()->setString(name, value); } 2693 2694 /// constructor 2695 ldomDocumentWriter(ldomDocument * document, bool headerOnly=false ); 2696 /// destructor 2697 virtual ~ldomDocumentWriter(); 2698 }; 2699 2700 /** \brief callback object to fill DOM tree 2701 2702 To be used with XML parser as callback object. 2703 2704 Creates document according to incoming events. 2705 2706 Autoclose HTML tags. 2707 */ 2708 class ldomDocumentWriterFilter : public ldomDocumentWriter 2709 { 2710 protected: 2711 bool _libRuDocumentToDetect; 2712 bool _libRuDocumentDetected; 2713 bool _libRuParagraphStart; 2714 bool _libRuParseAsPre; 2715 lUInt16 _styleAttrId; 2716 lUInt16 _classAttrId; 2717 lUInt16 * _rules[MAX_ELEMENT_TYPE_ID]; 2718 bool _tagBodyCalled; 2719 // Some states used when gDOMVersionRequested >= 20200824 2720 bool _htmlTagSeen; 2721 bool _headTagSeen; 2722 bool _bodyTagSeen; 2723 bool _curNodeIsSelfClosing; 2724 bool _curTagIsIgnored; 2725 ldomElementWriter * _curNodeBeforeFostering; 2726 ldomElementWriter * _curFosteredNode; 2727 ldomElementWriter * _lastP; 2728 virtual void AutoClose( lUInt16 tag_id, bool open ); 2729 virtual bool AutoOpenClosePop( int step, lUInt16 tag_id ); 2730 virtual lUInt16 popUpTo( ldomElementWriter * target, lUInt16 target_id=0, int scope=0 ); 2731 virtual bool CheckAndEnsureFosterParenting(lUInt16 tag_id); ElementCloseHandler(ldomNode * node)2732 virtual void ElementCloseHandler( ldomNode * node ) { node->persist(); } 2733 virtual void appendStyle( const lChar32 * style ); 2734 virtual void setClass( const lChar32 * className, bool overrideExisting=false ); 2735 public: 2736 /// called on attribute 2737 virtual void OnAttribute( const lChar32 * nsname, const lChar32 * attrname, const lChar32 * 
attrvalue ); 2738 /// called on opening tag 2739 virtual ldomNode * OnTagOpen( const lChar32 * nsname, const lChar32 * tagname ); 2740 /// called after > of opening tag (when entering tag body) 2741 virtual void OnTagBody(); 2742 /// called on closing tag 2743 virtual void OnTagClose( const lChar32 * nsname, const lChar32 * tagname, bool self_closing_tag=false ); 2744 /// called on text 2745 virtual void OnText( const lChar32 * text, int len, lUInt32 flags ); 2746 /// constructor 2747 ldomDocumentWriterFilter(ldomDocument * document, bool headerOnly, const char *** rules); 2748 /// destructor 2749 virtual ~ldomDocumentWriterFilter(); 2750 }; 2751 2752 class ldomDocumentFragmentWriter : public LVXMLParserCallback 2753 { 2754 private: 2755 //============================ 2756 LVXMLParserCallback * parent; 2757 lString32 baseTag; 2758 lString32 baseTagReplacement; 2759 lString32 codeBase; 2760 lString32 filePathName; 2761 lString32 codeBasePrefix; 2762 lString32 stylesheetFile; 2763 lString32 tmpStylesheetFile; 2764 lString32Collection stylesheetLinks; 2765 bool insideTag; 2766 int styleDetectionState; 2767 LVHashTable<lString32, lString32> pathSubstitutions; 2768 2769 ldomNode * baseElement; 2770 ldomNode * lastBaseElement; 2771 2772 lString8 headStyleText; 2773 int headStyleState; 2774 2775 lString32 htmlDir; 2776 lString32 htmlLang; 2777 bool insideHtmlTag; 2778 2779 bool m_nonlinear = false; 2780 2781 public: 2782 2783 /// return content of html/head/style element getHeadStyleText()2784 lString8 getHeadStyleText() { return headStyleText; } 2785 getBaseElement()2786 ldomNode * getBaseElement() { return lastBaseElement; } 2787 2788 lString32 convertId( lString32 id ); 2789 lString32 convertHref( lString32 href ); 2790 addPathSubstitution(lString32 key,lString32 value)2791 void addPathSubstitution( lString32 key, lString32 value ) 2792 { 2793 pathSubstitutions.set(key, value); 2794 } 2795 2796 virtual void setCodeBase( lString32 filePath ); 2797 /// returns flags 
getFlags()2798 virtual lUInt32 getFlags() { return parent->getFlags(); } 2799 /// sets flags setFlags(lUInt32 flags)2800 virtual void setFlags( lUInt32 flags ) { parent->setFlags(flags); } 2801 // overrides 2802 /// called when encoding directive found in document OnEncoding(const lChar32 * name,const lChar32 * table)2803 virtual void OnEncoding( const lChar32 * name, const lChar32 * table ) 2804 { parent->OnEncoding( name, table ); } 2805 /// called on parsing start OnStart(LVFileFormatParser *)2806 virtual void OnStart(LVFileFormatParser *) 2807 { 2808 insideTag = false; 2809 headStyleText.clear(); 2810 headStyleState = 0; 2811 insideHtmlTag = false; 2812 htmlDir.clear(); 2813 htmlLang.clear(); 2814 } 2815 /// called on parsing end OnStop()2816 virtual void OnStop() 2817 { 2818 if ( insideTag ) { 2819 insideTag = false; 2820 if ( !baseTagReplacement.empty() ) { 2821 parent->OnTagClose(U"", baseTagReplacement.c_str()); 2822 } 2823 baseElement = NULL; 2824 return; 2825 } 2826 insideTag = false; 2827 } 2828 /// called on opening tag 2829 virtual ldomNode * OnTagOpen( const lChar32 * nsname, const lChar32 * tagname ); 2830 /// called after > of opening tag (when entering tag body) 2831 virtual void OnTagBody(); 2832 /// called on closing tag 2833 virtual void OnTagClose( const lChar32 * nsname, const lChar32 * tagname, bool self_closing_tag=false ); 2834 /// called on attribute 2835 virtual void OnAttribute( const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue ); 2836 /// called on text OnText(const lChar32 * text,int len,lUInt32 flags)2837 virtual void OnText( const lChar32 * text, int len, lUInt32 flags ) 2838 { 2839 if (headStyleState == 1) { 2840 headStyleText << UnicodeToUtf8(lString32(text).substr(0,len-1)); 2841 return; 2842 } 2843 if ( insideTag ) 2844 parent->OnText( text, len, flags ); 2845 } 2846 /// add named BLOB data to document OnBlob(lString32 name,const lUInt8 * data,int size)2847 virtual bool OnBlob(lString32 name, const 
lUInt8 * data, int size) { return parent->OnBlob(name, data, size); } 2848 /// set document property OnDocProperty(const char * name,lString8 value)2849 virtual void OnDocProperty(const char * name, lString8 value) { parent->OnDocProperty(name, value); } 2850 // set non-linear flag setNonLinearFlag(bool nonlinear)2851 virtual void setNonLinearFlag( bool nonlinear ) { m_nonlinear = nonlinear; } 2852 /// constructor ldomDocumentFragmentWriter(LVXMLParserCallback * parentWriter,lString32 baseTagName,lString32 baseTagReplacementName,lString32 fragmentFilePath)2853 ldomDocumentFragmentWriter( LVXMLParserCallback * parentWriter, lString32 baseTagName, lString32 baseTagReplacementName, lString32 fragmentFilePath ) 2854 : parent(parentWriter), baseTag(baseTagName), baseTagReplacement(baseTagReplacementName), 2855 insideTag(false), styleDetectionState(0), pathSubstitutions(100), baseElement(NULL), lastBaseElement(NULL), 2856 headStyleState(0), insideHtmlTag(false) 2857 { 2858 setCodeBase( fragmentFilePath ); 2859 } 2860 /// destructor ~ldomDocumentFragmentWriter()2861 virtual ~ldomDocumentFragmentWriter() { } 2862 }; 2863 2864 //utils 2865 /// extract authors from FB2 document, delimiter is lString32 by default 2866 lString32 extractDocAuthors( ldomDocument * doc, lString32 delimiter=lString32::empty_str, bool shortMiddleName=true ); 2867 lString32 extractDocTitle( ldomDocument * doc ); 2868 lString32 extractDocLanguage( ldomDocument * doc ); 2869 /// returns "(Series Name #number)" if pSeriesNumber is NULL, separate name and number otherwise 2870 lString32 extractDocSeries( ldomDocument * doc, int * pSeriesNumber=NULL ); 2871 lString32 extractDocKeywords( ldomDocument * doc ); 2872 lString32 extractDocDescription( ldomDocument * doc ); 2873 2874 bool IsEmptySpace( const lChar32 * text, int len ); 2875 2876 /// parse XML document from stream, returns NULL if failed 2877 ldomDocument * LVParseXMLStream( LVStreamRef stream, 2878 const elem_def_t * elem_table=NULL, 2879 const 
/// parse HTML document from stream, returns NULL if failed
/// (the three table arguments are optional and default to NULL)
ldomDocument * LVParseHTMLStream( LVStreamRef stream,
                              const elem_def_t * elem_table=NULL,
                              const attr_def_t * attr_table=NULL,
                              const ns_def_t * ns_table=NULL );

/// document cache
///
/// Purely static interface: there is no per-instance state declared here,
/// all operations are called as ldomDocCache::xxx().
class ldomDocCache
{
public:
    /// open existing cache file stream
    /// NOTE(review): cachePath is a non-const reference, presumably an
    /// output receiving the cache file path - confirm in the implementation
    static LVStreamRef openExisting( lString32 filename, lUInt32 crc, lUInt32 docFlags, lString32 &cachePath );
    /// create new cache file
    /// NOTE(review): cachePath presumably is an output here as well - confirm
    static LVStreamRef createNew( lString32 filename, lUInt32 crc, lUInt32 docFlags, lUInt32 fileSize, lString32 &cachePath );
    /// init document cache
    static bool init( lString32 cacheDir, lvsize_t maxSize );
    /// close document cache manager
    static bool close();
    /// delete all cache files
    static bool clear();
    /// returns true if cache is enabled (successfully initialized)
    static bool enabled();
};


/// unit test for DOM
void runTinyDomUnitTests();

/// pass true to enable CRC check of cache file contents
void enableCacheFileContentsValidation(bool enable);

/// pass false to not compress data in cache files
void compressCachedData(bool enable);

/// increase the 4 hardcoded TEXT_CACHE_UNPACKED_SPACE, ELEM_CACHE_UNPACKED_SPACE,
/// RECT_CACHE_UNPACKED_SPACE and STYLE_CACHE_UNPACKED_SPACE by this factor
void setStorageMaxUncompressedSizeFactor(float factor);

#endif