1 /* 2 * Copyright (C) 2010 Adam Barth. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_SUFFIX_TREE_H_ 27 #define THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_SUFFIX_TREE_H_ 28 29 #include <algorithm> 30 #include <utility> 31 32 #include "third_party/blink/renderer/platform/wtf/allocator/allocator.h" 33 #include "third_party/blink/renderer/platform/wtf/text/wtf_string.h" 34 #include "third_party/blink/renderer/platform/wtf/vector.h" 35 36 namespace blink { 37 38 class UnicodeCodebook { 39 STATIC_ONLY(UnicodeCodebook); 40 41 public: CodeWord(UChar c)42 static int CodeWord(UChar c) { return c; } 43 enum { kCodeSize = 1 << 8 * sizeof(UChar) }; 44 }; 45 46 class ASCIICodebook { 47 STATIC_ONLY(ASCIICodebook); 48 49 public: CodeWord(UChar c)50 static int CodeWord(UChar c) { return c & (kCodeSize - 1); } 51 enum { kCodeSize = 1 << (8 * sizeof(char) - 1) }; 52 }; 53 54 template <typename Codebook> 55 class SuffixTree { 56 USING_FAST_MALLOC(SuffixTree); 57 58 public: SuffixTree(const String & text,unsigned depth)59 SuffixTree(const String& text, unsigned depth) : depth_(depth), leaf_(true) { 60 Build(text); 61 } 62 MightContain(const String & query)63 bool MightContain(const String& query) { 64 Node* current = &root_; 65 int limit = std::min(depth_, query.length()); 66 for (int i = 0; i < limit; ++i) { 67 auto iter = current->Find(Codebook::CodeWord(query[i])); 68 if (iter == current->End()) 69 return false; 70 current = iter->second; 71 } 72 return true; 73 } 74 75 private: 76 class Node { 77 USING_FAST_MALLOC(Node); 78 79 public: is_leaf_(is_leaf)80 Node(bool is_leaf = false) : is_leaf_(is_leaf) {} 81 ~Node()82 ~Node() { 83 for (const auto& pair : children_) { 84 Node* child = pair.second; 85 if (child && !child->is_leaf_) 86 delete child; 87 } 88 } 89 At(int key)90 Node*& At(int key) { 91 auto it = Find(key); 92 if (it != children_.end()) 93 return it->second; 94 children_.emplace_back(key, nullptr); 95 return children_.back().second; 96 } 97 Find(int key)98 typename Vector<std::pair<int, Node*>>::iterator Find(int key) { 99 return std::find_if(children_.begin(), children_.end(), 100 [key](const std::pair<int, Node*>& entry) { 101 return entry.first == key; 102 }); 103 } 104 End()105 typename Vector<std::pair<int, Node*>>::iterator End() { 106 return children_.end(); 107 } 108 109 private: 110 // TODO(tsepez): convert to base::flat_map when allowed in blink. 111 Vector<std::pair<int, Node*>> children_; 112 const bool is_leaf_; 113 114 DISALLOW_COPY_AND_ASSIGN(Node); 115 }; 116 Build(const String & text)117 void Build(const String& text) { 118 for (unsigned base = 0; base < text.length(); ++base) { 119 Node* current = &root_; 120 unsigned limit = std::min(base + depth_, text.length()); 121 for (unsigned offset = 0; base + offset < limit; ++offset) { 122 DCHECK_NE(current, &leaf_); 123 Node*& child = current->At(Codebook::CodeWord(text[base + offset])); 124 if (!child) 125 child = base + offset + 1 == limit ? &leaf_ : new Node(); 126 current = child; 127 } 128 } 129 } 130 131 Node root_; 132 unsigned depth_; 133 134 // Instead of allocating a fresh empty leaf node for ever leaf in the tree 135 // (there can be a lot of these), we alias all the leaves to this "static" 136 // leaf node. 137 Node leaf_; 138 139 DISALLOW_COPY_AND_ASSIGN(SuffixTree); 140 }; 141 142 } // namespace blink 143 144 #endif // THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_SUFFIX_TREE_H_ 145