// Copyright 2010-2018, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 30 #ifndef MOZC_COMPOSER_INTERNAL_CHAR_CHUNK_H_ 31 #define MOZC_COMPOSER_INTERNAL_CHAR_CHUNK_H_ 32 33 #include <set> 34 #include <string> 35 36 #include "base/port.h" 37 #include "composer/internal/transliterators.h" 38 // For TableAttributes 39 #include "composer/table.h" 40 // for FRIEND_TEST() 41 #include "testing/base/public/gunit_prod.h" 42 43 namespace mozc { 44 namespace composer { 45 46 class CompositionInput; 47 class Table; 48 49 // This class contains a unit of composition string. The unit consists of 50 // conversion, pending and raw strings. Every unit should be the shortest 51 // size separated by the conversion table. A sample units with normal 52 // romaji-hiragana conversion table are {conversion: "か", pending: "", raw: 53 // "ka"} and {conversion: "っ", pending: "t", raw: "tt"}. 54 class CharChunk { 55 public: 56 // LOCAL transliterator is not accepted. 57 CharChunk(Transliterators::Transliterator transliterator, 58 const Table *table); 59 60 void Clear(); 61 62 size_t GetLength(Transliterators::Transliterator transliterator) const; 63 64 // Append the characters representing this CharChunk accoring to the 65 // transliterator. If the transliterator is LOCAL, the local 66 // transliterator specified via SetTransliterator is used. 67 void AppendResult(Transliterators::Transliterator transliterator, 68 string *result) const; 69 void AppendTrimedResult(Transliterators::Transliterator transliterator, 70 string *result) const; 71 void AppendFixedResult(Transliterators::Transliterator transliterator, 72 string *result) const; 73 74 // Get possible results from current chunk 75 void GetExpandedResults(std::set<string> *results) const; 76 bool IsFixed() const; 77 78 // True if IsAppendable() is true and this object is fixed (|pending_|=="") 79 // when |input| is appended. 80 bool IsConvertible( 81 Transliterators::Transliterator transliterator, 82 const Table *table, 83 const string &input) const; 84 85 // Combines all fields with |left_chunk|. 
86 // [this chunk] := [left_chunk]+[this chunk] 87 // Note that after calling this method, 88 // the information contained in |left_chunk| duplicates. 89 // Deleting |left_chunk| would be preferable. 90 void Combine(const CharChunk& left_chunk); 91 92 // Return true if this char chunk accepts additional characters with 93 // the specified transliterator and the table. 94 bool IsAppendable(Transliterators::Transliterator transliterator, 95 const Table *table) const; 96 97 // Split CharChunk at |position| and set split new chunk to |left_new_chunk|. 98 // CharChunk doesn't have ownership of the new chunk. 99 bool SplitChunk(Transliterators::Transliterator transliterator, 100 size_t position, 101 CharChunk **left_new_chunk); 102 103 // Return true if this chunk should be commited immediately. This 104 // function refers DIRECT_INPUT attribute. 105 bool ShouldCommit() const; 106 107 bool ShouldInsertNewChunk(const CompositionInput &input) const; 108 void AddInput(string *input); 109 void AddConvertedChar(string *input); 110 void AddInputAndConvertedChar(string *key, 111 string *converted_char); 112 void AddCompositionInput(CompositionInput *input); 113 114 void SetTransliterator(Transliterators::Transliterator transliterator); 115 116 // Gets a transliterator basing on the given |transliterator|. 117 // - If |transliterator| is |Transliterators::LOCAL|, the local transliterator 118 // is returned. But if NO_TRANSLITERATION attribute is set, 119 // |Transliterators::CONVERSION_STRING| is returned. 120 // This behavior is for mobile. Without this behavior, raw string is used 121 // on unexpected situation. For example, on 12keys-toggle-alphabet mode, 122 // a user types "2223" to get "cd". In this case local transliterator is 123 // HALF_ASCII and HALF_ASCII transliterator uses raw string 124 // so withtout NO_TRANSLITERATION a user will get "2223" as preedit. 125 // - If |transliterator| is not LOCAL, given |transliterator| is returned. 
126 // But if NO_TRANSLITERATION attribute is set and |transliterator| is 127 // HALF_ASCII or FULL_ASCII, |Transliterators::CONVERSION_STRING| 128 // is returned. 129 // NO_TRANSLITERATION means that raw input is (basically) meaningless 130 // so HALF_ASCII and FULL_ASCII, which uses raw input, should not be used. 131 Transliterators::Transliterator GetTransliterator( 132 Transliterators::Transliterator transliterator) const; 133 134 string Transliterate(Transliterators::Transliterator transliterator, 135 const string &raw, const string &converted) const; 136 137 // Test only 138 const string &raw() const; 139 // Test only 140 void set_raw(const string &raw); 141 142 // Test only 143 const string &conversion() const; 144 // Test only 145 void set_conversion(const string &conversion); 146 147 // Test only 148 const string &pending() const; 149 // Test only 150 void set_pending(const string &pending); 151 152 // Test only 153 const string &ambiguous() const; 154 // Test only 155 void set_ambiguous(const string &ambiguous); 156 157 CharChunk *Clone() const; 158 159 // Test only 160 bool AddInputInternal(string *input); 161 162 private: 163 FRIEND_TEST(CharChunkTest, Clone); 164 FRIEND_TEST(CharChunkTest, GetTransliterator); 165 166 Transliterators::Transliterator transliterator_; 167 const Table *table_; 168 169 string raw_; 170 string conversion_; 171 string pending_; 172 string ambiguous_; 173 TableAttributes attributes_; 174 }; 175 176 } // namespace composer 177 } // namespace mozc 178 179 #endif // MOZC_COMPOSER_INTERNAL_CHAR_CHUNK_H_ 180