1 /* 2 * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 ****************************************************************************** 28 * Copyright (C) 1996-2014, International Business Machines Corporation and 29 * others. All Rights Reserved. 30 ****************************************************************************** 31 */ 32 33 package jdk.internal.icu.impl; 34 35 import jdk.internal.icu.text.UTF16; 36 37 import java.io.DataInputStream; 38 import java.io.InputStream; 39 import java.io.IOException; 40 41 /** 42 * Trie implementation which stores data in char, 16 bits. 43 * @author synwee 44 * @see com.ibm.icu.impl.Trie 45 * @since release 2.1, Jan 01 2002 46 */ 47 48 // note that i need to handle the block calculations later, since chartrie 49 // in icu4c uses the same index array. 50 public class CharTrie extends Trie 51 { 52 // public constructors --------------------------------------------- 53 54 /** 55 * <p>Creates a new Trie with the settings for the trie data.</p> 56 * <p>Unserialize the 32-bit-aligned input stream and use the data for the 57 * trie.</p> 58 * @param inputStream file input stream to a ICU data file, containing 59 * the trie 60 * @param dataManipulate object which provides methods to parse the char 61 * data 62 * @throws IOException thrown when data reading fails 63 * @draft 2.1 64 */ CharTrie(InputStream inputStream, DataManipulate dataManipulate)65 public CharTrie(InputStream inputStream, 66 DataManipulate dataManipulate) throws IOException 67 { 68 super(inputStream, dataManipulate); 69 70 if (!isCharTrie()) { 71 throw new IllegalArgumentException( 72 "Data given does not belong to a char trie."); 73 } 74 } 75 76 // public methods -------------------------------------------------- 77 78 /** 79 * Gets the value associated with the codepoint. 80 * If no value is associated with the codepoint, a default value will be 81 * returned. 82 * @param ch codepoint 83 * @return offset to data 84 */ getCodePointValue(int ch)85 public final char getCodePointValue(int ch) 86 { 87 int offset; 88 89 // fastpath for U+0000..U+D7FF 90 if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { 91 // copy of getRawOffset() 92 offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) 93 + (ch & INDEX_STAGE_3_MASK_); 94 return m_data_[offset]; 95 } 96 97 // handle U+D800..U+10FFFF 98 offset = getCodePointOffset(ch); 99 100 // return -1 if there is an error, in this case we return the default 101 // value: m_initialValue_ 102 return (offset >= 0) ? m_data_[offset] : m_initialValue_; 103 } 104 105 /** 106 * Gets the value to the data which this lead surrogate character points 107 * to. 108 * Returned data may contain folding offset information for the next 109 * trailing surrogate character. 110 * This method does not guarantee correct results for trail surrogates. 111 * @param ch lead surrogate character 112 * @return data value 113 */ getLeadValue(char ch)114 public final char getLeadValue(char ch) 115 { 116 return m_data_[getLeadOffset(ch)]; 117 } 118 119 // protected methods ----------------------------------------------- 120 121 /** 122 * <p>Parses the input stream and stores its trie content into a index and 123 * data array</p> 124 * @param inputStream data input stream containing trie data 125 * @exception IOException thrown when data reading fails 126 */ unserialize(InputStream inputStream)127 protected final void unserialize(InputStream inputStream) 128 throws IOException 129 { 130 DataInputStream input = new DataInputStream(inputStream); 131 int indexDataLength = m_dataOffset_ + m_dataLength_; 132 m_index_ = new char[indexDataLength]; 133 for (int i = 0; i < indexDataLength; i ++) { 134 m_index_[i] = input.readChar(); 135 } 136 m_data_ = m_index_; 137 m_initialValue_ = m_data_[m_dataOffset_]; 138 } 139 140 /** 141 * Gets the offset to the data which the surrogate pair points to. 142 * @param lead lead surrogate 143 * @param trail trailing surrogate 144 * @return offset to data 145 * @draft 2.1 146 */ getSurrogateOffset(char lead, char trail)147 protected final int getSurrogateOffset(char lead, char trail) 148 { 149 if (m_dataManipulate_ == null) { 150 throw new NullPointerException( 151 "The field DataManipulate in this Trie is null"); 152 } 153 154 // get fold position for the next trail surrogate 155 int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead)); 156 157 // get the real data from the folded lead/trail units 158 if (offset > 0) { 159 return getRawOffset(offset, (char)(trail & SURROGATE_MASK_)); 160 } 161 162 // return -1 if there is an error, in this case we return the default 163 // value: m_initialValue_ 164 return -1; 165 } 166 167 // private data members -------------------------------------------- 168 169 /** 170 * Default value 171 */ 172 private char m_initialValue_; 173 /** 174 * Array of char data 175 */ 176 private char m_data_[]; 177 } 178