1 /*
2  * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 /*
27  ******************************************************************************
28  * Copyright (C) 1996-2014, International Business Machines Corporation and
29  * others. All Rights Reserved.
30  ******************************************************************************
31  */
32 
33 package jdk.internal.icu.impl;
34 
35 import jdk.internal.icu.text.UTF16;
36 
37 import java.io.DataInputStream;
38 import java.io.InputStream;
39 import java.io.IOException;
40 
41 /**
42  * Trie implementation which stores data in char, 16 bits.
43  * @author synwee
 * @see jdk.internal.icu.impl.Trie
45  * @since release 2.1, Jan 01 2002
46  */
47 
 // Note: the block calculations still need to be handled later, since
 // CharTrie in ICU4C uses the same index array.
50 public class CharTrie extends Trie
51 {
52     // public constructors ---------------------------------------------
53 
54     /**
55      * <p>Creates a new Trie with the settings for the trie data.</p>
56      * <p>Unserialize the 32-bit-aligned input stream and use the data for the
57      * trie.</p>
58      * @param inputStream file input stream to a ICU data file, containing
59      *                    the trie
60      * @param dataManipulate object which provides methods to parse the char
61      *                        data
62      * @throws IOException thrown when data reading fails
63      * @draft 2.1
64      */
CharTrie(InputStream inputStream, DataManipulate dataManipulate)65     public CharTrie(InputStream inputStream,
66                     DataManipulate dataManipulate) throws IOException
67     {
68         super(inputStream, dataManipulate);
69 
70         if (!isCharTrie()) {
71             throw new IllegalArgumentException(
72                                "Data given does not belong to a char trie.");
73         }
74     }
75 
76     // public methods --------------------------------------------------
77 
78     /**
79      * Gets the value associated with the codepoint.
80      * If no value is associated with the codepoint, a default value will be
81      * returned.
82      * @param ch codepoint
83      * @return offset to data
84      */
getCodePointValue(int ch)85     public final char getCodePointValue(int ch)
86     {
87         int offset;
88 
89         // fastpath for U+0000..U+D7FF
90         if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
91             // copy of getRawOffset()
92             offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
93                     + (ch & INDEX_STAGE_3_MASK_);
94             return m_data_[offset];
95         }
96 
97         // handle U+D800..U+10FFFF
98         offset = getCodePointOffset(ch);
99 
100         // return -1 if there is an error, in this case we return the default
101         // value: m_initialValue_
102         return (offset >= 0) ? m_data_[offset] : m_initialValue_;
103     }
104 
105     /**
106      * Gets the value to the data which this lead surrogate character points
107      * to.
108      * Returned data may contain folding offset information for the next
109      * trailing surrogate character.
110      * This method does not guarantee correct results for trail surrogates.
111      * @param ch lead surrogate character
112      * @return data value
113      */
getLeadValue(char ch)114     public final char getLeadValue(char ch)
115     {
116        return m_data_[getLeadOffset(ch)];
117     }
118 
119     // protected methods -----------------------------------------------
120 
121     /**
122      * <p>Parses the input stream and stores its trie content into a index and
123      * data array</p>
124      * @param inputStream data input stream containing trie data
125      * @exception IOException thrown when data reading fails
126      */
unserialize(InputStream inputStream)127     protected final void unserialize(InputStream inputStream)
128                                                 throws IOException
129     {
130         DataInputStream input = new DataInputStream(inputStream);
131         int indexDataLength = m_dataOffset_ + m_dataLength_;
132         m_index_ = new char[indexDataLength];
133         for (int i = 0; i < indexDataLength; i ++) {
134             m_index_[i] = input.readChar();
135         }
136         m_data_           = m_index_;
137         m_initialValue_   = m_data_[m_dataOffset_];
138     }
139 
140     /**
141      * Gets the offset to the data which the surrogate pair points to.
142      * @param lead lead surrogate
143      * @param trail trailing surrogate
144      * @return offset to data
145      * @draft 2.1
146      */
getSurrogateOffset(char lead, char trail)147     protected final int getSurrogateOffset(char lead, char trail)
148     {
149         if (m_dataManipulate_ == null) {
150             throw new NullPointerException(
151                              "The field DataManipulate in this Trie is null");
152         }
153 
154         // get fold position for the next trail surrogate
155         int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
156 
157         // get the real data from the folded lead/trail units
158         if (offset > 0) {
159             return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
160         }
161 
162         // return -1 if there is an error, in this case we return the default
163         // value: m_initialValue_
164         return -1;
165     }
166 
167     // private data members --------------------------------------------
168 
169     /**
170      * Default value
171      */
172     private char m_initialValue_;
173     /**
174      * Array of char data
175      */
176     private char m_data_[];
177 }
178