1 /*
2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
3  */
4 /*
5  * Licensed to the Apache Software Foundation (ASF) under one or more
6  * contributor license agreements.  See the NOTICE file distributed with
7  * this work for additional information regarding copyright ownership.
8  * The ASF licenses this file to You under the Apache License, Version 2.0
9  * (the "License"); you may not use this file except in compliance with
10  * the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xml.internal.dtm.ref;
22 
23 import com.sun.org.apache.xml.internal.utils.IntVector;
24 import java.util.ArrayList;
25 import java.util.List;
26 
27 /** <p>DTMStringPool is an "interning" mechanism for strings. It will
28  * create a stable 1:1 mapping between a set of string values and a set of
29  * integer index values, so the integers can be used to reliably and
30  * uniquely identify (and when necessary retrieve) the strings.</p>
31  *
32  * <p>Design Priorities:
33  * <ul>
34  * <li>String-to-index lookup speed is critical.</li>
35  * <li>Index-to-String lookup speed is slightly less so.</li>
36  * <li>Threadsafety is not guaranteed at this level.
37  * Enforce that in the application if needed.</li>
38  * <li>Storage efficiency is an issue but not a huge one.
39  * It is expected that string pools won't exceed about 2000 entries.</li>
40  * </ul>
41  * </p>
42  *
43  * <p>Implementation detail: A standard Hashtable is relatively
44  * inefficient when looking up primitive int values, especially when
45  * we're already maintaining an int-to-string vector.  So I'm
46  * maintaining a simple hash chain within this class.</p>
47  *
48  * <p>NOTE: There is nothing in the code that has a real dependency upon
49  * String. It would work with any object type that implements reliable
50  * .hashCode() and .equals() operations. The API enforces Strings because
51  * it's safer that way, but this could trivially be turned into a general
52  * ObjectPool if one was needed.</p>
53  *
54  * <p>Status: Passed basic test in main().</p>
55  *
56  * @LastModified: Oct 2017
57  */
58 public class DTMStringPool
59 {
60   List<String> m_intToString;
61   static final int HASHPRIME=101;
62   int[] m_hashStart=new int[HASHPRIME];
63   IntVector m_hashChain;
64   public static final int NULL=-1;
65 
66   /**
67    * Create a DTMStringPool using the given chain size
68    *
69    * @param chainSize The size of the hash chain vector
70    */
DTMStringPool(int chainSize)71   public DTMStringPool(int chainSize)
72     {
73       m_intToString = new ArrayList<>();
74       m_hashChain= new IntVector(chainSize);
75       removeAllElements();
76 
77       // -sb Add this to force empty strings to be index 0.
78       stringToIndex("");
79     }
80 
DTMStringPool()81   public DTMStringPool()
82     {
83       this(512);
84     }
85 
removeAllElements()86   public void removeAllElements()
87     {
88       m_intToString.clear();
89       for(int i=0;i<HASHPRIME;++i)
90         m_hashStart[i]=NULL;
91       m_hashChain.removeAllElements();
92     }
93 
94   /** @return string whose value is uniquely identified by this integer index.
95    * @throws java.lang.IndexOutOfBoundsException
96    *  if index doesn't map to a string.
97    * */
indexToString(int i)98   public String indexToString(int i)
99     throws java.lang.IndexOutOfBoundsException
100     {
101       if(i==NULL) return null;
102       return m_intToString.get(i);
103     }
104 
105   /** @return integer index uniquely identifying the value of this string. */
stringToIndex(String s)106   public int stringToIndex(String s)
107     {
108       if(s==null) return NULL;
109 
110       int hashslot=s.hashCode()%HASHPRIME;
111       if(hashslot<0) hashslot=-hashslot;
112 
113       // Is it one we already know?
114       int hashlast=m_hashStart[hashslot];
115       int hashcandidate=hashlast;
116       while(hashcandidate!=NULL)
117         {
118           if(m_intToString.get(hashcandidate).equals(s))
119             return hashcandidate;
120 
121           hashlast=hashcandidate;
122           hashcandidate=m_hashChain.elementAt(hashcandidate);
123         }
124 
125       // New value. Add to tables.
126       int newIndex=m_intToString.size();
127       m_intToString.add(s);
128 
129       m_hashChain.addElement(NULL);     // Initialize to no-following-same-hash
130       if(hashlast==NULL)  // First for this hash
131         m_hashStart[hashslot]=newIndex;
132       else // Link from previous with same hash
133         m_hashChain.setElementAt(newIndex,hashlast);
134 
135       return newIndex;
136     }
137 
138   /** Command-line unit test driver. This test relies on the fact that
139    * this version of the pool assigns indices consecutively, starting
140    * from zero, as new unique strings are encountered.
141    */
_main(String[] args)142   public static void _main(String[] args)
143   {
144     String[] word={
145       "Zero","One","Two","Three","Four","Five",
146       "Six","Seven","Eight","Nine","Ten",
147       "Eleven","Twelve","Thirteen","Fourteen","Fifteen",
148       "Sixteen","Seventeen","Eighteen","Nineteen","Twenty",
149       "Twenty-One","Twenty-Two","Twenty-Three","Twenty-Four",
150       "Twenty-Five","Twenty-Six","Twenty-Seven","Twenty-Eight",
151       "Twenty-Nine","Thirty","Thirty-One","Thirty-Two",
152       "Thirty-Three","Thirty-Four","Thirty-Five","Thirty-Six",
153       "Thirty-Seven","Thirty-Eight","Thirty-Nine"};
154 
155     DTMStringPool pool=new DTMStringPool();
156 
157     System.out.println("If no complaints are printed below, we passed initial test.");
158 
159     for(int pass=0;pass<=1;++pass)
160       {
161         int i;
162 
163         for(i=0;i<word.length;++i)
164           {
165             int j=pool.stringToIndex(word[i]);
166             if(j!=i)
167               System.out.println("\tMismatch populating pool: assigned "+
168                                  j+" for create "+i);
169           }
170 
171         for(i=0;i<word.length;++i)
172           {
173             int j=pool.stringToIndex(word[i]);
174             if(j!=i)
175               System.out.println("\tMismatch in stringToIndex: returned "+
176                                  j+" for lookup "+i);
177           }
178 
179         for(i=0;i<word.length;++i)
180           {
181             String w=pool.indexToString(i);
182             if(!word[i].equals(w))
183               System.out.println("\tMismatch in indexToString: returned"+
184                                  w+" for lookup "+i);
185           }
186 
187         pool.removeAllElements();
188 
189         System.out.println("\nPass "+pass+" complete\n");
190       } // end pass loop
191   }
192 }
193