1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements.  See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership.  The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License.  You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 package org.apache.hadoop.hbase.codec.prefixtree.encode.row;
20 
21 import java.io.IOException;
22 import java.io.OutputStream;
23 import java.util.ArrayList;
24 import java.util.List;
25 
26 import org.apache.hadoop.hbase.classification.InterfaceAudience;
27 import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
28 import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
29 import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
30 import org.apache.hadoop.hbase.util.vint.UFIntTool;
31 
32 import com.google.common.collect.Lists;
33 
34 /**
35  * Most of the complexity of the PrefixTree is contained in the "row section". It contains the row
36  * key trie structure used to search and recreate all the row keys. Each nub and leaf in this trie
37  * also contains references to offsets in the other sections of the data block that enable the
38  * decoder to match a row key with its qualifier, timestamp, type, value, etc.
39  * <p>
40  * The row section is a concatenated collection of {@link RowNodeWriter}s. See that class for the
41  * internals of each row node.
42  */
43 @InterfaceAudience.Private
44 public class RowSectionWriter {
45 
46   /***************** fields **************************/
47 
48   protected PrefixTreeEncoder prefixTreeEncoder;
49 
50   protected PrefixTreeBlockMeta blockMeta;
51 
52   protected int numBytes;
53 
54   protected ArrayList<TokenizerNode> nonLeaves;
55   protected ArrayList<TokenizerNode> leaves;
56 
57   protected ArrayList<RowNodeWriter> leafWriters;
58   protected ArrayList<RowNodeWriter> nonLeafWriters;
59 
60   protected int numLeafWriters;
61   protected int numNonLeafWriters;
62 
63 
64   /********************* construct **********************/
65 
RowSectionWriter()66   public RowSectionWriter() {
67     this.nonLeaves = Lists.newArrayList();
68     this.leaves = Lists.newArrayList();
69     this.leafWriters = Lists.newArrayList();
70     this.nonLeafWriters = Lists.newArrayList();
71   }
72 
RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder)73   public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) {
74     reconstruct(prefixTreeEncoder);
75   }
76 
reconstruct(PrefixTreeEncoder prefixTreeEncoder)77   public void reconstruct(PrefixTreeEncoder prefixTreeEncoder) {
78     this.prefixTreeEncoder = prefixTreeEncoder;
79     this.blockMeta = prefixTreeEncoder.getBlockMeta();
80     reset();
81   }
82 
reset()83   public void reset() {
84     numBytes = 0;
85     nonLeaves.clear();
86     leaves.clear();
87     numLeafWriters = 0;
88     numNonLeafWriters = 0;
89   }
90 
91 
92   /****************** methods *******************************/
93 
compile()94   public RowSectionWriter compile() {
95     blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength());
96     prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes();
97 
98     prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false);
99     prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true);
100 
101     // track the starting position of each node in final output
102     int negativeIndex = 0;
103 
104     // create leaf writer nodes
105     // leaf widths are known at this point, so add them up
106     int totalLeafBytes = 0;
107     for (int i = leaves.size() - 1; i >= 0; --i) {
108       TokenizerNode leaf = leaves.get(i);
109       RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf);
110       ++numLeafWriters;
111       // leaves store all but their first token byte
112       int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0);
113       totalLeafBytes += leafNodeWidth;
114       negativeIndex += leafNodeWidth;
115       leaf.setNegativeIndex(negativeIndex);
116     }
117 
118     int totalNonLeafBytesWithoutOffsets = 0;
119     int totalChildPointers = 0;
120     for (int i = nonLeaves.size() - 1; i >= 0; --i) {
121       TokenizerNode nonLeaf = nonLeaves.get(i);
122       RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf);
123       ++numNonLeafWriters;
124       totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0);
125       totalChildPointers += nonLeaf.getNumChildren();
126     }
127 
128     // figure out how wide our offset FInts are
129     int offsetWidth = 0;
130     while (true) {
131       ++offsetWidth;
132       int offsetBytes = totalChildPointers * offsetWidth;
133       int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes;
134       if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) {
135         // it fits
136         numBytes = totalRowBytes;
137         break;
138       }
139     }
140     blockMeta.setNextNodeOffsetWidth(offsetWidth);
141 
142     // populate negativeIndexes
143     for (int i = nonLeaves.size() - 1; i >= 0; --i) {
144       TokenizerNode nonLeaf = nonLeaves.get(i);
145       int writerIndex = nonLeaves.size() - i - 1;
146       RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex);
147       int nodeWidth = nonLeafWriter.calculateWidth();
148       negativeIndex += nodeWidth;
149       nonLeaf.setNegativeIndex(negativeIndex);
150     }
151 
152     return this;
153   }
154 
initializeWriter(List<RowNodeWriter> list, int index, TokenizerNode builderNode)155   protected RowNodeWriter initializeWriter(List<RowNodeWriter> list, int index,
156       TokenizerNode builderNode) {
157     RowNodeWriter rowNodeWriter = null;
158     //check if there is an existing node we can recycle
159     if (index >= list.size()) {
160       //there are not enough existing nodes, so add a new one which will be retrieved below
161       list.add(new RowNodeWriter(prefixTreeEncoder, builderNode));
162     }
163     rowNodeWriter = list.get(index);
164     rowNodeWriter.reset(builderNode);
165     return rowNodeWriter;
166   }
167 
168 
writeBytes(OutputStream os)169   public void writeBytes(OutputStream os) throws IOException {
170     for (int i = numNonLeafWriters - 1; i >= 0; --i) {
171       RowNodeWriter nonLeafWriter = nonLeafWriters.get(i);
172       nonLeafWriter.write(os);
173     }
174     // duplicates above... written more for clarity right now
175     for (int i = numLeafWriters - 1; i >= 0; --i) {
176       RowNodeWriter leafWriter = leafWriters.get(i);
177       leafWriter.write(os);
178     }
179   }
180 
181 
182   /***************** static ******************************/
183 
filterByLeafAndReverse( ArrayList<TokenizerNode> ins, boolean leaves)184   protected static ArrayList<TokenizerNode> filterByLeafAndReverse(
185       ArrayList<TokenizerNode> ins, boolean leaves) {
186     ArrayList<TokenizerNode> outs = Lists.newArrayList();
187     for (int i = ins.size() - 1; i >= 0; --i) {
188       TokenizerNode n = ins.get(i);
189       if (n.isLeaf() && leaves || (!n.isLeaf() && !leaves)) {
190         outs.add(ins.get(i));
191       }
192     }
193     return outs;
194   }
195 
196 
197   /************* get/set **************************/
198 
getNumBytes()199   public int getNumBytes() {
200     return numBytes;
201   }
202 
getNonLeaves()203   public ArrayList<TokenizerNode> getNonLeaves() {
204     return nonLeaves;
205   }
206 
getLeaves()207   public ArrayList<TokenizerNode> getLeaves() {
208     return leaves;
209   }
210 
getNonLeafWriters()211   public ArrayList<RowNodeWriter> getNonLeafWriters() {
212     return nonLeafWriters;
213   }
214 
getLeafWriters()215   public ArrayList<RowNodeWriter> getLeafWriters() {
216     return leafWriters;
217   }
218 
219 }
220