1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 package org.apache.hadoop.hbase.codec.prefixtree.encode.row; 20 21 import java.io.IOException; 22 import java.io.OutputStream; 23 import java.util.ArrayList; 24 import java.util.List; 25 26 import org.apache.hadoop.hbase.classification.InterfaceAudience; 27 import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; 28 import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder; 29 import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; 30 import org.apache.hadoop.hbase.util.vint.UFIntTool; 31 32 import com.google.common.collect.Lists; 33 34 /** 35 * Most of the complexity of the PrefixTree is contained in the "row section". It contains the row 36 * key trie structure used to search and recreate all the row keys. Each nub and leaf in this trie 37 * also contains references to offsets in the other sections of the data block that enable the 38 * decoder to match a row key with its qualifier, timestamp, type, value, etc. 39 * <p> 40 * The row section is a concatenated collection of {@link RowNodeWriter}s. See that class for the 41 * internals of each row node. 42 */ 43 @InterfaceAudience.Private 44 public class RowSectionWriter { 45 46 /***************** fields **************************/ 47 48 protected PrefixTreeEncoder prefixTreeEncoder; 49 50 protected PrefixTreeBlockMeta blockMeta; 51 52 protected int numBytes; 53 54 protected ArrayList<TokenizerNode> nonLeaves; 55 protected ArrayList<TokenizerNode> leaves; 56 57 protected ArrayList<RowNodeWriter> leafWriters; 58 protected ArrayList<RowNodeWriter> nonLeafWriters; 59 60 protected int numLeafWriters; 61 protected int numNonLeafWriters; 62 63 64 /********************* construct **********************/ 65 RowSectionWriter()66 public RowSectionWriter() { 67 this.nonLeaves = Lists.newArrayList(); 68 this.leaves = Lists.newArrayList(); 69 this.leafWriters = Lists.newArrayList(); 70 this.nonLeafWriters = Lists.newArrayList(); 71 } 72 RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder)73 public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) { 74 reconstruct(prefixTreeEncoder); 75 } 76 reconstruct(PrefixTreeEncoder prefixTreeEncoder)77 public void reconstruct(PrefixTreeEncoder prefixTreeEncoder) { 78 this.prefixTreeEncoder = prefixTreeEncoder; 79 this.blockMeta = prefixTreeEncoder.getBlockMeta(); 80 reset(); 81 } 82 reset()83 public void reset() { 84 numBytes = 0; 85 nonLeaves.clear(); 86 leaves.clear(); 87 numLeafWriters = 0; 88 numNonLeafWriters = 0; 89 } 90 91 92 /****************** methods *******************************/ 93 compile()94 public RowSectionWriter compile() { 95 blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength()); 96 prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes(); 97 98 prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false); 99 prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true); 100 101 // track the starting position of each node in final output 102 int negativeIndex = 0; 103 104 // create leaf writer nodes 105 // leaf widths are known at this point, so add them up 106 int totalLeafBytes = 0; 107 for (int i = leaves.size() - 1; i >= 0; --i) { 108 TokenizerNode leaf = leaves.get(i); 109 RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf); 110 ++numLeafWriters; 111 // leaves store all but their first token byte 112 int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0); 113 totalLeafBytes += leafNodeWidth; 114 negativeIndex += leafNodeWidth; 115 leaf.setNegativeIndex(negativeIndex); 116 } 117 118 int totalNonLeafBytesWithoutOffsets = 0; 119 int totalChildPointers = 0; 120 for (int i = nonLeaves.size() - 1; i >= 0; --i) { 121 TokenizerNode nonLeaf = nonLeaves.get(i); 122 RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf); 123 ++numNonLeafWriters; 124 totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0); 125 totalChildPointers += nonLeaf.getNumChildren(); 126 } 127 128 // figure out how wide our offset FInts are 129 int offsetWidth = 0; 130 while (true) { 131 ++offsetWidth; 132 int offsetBytes = totalChildPointers * offsetWidth; 133 int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes; 134 if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) { 135 // it fits 136 numBytes = totalRowBytes; 137 break; 138 } 139 } 140 blockMeta.setNextNodeOffsetWidth(offsetWidth); 141 142 // populate negativeIndexes 143 for (int i = nonLeaves.size() - 1; i >= 0; --i) { 144 TokenizerNode nonLeaf = nonLeaves.get(i); 145 int writerIndex = nonLeaves.size() - i - 1; 146 RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex); 147 int nodeWidth = nonLeafWriter.calculateWidth(); 148 negativeIndex += nodeWidth; 149 nonLeaf.setNegativeIndex(negativeIndex); 150 } 151 152 return this; 153 } 154 initializeWriter(List<RowNodeWriter> list, int index, TokenizerNode builderNode)155 protected RowNodeWriter initializeWriter(List<RowNodeWriter> list, int index, 156 TokenizerNode builderNode) { 157 RowNodeWriter rowNodeWriter = null; 158 //check if there is an existing node we can recycle 159 if (index >= list.size()) { 160 //there are not enough existing nodes, so add a new one which will be retrieved below 161 list.add(new RowNodeWriter(prefixTreeEncoder, builderNode)); 162 } 163 rowNodeWriter = list.get(index); 164 rowNodeWriter.reset(builderNode); 165 return rowNodeWriter; 166 } 167 168 writeBytes(OutputStream os)169 public void writeBytes(OutputStream os) throws IOException { 170 for (int i = numNonLeafWriters - 1; i >= 0; --i) { 171 RowNodeWriter nonLeafWriter = nonLeafWriters.get(i); 172 nonLeafWriter.write(os); 173 } 174 // duplicates above... written more for clarity right now 175 for (int i = numLeafWriters - 1; i >= 0; --i) { 176 RowNodeWriter leafWriter = leafWriters.get(i); 177 leafWriter.write(os); 178 } 179 } 180 181 182 /***************** static ******************************/ 183 filterByLeafAndReverse( ArrayList<TokenizerNode> ins, boolean leaves)184 protected static ArrayList<TokenizerNode> filterByLeafAndReverse( 185 ArrayList<TokenizerNode> ins, boolean leaves) { 186 ArrayList<TokenizerNode> outs = Lists.newArrayList(); 187 for (int i = ins.size() - 1; i >= 0; --i) { 188 TokenizerNode n = ins.get(i); 189 if (n.isLeaf() && leaves || (!n.isLeaf() && !leaves)) { 190 outs.add(ins.get(i)); 191 } 192 } 193 return outs; 194 } 195 196 197 /************* get/set **************************/ 198 getNumBytes()199 public int getNumBytes() { 200 return numBytes; 201 } 202 getNonLeaves()203 public ArrayList<TokenizerNode> getNonLeaves() { 204 return nonLeaves; 205 } 206 getLeaves()207 public ArrayList<TokenizerNode> getLeaves() { 208 return leaves; 209 } 210 getNonLeafWriters()211 public ArrayList<RowNodeWriter> getNonLeafWriters() { 212 return nonLeafWriters; 213 } 214 getLeafWriters()215 public ArrayList<RowNodeWriter> getLeafWriters() { 216 return leafWriters; 217 } 218 219 } 220