1// Licensed to the Apache Software Foundation (ASF) under one 2// or more contributor license agreements. See the NOTICE file 3// distributed with this work for additional information 4// regarding copyright ownership. The ASF licenses this file 5// to you under the Apache License, Version 2.0 (the 6// "License"); you may not use this file except in compliance 7// with the License. You may obtain a copy of the License at 8// 9// http://www.apache.org/licenses/LICENSE-2.0 10// 11// Unless required by applicable law or agreed to in writing, software 12// distributed under the License is distributed on an "AS IS" BASIS, 13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14// See the License for the specific language governing permissions and 15// limitations under the License. 16 17package schema 18 19import ( 20 "fmt" 21 "strings" 22 23 "github.com/apache/arrow/go/v6/parquet" 24 format "github.com/apache/arrow/go/v6/parquet/internal/gen-go/parquet" 25) 26 27// Column encapsulates the information necessary to interpret primitive 28// column data in the context of a particular schema. We have to examine 29// the node structure of a column's path to the root in the schema tree 30// to be able to reassemble the nested structure from the repetition and 31// definition levels. 32type Column struct { 33 pnode *PrimitiveNode 34 // the maximum definition level in this column 35 // if this is > 0 then either this column or a parent column must be optional. 36 maxDefLvl int16 37 // the maximum repetition level in this column 38 // if this is > 0, then either this column or a parent column must be repeated. 39 // when the repetition level in the column data equals this value, it indicates 40 // additional elements in the innermost list. 41 maxRepLvl int16 42} 43 44// NewColumn returns a new column object for the given node with the provided 45// maximum definition and repetition levels. 46func NewColumn(n *PrimitiveNode, maxDefinitionLvl, maxRepetitionLvl int16) *Column { 47 return &Column{n, maxDefinitionLvl, maxRepetitionLvl} 48} 49 50// Name is the column's name 51func (c *Column) Name() string { return c.pnode.Name() } 52 53// ColumnPath returns the full path to this column from the root of the schema 54func (c *Column) ColumnPath() parquet.ColumnPath { return c.pnode.columnPath() } 55 56// Path is equivalent to ColumnPath().String() returning the dot-string version of the path 57func (c *Column) Path() string { return c.pnode.Path() } 58 59// TypeLength is -1 if not a FixedLenByteArray, otherwise it is the length of elements in the column 60func (c *Column) TypeLength() int { return c.pnode.TypeLength() } 61 62func (c *Column) MaxDefinitionLevel() int16 { return c.maxDefLvl } 63func (c *Column) MaxRepetitionLevel() int16 { return c.maxRepLvl } 64func (c *Column) PhysicalType() parquet.Type { return c.pnode.PhysicalType() } 65func (c *Column) ConvertedType() ConvertedType { return c.pnode.convertedType } 66func (c *Column) LogicalType() LogicalType { return c.pnode.logicalType } 67func (c *Column) ColumnOrder() parquet.ColumnOrder { return c.pnode.ColumnOrder } 68func (c *Column) String() string { 69 var bld strings.Builder 70 bld.WriteString("column descriptor = {\n") 71 fmt.Fprintf(&bld, " name: %s,\n", c.Name()) 72 fmt.Fprintf(&bld, " path: %s,\n", c.Path()) 73 fmt.Fprintf(&bld, " physical_type: %s,\n", c.PhysicalType()) 74 fmt.Fprintf(&bld, " converted_type: %s,\n", c.ConvertedType()) 75 fmt.Fprintf(&bld, " logical_type: %s,\n", c.LogicalType()) 76 fmt.Fprintf(&bld, " max_definition_level: %d,\n", c.MaxDefinitionLevel()) 77 fmt.Fprintf(&bld, " max_repetition_level: %d,\n", c.MaxRepetitionLevel()) 78 if c.PhysicalType() == parquet.Types.FixedLenByteArray { 79 fmt.Fprintf(&bld, " length: %d,\n", c.TypeLength()) 80 } 81 if c.ConvertedType() == ConvertedTypes.Decimal { 82 fmt.Fprintf(&bld, " precision: %d,\n scale: %d,\n", c.pnode.decimalMetaData.Precision, c.pnode.decimalMetaData.Scale) 83 } 84 bld.WriteString("}") 85 return bld.String() 86} 87 88// Equals will return true if the rhs Column has the same Max Repetition and Definition levels 89// along with having the same node definition. 90func (c *Column) Equals(rhs *Column) bool { 91 return c.pnode.Equals(rhs.pnode) && 92 c.MaxRepetitionLevel() == rhs.MaxRepetitionLevel() && 93 c.MaxDefinitionLevel() == rhs.MaxDefinitionLevel() 94} 95 96// SchemaNode returns the underlying Node in the schema tree for this column. 97func (c *Column) SchemaNode() Node { 98 return c.pnode 99} 100 101// SortOrder returns the sort order of this column's statistics based on the 102// Logical and Converted types. 103func (c *Column) SortOrder() SortOrder { 104 if c.LogicalType() != nil { 105 return GetLogicalSortOrder(c.LogicalType(), format.Type(c.pnode.PhysicalType())) 106 } 107 return GetSortOrder(c.ConvertedType(), format.Type(c.pnode.PhysicalType())) 108} 109