1// Licensed to the Apache Software Foundation (ASF) under one 2// or more contributor license agreements. See the NOTICE file 3// distributed with this work for additional information 4// regarding copyright ownership. The ASF licenses this file 5// to you under the Apache License, Version 2.0 (the 6// "License"); you may not use this file except in compliance 7// with the License. You may obtain a copy of the License at 8// 9// http://www.apache.org/licenses/LICENSE-2.0 10// 11// Unless required by applicable law or agreed to in writing, software 12// distributed under the License is distributed on an "AS IS" BASIS, 13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14// See the License for the specific language governing permissions and 15// limitations under the License. 16 17package arrow 18 19import ( 20 "hash/maphash" 21 22 "github.com/apache/arrow/go/v6/arrow/internal/debug" 23) 24 25// Type is a logical type. They can be expressed as 26// either a primitive physical type (bytes or bits of some fixed size), a 27// nested type consisting of other data types, or another data type (e.g. a 28// timestamp encoded as an int64) 29type Type int 30 31const ( 32 // NULL type having no physical storage 33 NULL Type = iota 34 35 // BOOL is a 1 bit, LSB bit-packed ordering 36 BOOL 37 38 // UINT8 is an Unsigned 8-bit little-endian integer 39 UINT8 40 41 // INT8 is a Signed 8-bit little-endian integer 42 INT8 43 44 // UINT16 is an Unsigned 16-bit little-endian integer 45 UINT16 46 47 // INT16 is a Signed 16-bit little-endian integer 48 INT16 49 50 // UINT32 is an Unsigned 32-bit little-endian integer 51 UINT32 52 53 // INT32 is a Signed 32-bit little-endian integer 54 INT32 55 56 // UINT64 is an Unsigned 64-bit little-endian integer 57 UINT64 58 59 // INT64 is a Signed 64-bit little-endian integer 60 INT64 61 62 // FLOAT16 is a 2-byte floating point value 63 FLOAT16 64 65 // FLOAT32 is a 4-byte floating point value 66 FLOAT32 67 68 // FLOAT64 is an 8-byte floating point value 69 FLOAT64 70 71 // STRING is a UTF8 variable-length string 72 STRING 73 74 // BINARY is a Variable-length byte type (no guarantee of UTF8-ness) 75 BINARY 76 77 // FIXED_SIZE_BINARY is a binary where each value occupies the same number of bytes 78 FIXED_SIZE_BINARY 79 80 // DATE32 is int32 days since the UNIX epoch 81 DATE32 82 83 // DATE64 is int64 milliseconds since the UNIX epoch 84 DATE64 85 86 // TIMESTAMP is an exact timestamp encoded with int64 since UNIX epoch 87 // Default unit millisecond 88 TIMESTAMP 89 90 // TIME32 is a signed 32-bit integer, representing either seconds or 91 // milliseconds since midnight 92 TIME32 93 94 // TIME64 is a signed 64-bit integer, representing either microseconds or 95 // nanoseconds since midnight 96 TIME64 97 98 // INTERVAL_MONTHS is YEAR_MONTH interval in SQL style 99 INTERVAL_MONTHS 100 101 // INTERVAL_DAY_TIME is DAY_TIME in SQL Style 102 INTERVAL_DAY_TIME 103 104 // DECIMAL128 is a precision- and scale-based decimal type. Storage type depends on the 105 // parameters. 106 DECIMAL128 107 108 // DECIMAL256 is a precision and scale based decimal type, with 256 bit max. not yet implemented 109 DECIMAL256 110 111 // LIST is a list of some logical data type 112 LIST 113 114 // STRUCT of logical types 115 STRUCT 116 117 // SPARSE_UNION of logical types. not yet implemented 118 SPARSE_UNION 119 120 // DENSE_UNION of logical types. not yet implemented 121 DENSE_UNION 122 123 // DICTIONARY aka Category type 124 DICTIONARY 125 126 // MAP is a repeated struct logical type 127 MAP 128 129 // Custom data type, implemented by user 130 EXTENSION 131 132 // Fixed size list of some logical type 133 FIXED_SIZE_LIST 134 135 // Measure of elapsed time in either seconds, milliseconds, microseconds 136 // or nanoseconds. 137 DURATION 138 139 // like STRING, but 64-bit offsets. not yet implemented 140 LARGE_STRING 141 142 // like BINARY but with 64-bit offsets, not yet implemented 143 LARGE_BINARY 144 145 // like LIST but with 64-bit offsets. not yet implmented 146 LARGE_LIST 147 148 // calendar interval with three fields 149 INTERVAL_MONTH_DAY_NANO 150 151 // INTERVAL could be any of the interval types, kept to avoid breaking anyone 152 // after switching to individual type ids for the interval types that were using 153 // it when calling MakeFromData or NewBuilder 154 // 155 // Deprecated and will be removed in the next major version release 156 INTERVAL 157 158 // Alias to ensure we do not break any consumers 159 DECIMAL = DECIMAL128 160) 161 162// DataType is the representation of an Arrow type. 163type DataType interface { 164 ID() Type 165 // Name is name of the data type. 166 Name() string 167 Fingerprint() string 168} 169 170// FixedWidthDataType is the representation of an Arrow type that 171// requires a fixed number of bits in memory for each element. 172type FixedWidthDataType interface { 173 DataType 174 // BitWidth returns the number of bits required to store a single element of this data type in memory. 175 BitWidth() int 176} 177 178type BinaryDataType interface { 179 DataType 180 binary() 181} 182 183func HashType(seed maphash.Seed, dt DataType) uint64 { 184 var h maphash.Hash 185 h.SetSeed(seed) 186 h.WriteString(dt.Fingerprint()) 187 return h.Sum64() 188} 189 190func typeIDFingerprint(id Type) string { 191 c := string(rune(int(id) + int('A'))) 192 return "@" + c 193} 194 195func typeFingerprint(typ DataType) string { return typeIDFingerprint(typ.ID()) } 196 197func timeUnitFingerprint(unit TimeUnit) rune { 198 switch unit { 199 case Second: 200 return 's' 201 case Millisecond: 202 return 'm' 203 case Microsecond: 204 return 'u' 205 case Nanosecond: 206 return 'n' 207 default: 208 debug.Assert(false, "unexpected time unit") 209 return rune(0) 210 } 211} 212