# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
    Arrow.Struct

An `ArrowVector` where each element is a "struct" of some kind with ordered, named fields, like a `NamedTuple{names, types}` or regular julia `struct`.
"""
struct Struct{T, S} <: ArrowVector{T}
    validity::ValidityBitmap # consulted by `getindex` to decide whether an element is `missing`
    data::S # Tuple of ArrowVector
    ℓ::Int # number of elements; reported by `Base.size`
    metadata::Union{Nothing, Dict{String, String}}
end

Base.size(s::Struct) = (s.ℓ,)

# Materialize element `i` by reading index `i` from every child vector in `s.data`.
#
# * `ArrowTypes.NAMEDTUPLE` kinds are built by passing the tuple of field values to `NT`.
# * `ArrowTypes.STRUCT` kinds are built by splatting the field values into `NT`'s constructor.
#
# When `T` admits `missing` (`NT !== T`) the validity bitmap is checked first and `missing`
# is returned for invalid slots. For any other `structtype` no branch matches and the
# function implicitly returns `nothing`.
@propagate_inbounds function Base.getindex(s::Struct{T}, i::Integer) where {T}
    @boundscheck checkbounds(s, i)
    NT = Base.nonmissingtype(T)
    if ArrowTypes.structtype(NT) === ArrowTypes.NAMEDTUPLE
        if NT !== T
            return s.validity[i] ? NT(ntuple(j->s.data[j][i], fieldcount(NT))) : missing
        else
            return NT(ntuple(j->s.data[j][i], fieldcount(NT)))
        end
    elseif ArrowTypes.structtype(NT) === ArrowTypes.STRUCT
        if NT !== T
            return s.validity[i] ? NT(ntuple(j->s.data[j][i], fieldcount(NT))...) : missing
        else
            return NT(ntuple(j->s.data[j][i], fieldcount(NT))...)
        end
    end
end

# Store `v` at position `i` and return `v`.
#
# * `missing` only clears the validity entry; the child vectors are left untouched.
# * Any other value is scattered field-by-field into the child vectors.
#
# After writing a non-missing value the slot is re-marked as valid if it was previously
# invalid. Without this, a slot that had been set to `missing` would keep reading back as
# `missing` from `getindex` even though its field values were overwritten.
@propagate_inbounds function Base.setindex!(s::Struct{T}, v::T, i::Integer) where {T}
    @boundscheck checkbounds(s, i)
    if v === missing
        @inbounds s.validity[i] = false
    else
        NT = Base.nonmissingtype(T)
        N = fieldcount(NT)
        foreach(1:N) do j
            @inbounds s.data[j][i] = getfield(v, j)
        end
        # Only relevant when `T` admits `missing`, and only written when the slot currently
        # reads as invalid, so slots that are already valid never touch the bitmap.
        # NOTE(review): assumes `ValidityBitmap` accepts `true` through the same `setindex!`
        # used for `false` above (defined outside this file) — confirm.
        @inbounds if NT !== T && !s.validity[i]
            s.validity[i] = true
        end
    end
    return v
end

# Lazy vector exposing field number `i` (a type parameter) of every element of `data`.
# `T` is the type of that field.
struct ToStruct{T, i, A} <: AbstractVector{T}
    data::A # eltype is NamedTuple or some struct
end

# Wrap `x` so that it iterates field `j` of each element; the element type is taken from
# `fieldtype` of the non-missing eltype of `x`.
ToStruct(x::A, j::Integer) where {A} = ToStruct{fieldtype(Base.nonmissingtype(eltype(A)), j), j, A}(x)

Base.IndexStyle(::Type{<:ToStruct}) = Base.IndexLinear()
Base.size(x::ToStruct) = (length(x.data),)

# Return field `j` of element `i`. A `missing` parent element has no fields to read, so
# `ArrowTypes.default(T)` is returned in its place.
Base.@propagate_inbounds function Base.getindex(A::ToStruct{T, j}, i::Integer) where {T, j}
    @boundscheck checkbounds(A, i)
    @inbounds x = A.data[i]
    return x === missing ? ArrowTypes.default(T) : getfield(x, j)
end

# An existing `Struct` is already in arrow form; return it unchanged.
arrowvector(::StructType, x::Struct, i, nl, fi, de, ded, meta; kw...) = x

# Convert a collection `x` of struct-like elements into an `Arrow.Struct`.
#
# Builds the validity bitmap from `x`, then one child arrow vector per field by wrapping
# `x` in `ToStruct(x, j)`. Children are built with `nl + 1`, with the field number `j` in
# the `fi` position, and with `nothing` as their metadata. For `ArrowTypes.STRUCT` kinds a
# metadata dict is created if none was supplied, and the element type is passed to
# `ArrowTypes.registertype!` / `ArrowTypes.getarrowtype!` with that dict.
function arrowvector(::StructType, x, i, nl, fi, de, ded, meta; kw...)
    len = length(x)
    validity = ValidityBitmap(x)
    T = Base.nonmissingtype(eltype(x))
    if ArrowTypes.structtype(T) === ArrowTypes.STRUCT
        meta = meta === nothing ? Dict{String, String}() : meta
        ArrowTypes.registertype!(T, T)
        ArrowTypes.getarrowtype!(meta, T)
    end
    data = Tuple(arrowvector(ToStruct(x, j), i, nl + 1, j, de, ded, nothing; kw...) for j = 1:fieldcount(T))
    return Struct{eltype(x), typeof(data)}(validity, data, len, meta)
end

# Compress a `Struct`: the validity bitmap becomes the single entry of `buffers` and each
# child vector is compressed in turn into `children`.
function compress(Z::Meta.CompressionType, comp, x::A) where {A <: Struct}
    len = length(x)
    nc = nullcount(x)
    validity = compress(Z, comp, x.validity)
    buffers = [validity]
    children = Compressed[]
    for y in x.data
        push!(children, compress(Z, comp, y))
    end
    return Compressed{Z, A}(x, buffers, len, nc, children)
end

# Append the field node and validity buffer descriptor for `col`, then recurse into each
# child vector. The validity buffer length is 0 when there are no nulls. Returns the
# buffer offset after this column and all of its children.
function makenodesbuffers!(col::Struct{T}, fieldnodes, fieldbuffers, bufferoffset, alignment) where {T}
    len = length(col)
    nc = nullcount(col)
    push!(fieldnodes, FieldNode(len, nc))
    @debug 1 "made field node: nodeidx = $(length(fieldnodes)), col = $(typeof(col)), len = $(fieldnodes[end].length), nc = $(fieldnodes[end].null_count)"
    # validity bitmap
    blen = nc == 0 ? 0 : bitpackedbytes(len, alignment)
    push!(fieldbuffers, Buffer(bufferoffset, blen))
    @debug 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
    bufferoffset += blen
    for child in col.data
        bufferoffset = makenodesbuffers!(child, fieldnodes, fieldbuffers, bufferoffset, alignment)
    end
    return bufferoffset
end

# Write `col` to `io`: first its validity bitmap, then each child vector in field order.
function writebuffer(io, col::Struct, alignment)
    @debug 1 "writebuffer: col = $(typeof(col))"
    @debug 2 col
    writebitmap(io, col, alignment)
    # write values arrays
    for child in col.data
        writebuffer(io, child, alignment)
    end
    return
end