# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
    Arrow.Map

An `ArrowVector` where each element is a "map" of some kind, like a `Dict`.
"""
struct Map{T, O, A} <: ArrowVector{T}
    # per-element validity; consulted by `getindex` only when `T` admits `missing`
    validity::ValidityBitmap
    # indexing with `i` yields the `(lo, hi)` range of element `i` within `data`
    offsets::Offsets{O}
    # flattened child vector; its elements expose `.key` and `.value`
    data::A
    # number of map elements (what `size` reports)
    ℓ::Int
    metadata::Union{Nothing, Dict{String, String}}
end

# A `Map` is one-dimensional; its length is stored in the `ℓ` field.
Base.size(l::Map) = (l.ℓ,)

# Materialize element `i` as a `Dict` built from the key/value entries in
# `data[lo:hi]`, then convert it to `T` via `ArrowTypes.arrowconvert`.
# Returns `missing` when `T` admits `missing` and the validity bit is unset.
@propagate_inbounds function Base.getindex(l::Map{T}, i::Integer) where {T}
    @boundscheck checkbounds(l, i)
    @inbounds lo, hi = l.offsets[i]
    # the type comparison is a compile-time constant, so the validity lookup is
    # skipped entirely for element types that cannot be `missing`
    if Base.nonmissingtype(T) !== T && !l.validity[i]
        return missing
    end
    return ArrowTypes.arrowconvert(T, Dict(x.key => x.value for x in view(l.data, lo:hi)))
end

# Turn one map-like value into a vector of `KT(key, value)` entries;
# `missing` passes through unchanged.
keyvalues(KT, ::Missing) = missing
keyvalues(KT, x::AbstractDict) = [KT(k, v) for (k, v) in pairs(x)]

# An input that is already a `Map` is returned as-is.
arrowvector(::MapType, x::Map, i, nl, fi, de, ded, meta; kw...) = x

# Build a `Map` from a collection of dict-like values (optionally `missing`).
#
# Each element is converted to a `Vector{KeyValue{K, V}}`, the vectors are
# flattened through `ToList`, and the flattened entries are converted to a child
# arrow vector one nesting level deeper (`nl + 1`).
#
# Keywords:
# - `largelists`: forwarded to `ToList` and to the child `arrowvector` call.
# - `kw...`: forwarded unchanged to the child `arrowvector` call.
function arrowvector(::MapType, x, i, nl, fi, de, ded, meta; largelists::Bool=false, kw...)
    len = length(x)
    validity = ValidityBitmap(x)
    ET = eltype(x)
    DT = Base.nonmissingtype(ET)
    KT = KeyValue{keytype(DT), valtype(DT)}
    VT = Vector{KT}
    # keep `Missing` in the intermediate element type only if the input allows it
    T = DT !== ET ? Union{Missing, VT} : VT
    flat = ToList(T[keyvalues(KT, y) for y in x]; largelists=largelists)
    offsets = Offsets(UInt8[], flat.inds)
    # NOTE: the keyword used to be misspelled (`lareglists`), so the caller's
    # `largelists` choice never reached the child vector.
    data = arrowvector(flat, i, nl + 1, fi, de, ded, nothing; largelists=largelists, kw...)
    return Map{ET, eltype(flat.inds), typeof(data)}(validity, offsets, data, len, meta)
end

# Compress a `Map`: the validity bitmap and the raw offsets array are compressed
# as this node's buffers, and the child `data` vector is compressed as its
# single child.
function compress(Z::Meta.CompressionType, comp, x::A) where {A <: Map}
    len = length(x)
    nc = nullcount(x)
    validity = compress(Z, comp, x.validity)
    offsets = compress(Z, comp, x.offsets.offsets)
    buffers = [validity, offsets]
    children = Compressed[compress(Z, comp, x.data)]
    return Compressed{Z, A}(x, buffers, len, nc, children)
end

# Append the field node and buffer descriptors for a `Map`/`List` column (and,
# recursively, for its child data) and return the updated buffer offset.
function makenodesbuffers!(col::Union{Map{T, O, A}, List{T, O, A}}, fieldnodes, fieldbuffers, bufferoffset, alignment) where {T, O, A}
    len = length(col)
    nc = nullcount(col)
    push!(fieldnodes, FieldNode(len, nc))
    @debug 1 "made field node: nodeidx = $(length(fieldnodes)), col = $(typeof(col)), len = $(fieldnodes[end].length), nc = $(fieldnodes[end].null_count)"
    # validity bitmap (zero-length buffer when there are no nulls)
    blen = nc == 0 ? 0 : bitpackedbytes(len, alignment)
    push!(fieldbuffers, Buffer(bufferoffset, blen))
    @debug 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
    # adjust buffer offset, make array buffer
    # NOTE(review): no extra padding is applied here; presumably
    # `bitpackedbytes` already accounts for `alignment` - confirm
    bufferoffset += blen
    # offsets buffer: one entry more than the number of elements
    blen = sizeof(O) * (len + 1)
    push!(fieldbuffers, Buffer(bufferoffset, blen))
    @debug 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
    bufferoffset += padding(blen, alignment)
    if eltype(A) == UInt8
        # byte-valued child data is described as one plain buffer
        blen = length(col.data)
        push!(fieldbuffers, Buffer(bufferoffset, blen))
        @debug 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
        bufferoffset += padding(blen, alignment)
    else
        # any other child describes its own nodes and buffers
        bufferoffset = makenodesbuffers!(col.data, fieldnodes, fieldbuffers, bufferoffset, alignment)
    end
    return bufferoffset
end

# Write a `Map`/`List` column to `io`: validity bitmap, then the offsets array,
# then the child data, zero-padding the offsets and byte-data arrays by
# `paddinglength(n, alignment)`.
function writebuffer(io, col::Union{Map{T, O, A}, List{T, O, A}}, alignment) where {T, O, A}
    @debug 1 "writebuffer: col = $(typeof(col))"
    @debug 2 col
    writebitmap(io, col, alignment)
    # write offsets
    n = writearray(io, O, col.offsets.offsets)
    @debug 1 "writing array: col = $(typeof(col.offsets.offsets)), n = $n, padded = $(padding(n, alignment))"
    writezeros(io, paddinglength(n, alignment))
    # write values array
    if eltype(A) == UInt8
        # byte-valued child data is written directly as a UInt8 array
        n = writearray(io, UInt8, col.data)
        @debug 1 "writing array: col = $(typeof(col.data)), n = $n, padded = $(padding(n, alignment))"
        writezeros(io, paddinglength(n, alignment))
    else
        # any other child writes itself
        writebuffer(io, col.data, alignment)
    end
    return
end