# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

using Test, Arrow, Tables, Dates, PooledArrays, TimeZones, UUIDs

include(joinpath(dirname(pathof(Arrow)), "../test/testtables.jl"))
include(joinpath(dirname(pathof(Arrow)), "../test/integrationtest.jl"))
include(joinpath(dirname(pathof(Arrow)), "../test/dates.jl"))

# Plain struct with no Arrow-specific methods defined in this file; used in the
# "misc" testset to check that a column of custom structs roundtrips.
struct CustomStruct
    x::Int
    y::Float64
    z::String
end

@testset "Arrow" begin

@testset "table roundtrips" begin

# `testtables` and `testtable` are not defined in this file; presumably they are
# provided by the included testtables.jl.
for case in testtables
    testtable(case...)
end

end # @testset "table roundtrips"

@testset "arrow json integration tests" begin

# Resolve the fixture directory once rather than once for `readdir` and again
# for every file.
arrowjsondir = joinpath(dirname(pathof(Arrow)), "../test/arrowjson")
for file in readdir(arrowjsondir)
    jsonfile = joinpath(arrowjsondir, file)
    println("integration test for $jsonfile")
    # `ArrowJSON` is not defined in this file; presumably it comes from the
    # included integrationtest.jl.
    df = ArrowJSON.parsefile(jsonfile)
    io = IOBuffer()
    Arrow.write(io, df)
    seekstart(io)
    # NOTE(review): `convert=false` presumably keeps the raw Arrow element types
    # so they compare equal to the JSON-derived table -- confirm.
    tbl = Arrow.Table(io; convert=false)
    @test isequal(df, tbl)
end

end # @testset "arrow json integration tests"

@testset "misc" begin

# multiple record batches: two partitions are written, one concatenated column
# is read back
t = Tables.partitioner((
    (col1=Union{Int64, Missing}[1,2,3,4,5,6,7,8,9,missing],),
    (col1=Union{Int64, Missing}[1,2,3,4,5,6,7,8,9,missing],),
))
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == 1
@test isequal(tt.col1, vcat([1,2,3,4,5,6,7,8,9,missing], [1,2,3,4,5,6,7,8,9,missing]))
@test eltype(tt.col1) === Union{Int64, Missing}

# Arrow.Stream: iterating the same buffer yields one table per partition
seekstart(io)
str = Arrow.Stream(io)
state = iterate(str)
@test state !== nothing
tt, st = state
@test length(tt) == 1
@test isequal(tt.col1, [1,2,3,4,5,6,7,8,9,missing])

state = iterate(str, st)
@test state !== nothing
tt, st = state
@test length(tt) == 1
@test isequal(tt.col1, [1,2,3,4,5,6,7,8,9,missing])

# exactly two partitions were written, so the stream must now be exhausted
@test iterate(str, st) === nothing

# dictionary batch isDelta: the second partition introduces values ("sailor2",
# 5) that are absent from the first one
InnerRow = NamedTuple{(:c,), Tuple{String}}
NestedRow = NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, InnerRow}}}
t = (
    col1=Int64[1,2,3,4],
    col2=Union{String, Missing}["hey", "there", "sailor", missing],
    col3=NestedRow[
        (a=Int64(1), b=missing),
        (a=Int64(1), b=missing),
        (a=Int64(3), b=(c="sailor",)),
        (a=Int64(4), b=(c="jo-bob",)),
    ],
)
t2 = (
    col1=Int64[1,2,5,6],
    col2=Union{String, Missing}["hey", "there", "sailor2", missing],
    col3=NestedRow[
        (a=Int64(1), b=missing),
        (a=Int64(1), b=missing),
        (a=Int64(5), b=(c="sailor2",)),
        (a=Int64(4), b=(c="jo-bob",)),
    ],
)
parts = Tables.partitioner((t, t2))
io = IOBuffer()
Arrow.write(io, parts; dictencode=true, dictencodenested=true)
seekstart(io)
tt = Arrow.Table(io)
@test tt.col1 == [1,2,3,4,1,2,5,6]
@test isequal(
    tt.col2,
    ["hey", "there", "sailor", missing, "hey", "there", "sailor2", missing],
)
# expected nested column is the two input columns back to back
@test isequal(tt.col3, vcat(t.col3, t2.col3))

# table-level and column-level metadata roundtrip
t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
meta = Dict("key1" => "value1", "key2" => "value2")
Arrow.setmetadata!(t, meta)
meta2 = Dict("colkey1" => "colvalue1", "colkey2" => "colvalue2")
Arrow.setmetadata!(t.col1, meta2)
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == length(t)
@test tt.col1 == t.col1
@test eltype(tt.col1) === Int64
@test Arrow.getmetadata(tt) == meta
@test Arrow.getmetadata(tt.col1) == meta2

# custom compressors: pass an explicitly constructed and initialized codec
# instance via `compress`
lz4 = Arrow.CodecLz4.LZ4FrameCompressor(; compressionlevel=8)
Arrow.CodecLz4.TranscodingStreams.initialize(lz4)
t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
io = IOBuffer()
Arrow.write(io, t; compress=lz4)
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == length(t)
@test all(isequal.(values(t), values(tt)))

zstd = Arrow.CodecZstd.ZstdCompressor(; level=8)
Arrow.CodecZstd.TranscodingStreams.initialize(zstd)
t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
io = IOBuffer()
Arrow.write(io, t; compress=zstd)
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == length(t)
@test all(isequal.(values(t), values(tt)))

# custom alignment
t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
io = IOBuffer()
Arrow.write(io, t; alignment=64)
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == length(t)
@test all(isequal.(values(t), values(tt)))

# 53 (presumably an issue number): a column of SubStrings reads back equal to
# the corresponding plain strings
s = "a" ^ 100
t = (a=[SubString(s, 1:10), SubString(s, 11:20)],)
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test tt.a == ["aaaaaaaaaa", "aaaaaaaaaa"]

# 49 (presumably an issue number): a nonexistent path raises ArgumentError
@test_throws ArgumentError Arrow.Table("file_that_doesnt_exist")

# 52 (presumably an issue number): dict-encoded column with 129 distinct values.
# NOTE(review): 129 is presumably chosen to exceed what a signed 8-bit index can
# address -- confirm. The original case only checked that nothing throws; the
# assertions below also verify that the decoded values are intact.
t = (a=Arrow.DictEncode(string.(1:129)),)
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == length(t)
@test tt.a == string.(1:129)

# 60: unequal column lengths
io = IOBuffer()
@test_throws ArgumentError Arrow.write(io, (a = Int[], b = ["asd"], c=collect(1:100)))

# nullability of custom extension types
t = (a=['a', missing],)
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test isequal(tt.a, ['a', missing])

# automatic custom struct serialization/deserialization
t = (col1=[CustomStruct(1, 2.3, "hey"), CustomStruct(4, 5.6, "there")],)
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == length(t)
@test all(isequal.(values(t), values(tt)))

# 76 (presumably an issue number): NamedTuple field typed as a Union of two
# concrete types
t = (col1=NamedTuple{(:a,),Tuple{Union{Int,String}}}[(a=1,), (a="x",)],)
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == length(t)
@test all(isequal.(values(t), values(tt)))

# 89 - test deprecation path for old UUID autoconversion
u = 0x6036fcbd20664bd8a65cdfa25434513f
@test Arrow.ArrowTypes.arrowconvert(UUID, (value=u,)) === UUID(u)

# 98 (presumably an issue number): `copy` of the read columns yields plain
# Vectors with the original element types
t = (
    a = [Nanosecond(0), Nanosecond(1)],
    b = [uuid4(), uuid4()],
    c = [missing, Nanosecond(1)],
)
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test copy(tt.a) isa Vector{Nanosecond}
@test copy(tt.b) isa Vector{UUID}
@test copy(tt.c) isa Vector{Union{Missing,Nanosecond}}

# copy on DictEncoding w/ missing values
x = PooledArray(["hey", missing])
x2 = Arrow.toarrowvector(x)
@test isequal(copy(x2), x)

end # @testset "misc"

end