1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17using Test, Arrow, Tables, Dates, PooledArrays, TimeZones, UUIDs
18
19include(joinpath(dirname(pathof(Arrow)), "../test/testtables.jl"))
20include(joinpath(dirname(pathof(Arrow)), "../test/integrationtest.jl"))
21include(joinpath(dirname(pathof(Arrow)), "../test/dates.jl"))
22
"""
    CustomStruct(x, y, z)

Plain immutable record with an integer, a floating-point and a string field.
Used by the "automatic custom struct serialization/deserialization" check in
the "misc" testset, where a column of `CustomStruct` values is written and
read back.
"""
struct CustomStruct
    x::Int      # integer field
    y::Float64  # floating-point field
    z::String   # string field
end
28
29@testset "Arrow" begin
30
@testset "table roundtrips" begin

# Run the round-trip check once per fixture. Each entry of `testtables` is
# splatted into `testtable` as its positional arguments (both names are
# presumably provided by the included test/testtables.jl — verify there).
foreach(testtables) do case
    testtable(case...)
end

end # @testset "table roundtrips"
38
@testset "arrow json integration tests" begin

# Directory holding the JSON fixtures, resolved relative to the installed
# Arrow package; computed once and reused for every file.
jsondir = joinpath(dirname(pathof(Arrow)), "../test/arrowjson")

for file in readdir(jsondir)
    jsonfile = joinpath(jsondir, file)
    println("integration test for $jsonfile")

    # Parse the JSON fixture, write it out in Arrow format to an in-memory
    # buffer, then read it back without converting column types.
    df = ArrowJSON.parsefile(jsonfile)
    buf = IOBuffer()
    Arrow.write(buf, df)
    seekstart(buf)
    tbl = Arrow.Table(buf; convert=false)

    # The table read back must be `isequal` to the parsed fixture.
    @test isequal(df, tbl)
end

end # @testset "arrow json integration tests"
53
@testset "misc" begin

# multiple record batches: two partitions holding the same values are written
# to one stream; reading them back as a single table concatenates the column
part = Union{Int64, Missing}[1,2,3,4,5,6,7,8,9,missing]
t = Tables.partitioner(((col1=part,), (col1=copy(part),)))
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == 1
@test isequal(tt.col1, vcat(part, part))
@test eltype(tt.col1) === Union{Int64, Missing}

# Arrow.Stream: iterating the same bytes yields one table per partition
# (two in total) and then `nothing`
seekstart(io)
str = Arrow.Stream(io)
state = iterate(str)
@test state !== nothing
tt, st = state
@test length(tt) == 1
@test isequal(tt.col1, part)

state = iterate(str, st)
@test state !== nothing
tt, st = state
@test length(tt) == 1
@test isequal(tt.col1, part)

@test iterate(str, st) === nothing

# dictionary batch isDelta: the second partition introduces values
# ("sailor2") that are absent from the first, with dictionary encoding
# enabled for both top-level and nested columns
Row = NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}
rows1 = Row[
    (a=Int64(1), b=missing),
    (a=Int64(1), b=missing),
    (a=Int64(3), b=(c="sailor",)),
    (a=Int64(4), b=(c="jo-bob",)),
]
rows2 = Row[
    (a=Int64(1), b=missing),
    (a=Int64(1), b=missing),
    (a=Int64(5), b=(c="sailor2",)),
    (a=Int64(4), b=(c="jo-bob",)),
]
t = (
    col1=Int64[1,2,3,4],
    col2=Union{String, Missing}["hey", "there", "sailor", missing],
    col3=rows1,
)
t2 = (
    col1=Int64[1,2,5,6],
    col2=Union{String, Missing}["hey", "there", "sailor2", missing],
    col3=rows2,
)
parts = Tables.partitioner((t, t2))
io = IOBuffer()
Arrow.write(io, parts; dictencode=true, dictencodenested=true)
seekstart(io)
tt = Arrow.Table(io)
@test tt.col1 == [1,2,3,4,1,2,5,6]
@test isequal(tt.col2, ["hey", "there", "sailor", missing, "hey", "there", "sailor2", missing])
@test isequal(tt.col3, vcat(rows1, rows2))

# table-level and column-level metadata must both survive a round trip
t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
meta = Dict("key1" => "value1", "key2" => "value2")
Arrow.setmetadata!(t, meta)
meta2 = Dict("colkey1" => "colvalue1", "colkey2" => "colvalue2")
Arrow.setmetadata!(t.col1, meta2)
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == length(t)
@test tt.col1 == t.col1
@test eltype(tt.col1) === Int64
@test Arrow.getmetadata(tt) == meta
@test Arrow.getmetadata(tt.col1) == meta2

# custom compressors: a caller-constructed codec is passed via `compress`.
# The codec is initialized explicitly here, so it is also finalized explicitly
# once the write is done (even if the write throws) to release its resources.
lz4 = Arrow.CodecLz4.LZ4FrameCompressor(; compressionlevel=8)
Arrow.CodecLz4.TranscodingStreams.initialize(lz4)
t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
io = IOBuffer()
try
    Arrow.write(io, t; compress=lz4)
finally
    Arrow.CodecLz4.TranscodingStreams.finalize(lz4)
end
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == length(t)
@test all(isequal.(values(t), values(tt)))

zstd = Arrow.CodecZstd.ZstdCompressor(; level=8)
Arrow.CodecZstd.TranscodingStreams.initialize(zstd)
t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
io = IOBuffer()
try
    Arrow.write(io, t; compress=zstd)
finally
    Arrow.CodecZstd.TranscodingStreams.finalize(zstd)
end
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == length(t)
@test all(isequal.(values(t), values(tt)))

# custom alignment
t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
io = IOBuffer()
Arrow.write(io, t; alignment=64)
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == length(t)
@test all(isequal.(values(t), values(tt)))

# 53: a column of SubString values reads back equal to plain strings
s = "a" ^ 100
t = (a=[SubString(s, 1:10), SubString(s, 11:20)],)
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test tt.a == ["aaaaaaaaaa", "aaaaaaaaaa"]

# 49: opening a path that does not exist raises ArgumentError
@test_throws ArgumentError Arrow.Table("file_that_doesnt_exist")

# 52: dict-encoded column with 129 distinct values (presumably chosen to
# exceed what the smallest index type can address — TODO confirm).
# Besides not throwing, the values read back must match what was written.
t = (a=Arrow.DictEncode(string.(1:129)),)
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == length(t)
@test tt.a == string.(1:129)

# 60: unequal column lengths
io = IOBuffer()
@test_throws ArgumentError Arrow.write(io, (a = Int[], b = ["asd"], c=collect(1:100)))

# nullability of custom extension types: a Char column containing `missing`
t = (a=['a', missing],)
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test isequal(tt.a, ['a', missing])

# automatic custom struct serialization/deserialization
t = (col1=[CustomStruct(1, 2.3, "hey"), CustomStruct(4, 5.6, "there")],)
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == length(t)
@test all(isequal.(values(t), values(tt)))

# 76: NamedTuple elements whose field is a Union of Int and String
t = (col1=NamedTuple{(:a,),Tuple{Union{Int,String}}}[(a=1,), (a="x",)],)
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test length(tt) == length(t)
@test all(isequal.(values(t), values(tt)))

# 89 - test deprecation path for old UUID autoconversion
u = 0x6036fcbd20664bd8a65cdfa25434513f
@test Arrow.ArrowTypes.arrowconvert(UUID, (value=u,)) === UUID(u)

# 98: copying Nanosecond / UUID columns yields plain Vectors of those types
t = (a = [Nanosecond(0), Nanosecond(1)], b = [uuid4(), uuid4()], c = [missing, Nanosecond(1)])
io = IOBuffer()
Arrow.write(io, t)
seekstart(io)
tt = Arrow.Table(io)
@test copy(tt.a) isa Vector{Nanosecond}
@test copy(tt.b) isa Vector{UUID}
@test copy(tt.c) isa Vector{Union{Missing,Nanosecond}}

# copy on DictEncoding w/ missing values
x = PooledArray(["hey", missing])
x2 = Arrow.toarrowvector(x)
@test isequal(copy(x2), x)

end # @testset "misc"
217
218end
219