1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
# Tests for Arrow::Table: the constructor variants (".new") and the
# instance methods, including Feather serialization.
class TestTable < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Omittable

  sub_test_case(".new") do
    # Builds a two-column boolean schema ("visible", "valid") shared by
    # every constructor test.
    def setup
      @fields = [
        Arrow::Field.new("visible", Arrow::BooleanDataType.new),
        Arrow::Field.new("valid", Arrow::BooleanDataType.new),
      ]
      @schema = Arrow::Schema.new(@fields)
    end

    # Converts +table+ into plain Ruby data so it can be compared with
    # literals: one [field_name, values] pair per column, where values is
    # the concatenation of every chunk's elements in chunk order.
    def dump_table(table)
      table.n_columns.times.collect do |i|
        name = table.schema.get_field(i).name
        values = table.get_column_data(i).chunks.flat_map do |chunk|
          chunk.length.times.collect { |j| chunk.get_value(j) }
        end
        [name, values]
      end
    end

    # NOTE(review): require_gi_bindings comes from a helper outside this
    # file; presumably it omits the test when the gobject-introspection
    # bindings are older than 3.3.1 -- confirm in Helper::Omittable.
    def test_arrays
      require_gi_bindings(3, 3, 1)
      arrays = [
        build_boolean_array([true]),
        build_boolean_array([false]),
      ]
      table = Arrow::Table.new(@schema, arrays)
      expected = [
        ["visible", [true]],
        ["valid", [false]],
      ]
      assert_equal(expected, dump_table(table))
    end

    def test_chunked_arrays
      require_gi_bindings(3, 3, 1)
      arrays = [
        Arrow::ChunkedArray.new([build_boolean_array([true]),
                                 build_boolean_array([false])]),
        Arrow::ChunkedArray.new([build_boolean_array([false]),
                                 build_boolean_array([true])]),
      ]
      table = Arrow::Table.new(@schema, arrays)
      expected = [
        ["visible", [true, false]],
        ["valid", [false, true]],
      ]
      assert_equal(expected, dump_table(table))
    end

    def test_record_batches
      require_gi_bindings(3, 3, 1)
      record_batches = [
        build_record_batch({
                             "visible" => build_boolean_array([true]),
                             "valid" => build_boolean_array([false])
                           }),
        build_record_batch({
                             "visible" => build_boolean_array([false]),
                             "valid" => build_boolean_array([true])
                           }),
      ]
      table = Arrow::Table.new(@schema, record_batches)

      expected = [
        ["visible", [true, false]],
        ["valid", [false, true]],
      ]
      assert_equal(expected, dump_table(table))
    end
  end

  sub_test_case("instance methods") do
    # Builds @table: one row, two boolean columns
    # ("visible" => true, "valid" => false).
    def setup
      @fields = [
        Arrow::Field.new("visible", Arrow::BooleanDataType.new),
        Arrow::Field.new("valid", Arrow::BooleanDataType.new),
      ]
      @schema = Arrow::Schema.new(@fields)
      @columns = [
        build_boolean_array([true]),
        build_boolean_array([false]),
      ]
      @table = Arrow::Table.new(@schema, @columns)
    end

    def test_equal
      other_table = Arrow::Table.new(@schema, @columns)
      assert_equal(@table, other_table)
    end

    def test_equal_metadata
      other_table = Arrow::Table.new(@schema, @columns)
      assert do
        @table.equal_metadata(other_table, true)
      end
    end

    def test_schema
      assert_equal(["visible", "valid"],
                   @table.schema.fields.collect(&:name))
    end

    # Index -1 is expected to resolve to the last column ("valid").
    def test_column_data
      expected = [
        Arrow::ChunkedArray.new([build_boolean_array([true])]),
        Arrow::ChunkedArray.new([build_boolean_array([false])]),
      ]
      actual = [
        @table.get_column_data(0),
        @table.get_column_data(-1),
      ]
      assert_equal(expected, actual)
    end

    def test_n_columns
      assert_equal(2, @table.n_columns)
    end

    def test_n_rows
      assert_equal(1, @table.n_rows)
    end

    # Adding at index 1 must place the new column between the existing two.
    def test_add_column
      field = Arrow::Field.new("added", Arrow::BooleanDataType.new)
      chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true])])
      new_table = @table.add_column(1, field, chunked_array)
      assert_equal(["visible", "added", "valid"],
                   new_table.schema.fields.collect(&:name))
    end

    def test_remove_column
      new_table = @table.remove_column(0)
      assert_equal(["valid"],
                   new_table.schema.fields.collect(&:name))
    end

    def test_replace_column
      field = Arrow::Field.new("added", Arrow::BooleanDataType.new)
      chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true])])
      new_table = @table.replace_column(0, field, chunked_array)
      assert_equal(["added", "valid"],
                   new_table.schema.fields.collect(&:name))
    end

    # The heredoc body starts at column 0 on purpose: <<- only allows the
    # terminator to be indented, it does not strip leading whitespace from
    # the content.
    def test_to_s
      table = build_table("valid" => build_boolean_array([true, false, true]))
      assert_equal(<<-TABLE, table.to_s)
valid: bool
----
valid:
  [
    [
      true,
      false,
      true
    ]
  ]
      TABLE
    end

    def test_concatenate
      table = build_table(
        "visible" => build_boolean_array([true, false, true, false])
      )
      table1 = build_table("visible" => build_boolean_array([true]))
      table2 = build_table("visible" => build_boolean_array([false, true]))
      table3 = build_table("visible" => build_boolean_array([false]))
      assert_equal(table, table1.concatenate([table2, table3]))
    end

    sub_test_case("#slice") do
      test("offset: positive") do
        visibles = [true, false, true]
        table = build_table("visible" => build_boolean_array(visibles))
        assert_equal(build_table("visible" => build_boolean_array([false, true])),
                     table.slice(1, 2))
      end

      # A negative offset is expected to count from the end of the table.
      test("offset: negative") do
        visibles = [true, false, true]
        table = build_table("visible" => build_boolean_array(visibles))
        assert_equal(build_table("visible" => build_boolean_array([false, true])),
                     table.slice(-2, 2))
      end
    end

    # Three chunks go in; after combine_chunks the column must consist of a
    # single chunk holding all six values in order.
    def test_combine_chunks
      chunks = [
        build_boolean_array([true, false, true]),
        build_boolean_array([false, true]),
        build_boolean_array([false]),
      ]
      table = build_table("visible" => Arrow::ChunkedArray.new(chunks))
      combined_table = table.combine_chunks
      all_values = combined_table.n_columns.times.collect do |i|
        column = combined_table.get_column_data(i)
        column.n_chunks.times.collect do |j|
          column.get_chunk(j).values
        end
      end
      assert_equal([[[true, false, true, false, true, false]]],
                   all_values)
    end

    sub_test_case("#write_as_feather") do
      # Creates the tempfile the Feather data is written to and read from.
      # Whatever runs at the yield (presumably the test body -- confirm
      # against test-unit's setup handling) is wrapped so that the tempfile
      # is always closed and unlinked afterwards.
      def setup
        super
        @tempfile = Tempfile.open("arrow-table-write-as-feather")
        begin
          yield
        ensure
          @tempfile.close!
        end
      end

      # Writes @table to the tempfile as Feather. Any +write_args+ are
      # passed to write_as_feather after the output stream. The stream is
      # closed even when writing raises, so a failing test does not leak it.
      def write_feather(*write_args)
        # NOTE(review): the second argument looks like the "append" flag of
        # the Arrow GLib file output stream -- confirm against the API docs.
        output = Arrow::FileOutputStream.new(@tempfile.path, false)
        begin
          @table.write_as_feather(output, *write_args)
        ensure
          output.close
        end
      end

      # Reads the tempfile back as Feather and yields the result of
      # reader.read. The reader is created inside the protected region so
      # the input stream is closed even if creating the reader fails.
      def read_feather
        input = Arrow::MemoryMappedInputStream.new(@tempfile.path)
        begin
          reader = Arrow::FeatherFileReader.new(input)
          yield(reader.read)
        ensure
          input.close
        end
      end

      test("default") do
        write_feather

        read_feather do |read_table|
          assert_equal(@table, read_table)
        end
      end

      test("compression") do
        properties = Arrow::FeatherWriteProperties.new
        properties.compression = :zstd
        write_feather(properties)

        read_feather do |read_table|
          assert_equal(@table, read_table)
        end
      end
    end
  end
end
274