# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::Table: the constructor variants (arrays, chunked
# arrays, record batches) and the instance methods exercised below.
class TestTable < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Omittable

  sub_test_case(".new") do
    # Builds the two-boolean-column schema ("visible", "valid") shared
    # by every constructor test.
    def setup
      @fields = [
        Arrow::Field.new("visible", Arrow::BooleanDataType.new),
        Arrow::Field.new("valid", Arrow::BooleanDataType.new),
      ]
      @schema = Arrow::Schema.new(@fields)
    end

    # Flattens +table+ into [[field_name, [value, ...]], ...], one entry
    # per column, concatenating the values of all chunks in order. This
    # gives the tests a plain-Ruby structure to compare against.
    def dump_table(table)
      table.n_columns.times.collect do |i|
        field = table.schema.get_field(i)
        chunked_array = table.get_column_data(i)
        values = []
        chunked_array.chunks.each do |chunk|
          chunk.length.times do |j|
            values << chunk.get_value(j)
          end
        end
        [
          field.name,
          values,
        ]
      end
    end

    def test_arrays
      # NOTE(review): presumably skips the test unless the GI bindings
      # are at least 3.3.1 -- confirm against Helper::Omittable.
      require_gi_bindings(3, 3, 1)
      arrays = [
        build_boolean_array([true]),
        build_boolean_array([false]),
      ]
      table = Arrow::Table.new(@schema, arrays)
      assert_equal([
                     ["visible", [true]],
                     ["valid", [false]],
                   ],
                   dump_table(table))
    end

    def test_chunked_arrays
      require_gi_bindings(3, 3, 1)
      arrays = [
        Arrow::ChunkedArray.new([build_boolean_array([true]),
                                 build_boolean_array([false])]),
        Arrow::ChunkedArray.new([build_boolean_array([false]),
                                 build_boolean_array([true])]),
      ]
      table = Arrow::Table.new(@schema, arrays)
      assert_equal([
                     ["visible", [true, false]],
                     ["valid", [false, true]],
                   ],
                   dump_table(table))
    end

    def test_record_batches
      require_gi_bindings(3, 3, 1)
      record_batches = [
        build_record_batch({
                             "visible" => build_boolean_array([true]),
                             "valid" => build_boolean_array([false])
                           }),
        build_record_batch({
                             "visible" => build_boolean_array([false]),
                             "valid" => build_boolean_array([true])
                           }),
      ]
      table = Arrow::Table.new(@schema, record_batches)

      assert_equal([
                     ["visible", [true, false]],
                     ["valid", [false, true]],
                   ],
                   dump_table(table))
    end
  end

  sub_test_case("instance methods") do
    # Builds @table: one row, columns "visible" => true, "valid" => false.
    def setup
      @fields = [
        Arrow::Field.new("visible", Arrow::BooleanDataType.new),
        Arrow::Field.new("valid", Arrow::BooleanDataType.new),
      ]
      @schema = Arrow::Schema.new(@fields)
      @columns = [
        build_boolean_array([true]),
        build_boolean_array([false]),
      ]
      @table = Arrow::Table.new(@schema, @columns)
    end

    def test_equal
      other_table = Arrow::Table.new(@schema, @columns)
      assert_equal(@table, other_table)
    end

    def test_equal_metadata
      other_table = Arrow::Table.new(@schema, @columns)
      assert do
        @table.equal_metadata(other_table, true)
      end
    end

    def test_schema
      assert_equal(["visible", "valid"],
                   @table.schema.fields.collect(&:name))
    end

    def test_column_data
      # Index -1 is used on purpose: it is expected to address the last
      # column ("valid"), whose only value is false.
      assert_equal([
                     Arrow::ChunkedArray.new([build_boolean_array([true])]),
                     Arrow::ChunkedArray.new([build_boolean_array([false])]),
                   ],
                   [
                     @table.get_column_data(0),
                     @table.get_column_data(-1),
                   ])
    end

    def test_n_columns
      assert_equal(2, @table.n_columns)
    end

    def test_n_rows
      assert_equal(1, @table.n_rows)
    end

    def test_add_column
      field = Arrow::Field.new("added", Arrow::BooleanDataType.new)
      chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true])])
      new_table = @table.add_column(1, field, chunked_array)
      assert_equal(["visible", "added", "valid"],
                   new_table.schema.fields.collect(&:name))
    end

    def test_remove_column
      new_table = @table.remove_column(0)
      assert_equal(["valid"],
                   new_table.schema.fields.collect(&:name))
    end

    def test_replace_column
      field = Arrow::Field.new("added", Arrow::BooleanDataType.new)
      chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true])])
      new_table = @table.replace_column(0, field, chunked_array)
      assert_equal(["added", "valid"],
                   new_table.schema.fields.collect(&:name))
    end

    def test_to_s
      table = build_table("valid" => build_boolean_array([true, false, true]))
      # NOTE(review): the heredoc body starts at column 0 because <<-
      # keeps leading whitespace. Nesting is written with two spaces per
      # level -- confirm against the actual Arrow::Table#to_s output.
      assert_equal(<<-TABLE, table.to_s)
valid: bool
----
valid:
  [
    [
      true,
      false,
      true
    ]
  ]
      TABLE
    end

    def test_concatenate
      table = build_table("visible" => build_boolean_array([true, false, true, false]))
      table1 = build_table("visible" => build_boolean_array([true]))
      table2 = build_table("visible" => build_boolean_array([false, true]))
      table3 = build_table("visible" => build_boolean_array([false]))
      assert_equal(table, table1.concatenate([table2, table3]))
    end

    sub_test_case("#slice") do
      test("offset: positive") do
        visibles = [true, false, true]
        table = build_table("visible" => build_boolean_array(visibles))
        assert_equal(build_table("visible" => build_boolean_array([false, true])),
                     table.slice(1, 2))
      end

      test("offset: negative") do
        visibles = [true, false, true]
        table = build_table("visible" => build_boolean_array(visibles))
        assert_equal(build_table("visible" => build_boolean_array([false, true])),
                     table.slice(-2, 2))
      end
    end

    def test_combine_chunks
      table = build_table(
        "visible" => Arrow::ChunkedArray.new([build_boolean_array([true, false, true]),
                                              build_boolean_array([false, true]),
                                              build_boolean_array([false])])
      )
      combined_table = table.combine_chunks
      # Collect the values chunk by chunk so the assertion also checks
      # that exactly one chunk remains per column.
      all_values = combined_table.n_columns.times.collect do |i|
        column = combined_table.get_column_data(i)
        column.n_chunks.times.collect do |j|
          column.get_chunk(j).values
        end
      end
      assert_equal([[[true, false, true, false, true, false]]],
                   all_values)
    end

    sub_test_case("#write_as_feather") do
      # Creates a temporary file for the test and removes it afterwards.
      # The yield hands control back to the test framework while the
      # tempfile is alive (test-unit's around-style setup).
      def setup
        super
        @tempfile = Tempfile.open("arrow-table-write-as-feather")
        begin
          yield
        ensure
          @tempfile.close!
        end
      end

      # Opens a file output stream on the tempfile, yields it, and always
      # closes it, so a failing write does not leak the stream.
      def open_output
        output = Arrow::FileOutputStream.new(@tempfile.path, false)
        begin
          yield(output)
        ensure
          output.close
        end
      end

      # Reads the tempfile back as Feather and yields the resulting table.
      # The input stream is closed even if creating the reader, reading,
      # or the block itself raises.
      def read_feather
        input = Arrow::MemoryMappedInputStream.new(@tempfile.path)
        begin
          reader = Arrow::FeatherFileReader.new(input)
          yield(reader.read)
        ensure
          input.close
        end
      end

      test("default") do
        open_output do |output|
          @table.write_as_feather(output)
        end

        read_feather do |read_table|
          assert_equal(@table, read_table)
        end
      end

      test("compression") do
        properties = Arrow::FeatherWriteProperties.new
        properties.compression = :zstd
        open_output do |output|
          @table.write_as_feather(output, properties)
        end

        read_feather do |read_table|
          assert_equal(@table, read_table)
        end
      end
    end
  end
end