1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 // A command line executable that generates a bunch of valid IPC files
19 // containing example tensors.  Those are used as fuzzing seeds to make
20 // fuzzing more efficient.
21 
22 #include <cstdlib>
23 #include <iostream>
24 #include <memory>
25 #include <string>
26 #include <vector>
27 
28 #include "arrow/io/file.h"
29 #include "arrow/io/memory.h"
30 #include "arrow/ipc/test_common.h"
31 #include "arrow/ipc/writer.h"
32 #include "arrow/result.h"
33 #include "arrow/tensor.h"
34 #include "arrow/util/io_util.h"
35 
36 namespace arrow {
37 namespace ipc {
38 
39 using ::arrow::internal::PlatformFilename;
40 
PrepareDirectory(const std::string & dir)41 Result<PlatformFilename> PrepareDirectory(const std::string& dir) {
42   ARROW_ASSIGN_OR_RAISE(auto dir_fn, PlatformFilename::FromString(dir));
43   RETURN_NOT_OK(::arrow::internal::CreateDir(dir_fn));
44   return std::move(dir_fn);
45 }
46 
MakeSerializedBuffer(std::function<Status (const std::shared_ptr<io::BufferOutputStream> &)> fn)47 Result<std::shared_ptr<Buffer>> MakeSerializedBuffer(
48     std::function<Status(const std::shared_ptr<io::BufferOutputStream>&)> fn) {
49   ARROW_ASSIGN_OR_RAISE(auto sink, io::BufferOutputStream::Create(1024));
50   RETURN_NOT_OK(fn(sink));
51   return sink->Finish();
52 }
53 
SerializeTensor(const std::shared_ptr<Tensor> & tensor)54 Result<std::shared_ptr<Buffer>> SerializeTensor(const std::shared_ptr<Tensor>& tensor) {
55   return MakeSerializedBuffer(
56       [&](const std::shared_ptr<io::BufferOutputStream>& sink) -> Status {
57         int32_t metadata_length;
58         int64_t body_length;
59         return ipc::WriteTensor(*tensor, sink.get(), &metadata_length, &body_length);
60       });
61 }
62 
Tensors()63 Result<std::vector<std::shared_ptr<Tensor>>> Tensors() {
64   std::vector<std::shared_ptr<Tensor>> tensors;
65   std::shared_ptr<Tensor> tensor;
66   std::vector<int64_t> shape = {5, 3, 7};
67   std::shared_ptr<DataType> types[] = {int8(),  int16(),  int32(),  int64(),
68                                        uint8(), uint16(), uint32(), uint64()};
69   uint32_t seed = 0;
70   for (auto type : types) {
71     RETURN_NOT_OK(
72         test::MakeRandomTensor(type, shape, /*row_major_p=*/true, &tensor, seed++));
73     tensors.push_back(tensor);
74     RETURN_NOT_OK(
75         test::MakeRandomTensor(type, shape, /*row_major_p=*/false, &tensor, seed++));
76     tensors.push_back(tensor);
77   }
78   return tensors;
79 }
80 
GenerateTensors(const PlatformFilename & dir_fn)81 Status GenerateTensors(const PlatformFilename& dir_fn) {
82   int sample_num = 1;
83   auto sample_name = [&]() -> std::string {
84     return "tensor-" + std::to_string(sample_num++);
85   };
86 
87   ARROW_ASSIGN_OR_RAISE(auto tensors, Tensors());
88 
89   for (const auto& tensor : tensors) {
90     ARROW_ASSIGN_OR_RAISE(auto buf, SerializeTensor(tensor));
91     ARROW_ASSIGN_OR_RAISE(auto sample_fn, dir_fn.Join(sample_name()));
92     std::cerr << sample_fn.ToString() << std::endl;
93     ARROW_ASSIGN_OR_RAISE(auto file, io::FileOutputStream::Open(sample_fn.ToString()));
94     RETURN_NOT_OK(file->Write(buf));
95     RETURN_NOT_OK(file->Close());
96   }
97   return Status::OK();
98 }
99 
DoMain(const std::string & out_dir)100 Status DoMain(const std::string& out_dir) {
101   ARROW_ASSIGN_OR_RAISE(auto dir_fn, PrepareDirectory(out_dir));
102   return GenerateTensors(dir_fn);
103 }
104 
Usage()105 ARROW_NORETURN void Usage() {
106   std::cerr << "Usage: arrow-ipc-generate-tensor-fuzz-corpus "
107             << "-stream <output directory>" << std::endl;
108   std::exit(2);
109 }
110 
Main(int argc,char ** argv)111 int Main(int argc, char** argv) {
112   if (argc != 3) {
113     Usage();
114   }
115 
116   auto opt = std::string(argv[1]);
117   if (opt != "-stream") {
118     Usage();
119   }
120 
121   auto out_dir = std::string(argv[2]);
122 
123   Status st = DoMain(out_dir);
124   if (!st.ok()) {
125     std::cerr << st.ToString() << std::endl;
126     return 1;
127   }
128   return 0;
129 }
130 
131 }  // namespace ipc
132 }  // namespace arrow
133 
main(int argc,char ** argv)134 int main(int argc, char** argv) { return arrow::ipc::Main(argc, argv); }
135