1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 // A command line executable that generates a bunch of valid IPC files
19 // containing example tensors. Those are used as fuzzing seeds to make
20 // fuzzing more efficient.
21
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>
27
28 #include "arrow/io/file.h"
29 #include "arrow/io/memory.h"
30 #include "arrow/ipc/test_common.h"
31 #include "arrow/ipc/writer.h"
32 #include "arrow/result.h"
33 #include "arrow/tensor.h"
34 #include "arrow/util/io_util.h"
35
36 namespace arrow {
37 namespace ipc {
38
39 using ::arrow::internal::PlatformFilename;
40
PrepareDirectory(const std::string & dir)41 Result<PlatformFilename> PrepareDirectory(const std::string& dir) {
42 ARROW_ASSIGN_OR_RAISE(auto dir_fn, PlatformFilename::FromString(dir));
43 RETURN_NOT_OK(::arrow::internal::CreateDir(dir_fn));
44 return std::move(dir_fn);
45 }
46
MakeSerializedBuffer(std::function<Status (const std::shared_ptr<io::BufferOutputStream> &)> fn)47 Result<std::shared_ptr<Buffer>> MakeSerializedBuffer(
48 std::function<Status(const std::shared_ptr<io::BufferOutputStream>&)> fn) {
49 ARROW_ASSIGN_OR_RAISE(auto sink, io::BufferOutputStream::Create(1024));
50 RETURN_NOT_OK(fn(sink));
51 return sink->Finish();
52 }
53
SerializeTensor(const std::shared_ptr<Tensor> & tensor)54 Result<std::shared_ptr<Buffer>> SerializeTensor(const std::shared_ptr<Tensor>& tensor) {
55 return MakeSerializedBuffer(
56 [&](const std::shared_ptr<io::BufferOutputStream>& sink) -> Status {
57 int32_t metadata_length;
58 int64_t body_length;
59 return ipc::WriteTensor(*tensor, sink.get(), &metadata_length, &body_length);
60 });
61 }
62
Tensors()63 Result<std::vector<std::shared_ptr<Tensor>>> Tensors() {
64 std::vector<std::shared_ptr<Tensor>> tensors;
65 std::shared_ptr<Tensor> tensor;
66 std::vector<int64_t> shape = {5, 3, 7};
67 std::shared_ptr<DataType> types[] = {int8(), int16(), int32(), int64(),
68 uint8(), uint16(), uint32(), uint64()};
69 uint32_t seed = 0;
70 for (auto type : types) {
71 RETURN_NOT_OK(
72 test::MakeRandomTensor(type, shape, /*row_major_p=*/true, &tensor, seed++));
73 tensors.push_back(tensor);
74 RETURN_NOT_OK(
75 test::MakeRandomTensor(type, shape, /*row_major_p=*/false, &tensor, seed++));
76 tensors.push_back(tensor);
77 }
78 return tensors;
79 }
80
GenerateTensors(const PlatformFilename & dir_fn)81 Status GenerateTensors(const PlatformFilename& dir_fn) {
82 int sample_num = 1;
83 auto sample_name = [&]() -> std::string {
84 return "tensor-" + std::to_string(sample_num++);
85 };
86
87 ARROW_ASSIGN_OR_RAISE(auto tensors, Tensors());
88
89 for (const auto& tensor : tensors) {
90 ARROW_ASSIGN_OR_RAISE(auto buf, SerializeTensor(tensor));
91 ARROW_ASSIGN_OR_RAISE(auto sample_fn, dir_fn.Join(sample_name()));
92 std::cerr << sample_fn.ToString() << std::endl;
93 ARROW_ASSIGN_OR_RAISE(auto file, io::FileOutputStream::Open(sample_fn.ToString()));
94 RETURN_NOT_OK(file->Write(buf));
95 RETURN_NOT_OK(file->Close());
96 }
97 return Status::OK();
98 }
99
DoMain(const std::string & out_dir)100 Status DoMain(const std::string& out_dir) {
101 ARROW_ASSIGN_OR_RAISE(auto dir_fn, PrepareDirectory(out_dir));
102 return GenerateTensors(dir_fn);
103 }
104
Usage()105 ARROW_NORETURN void Usage() {
106 std::cerr << "Usage: arrow-ipc-generate-tensor-fuzz-corpus "
107 << "-stream <output directory>" << std::endl;
108 std::exit(2);
109 }
110
Main(int argc,char ** argv)111 int Main(int argc, char** argv) {
112 if (argc != 3) {
113 Usage();
114 }
115
116 auto opt = std::string(argv[1]);
117 if (opt != "-stream") {
118 Usage();
119 }
120
121 auto out_dir = std::string(argv[2]);
122
123 Status st = DoMain(out_dir);
124 if (!st.ok()) {
125 std::cerr << st.ToString() << std::endl;
126 return 1;
127 }
128 return 0;
129 }
130
131 } // namespace ipc
132 } // namespace arrow
133
main(int argc,char ** argv)134 int main(int argc, char** argv) { return arrow::ipc::Main(argc, argv); }
135