// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.

8 #include "../../precomp.hpp"
9 #include <limits>
10 #include "common.hpp"
11 #include "internal.hpp"
12 #include "../include/op_permute.hpp"
13
14 namespace cv { namespace dnn { namespace vkcom {
15
16 #ifdef HAVE_VULKAN
17
// Parameter block pushed to the permute compute shader (pipeline is created
// with sizeof(PermuteParam) in forward()). Field order/types presumably must
// match the layout declared in permute_spv — TODO confirm against the shader.
struct PermuteParam {
      int global_size;   // total invocation count, aligned up to LOCAL_SZ_X
      int num_axes;      // tensor rank (dims_)
      int nthreads;      // number of real elements to process (ins[0].count())
};
23
needForPermutation(std::vector<int> & order)24 static bool needForPermutation(std::vector<int>& order)
25 {
26 for (int i = 0; i < order.size(); ++i)
27 {
28 if (order[i] != i)
29 return true;
30 }
31 return false;
32 }
33
OpPermute(std::vector<size_t> & order)34 OpPermute::OpPermute(std::vector<size_t>& order)
35 {
36 order_.assign(order.begin(), order.end());
37 dims_ = order.size();
38 need_permute_ = needForPermutation(order_);
39 type_ = "Permute";
40 if (need_permute_)
41 OpBase::initVulkanThing(5);
42 }
43
reshapeOutTensor(std::vector<Tensor * > & ins,std::vector<Tensor> & outs)44 void OpPermute::reshapeOutTensor(std::vector<Tensor *>& ins, std::vector<Tensor>& outs)
45 {
46 assert(!ins.empty());
47 assert(ins.size() == outs.size());
48
49 if (need_permute_)
50 {
51 assert(dims_ == ins[0]->dimNum());
52
53 Shape shape_before = ins[0]->getShape();
54 Shape shape_after;
55 for (size_t i = 0; i < dims_; i++)
56 {
57 shape_after.push_back(shape_before[order_[i]]);
58 }
59
60 for (size_t i = 0; i < ins.size(); i++)
61 {
62 assert(ins[i]->dimNum() == 4);
63 assert(ins[i]->dimSize(2) == shape_before[2] && ins[i]->dimSize(3) == shape_before[3]);
64 assert(ins[i]->count() == shapeCount(shape_after));
65 outs[i].reshape(NULL, shape_after);
66 }
67 }
68 else
69 {
70 for(int i = 0; i < ins.size(); i++)
71 {
72 Shape in_shape = ins[i]->getShape();
73 outs[i].reshape(NULL, in_shape);
74 }
75 }
76 }
77
// Precomputes row-major strides for the input (old) and output (new) shapes
// and packages them as 1-D int32 tensors for the shader.
void OpPermute::prepareStrides(const Shape &shape_before, const Shape &shape_after)
{
    assert(shape_before.size() == dims_);
    assert(shape_after.size() == dims_);

    old_stride_.resize(dims_);
    new_stride_.resize(dims_);

    // Innermost axis is contiguous.
    old_stride_[dims_ - 1] = 1;
    new_stride_[dims_ - 1] = 1;

    // Standard row-major strides: stride[i] = stride[i+1] * extent[i+1].
    for(int i = dims_ - 2; i >= 0; i--)
    {
        old_stride_[i] = old_stride_[i + 1] * shape_before[i + 1];
        new_stride_[i] = new_stride_[i + 1] * shape_after[i + 1];
    }

    // One-dimensional shape [dims_] for both stride tensors. The raw pointers
    // come from the member vectors old_stride_/new_stride_, which outlive this
    // call — NOTE(review): assumes Tensor::reshape copies/uploads the data (or
    // at least that the tensors never outlive this op); verify against Tensor.
    Shape shape(1, old_stride_.size());
    tensor_old_stride_.reshape((const char*)old_stride_.data(), shape, kFormatInt32);
    tensor_new_stride_.reshape((const char*)new_stride_.data(), shape, kFormatInt32);
}
99
// Three-argument layer entry point. Permute has no weights, so `blobs` is
// ignored and the call simply delegates to the two-argument overload.
bool OpPermute::forward(std::vector<Tensor>& ins,
                        std::vector<Tensor>& blobs,
                        std::vector<Tensor>& outs)
{
    return forward(ins, outs);
}
106
// Executes the permutation on the GPU: binds order/stride tensors plus each
// input/output pair to the descriptor set and dispatches the permute shader
// once per input. With an identity order it degenerates to a buffer copy.
// Returns true on completion.
bool OpPermute::forward(std::vector<Tensor>& ins, std::vector<Tensor>& outs)
{
    int num_ins = ins.size();
    in_shape_ = ins[0].getShape();
    out_shape_ = outs[0].getShape();
    if (!need_permute_)
    {
        // Identity permutation: plain copy, skipped when input and output
        // already alias the same Vulkan buffer.
        for (int i = 0; i < num_ins; i++)
        {
            assert(outs[i].count() == ins[i].count());
            if (outs[i].getBuffer() != ins[i].getBuffer())
                ins[i].copyTo(outs[i]);
        }
        return true;
    }

    // Lazily create the shader module and pipeline on first real use.
    if (pipeline_ == VK_NULL_HANDLE)
    {
        createShaderModule(permute_spv, sizeof(permute_spv));
        createPipeline(sizeof(PermuteParam));
    }

    // Strides are derived from the first input/output pair; order_ is uploaded
    // as a 1-D int32 tensor of length dims_.
    prepareStrides(ins[0].getShape(), outs[0].getShape());
    std::vector<int>shape(1, order_.size());
    tensor_order_.reshape((const char*)order_.data(), shape, kFormatInt32);
    // Descriptor bindings 1..3 (order, old strides, new strides) are shared by
    // every dispatch; bindings 0 and 4 (in/out) are rebound per input below.
    bindTensor(device_, tensor_order_, 1, descriptor_set_);
    bindTensor(device_, tensor_old_stride_, 2, descriptor_set_);
    bindTensor(device_, tensor_new_stride_, 3, descriptor_set_);

    nthreads_ = ins[0].count();
// NOTE: this macro is also used by computeGroupCount() below; it is the
// shader's local workgroup size along X.
#define LOCAL_SZ_X 256
    // Round the element count up to a whole number of workgroups.
    global_size_ = alignSize(nthreads_, LOCAL_SZ_X);
    computeGroupCount();

    PermuteParam param = {global_size_, dims_, nthreads_};
    for (int i = 0; i < num_ins; i++)
    {
        bindTensor(device_, ins[i], 0, descriptor_set_);
        bindTensor(device_, outs[i], 4, descriptor_set_);
        recordCommandBuffer((void *)&param, sizeof(PermuteParam));
        runCommandBuffer();
    }

    return true;
}
152
// Derives the 1-D dispatch dimensions from global_size_, which forward() has
// already aligned up to a multiple of LOCAL_SZ_X, so the division is exact.
bool OpPermute::computeGroupCount()
{
    group_x_ = global_size_ / LOCAL_SZ_X;
    group_y_ = 1;
    group_z_ = 1;
    return true;
}
160
161 #endif // HAVE_VULKAN
162
163 }}} // namespace cv::dnn::vkcom
164