1 // SPDX-License-Identifier: Apache-2.0
2 //
3 // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au)
4 // Copyright 2008-2016 National ICT Australia (NICTA)
5 //
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 // ------------------------------------------------------------------------
17 
18 
19 //! \addtogroup glue_conv
20 //! @{
21 
22 
23 
24 // TODO: this implementation of conv() is rudimentary; replace with faster version
25 template<typename eT>
26 inline
27 void
apply(Mat<eT> & out,const Mat<eT> & A,const Mat<eT> & B,const bool A_is_col)28 glue_conv::apply(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>& B, const bool A_is_col)
29   {
30   arma_extra_debug_sigprint();
31 
32   const Mat<eT>& h = (A.n_elem <= B.n_elem) ? A : B;
33   const Mat<eT>& x = (A.n_elem <= B.n_elem) ? B : A;
34 
35   const uword   h_n_elem    = h.n_elem;
36   const uword   h_n_elem_m1 = h_n_elem - 1;
37   const uword   x_n_elem    = x.n_elem;
38   const uword out_n_elem    = ((h_n_elem + x_n_elem) > 0) ? (h_n_elem + x_n_elem - 1) : uword(0);
39 
40   if( (h_n_elem == 0) || (x_n_elem == 0) )  { out.zeros(); return; }
41 
42 
43   Col<eT> hh(h_n_elem, arma_nozeros_indicator());  // flipped version of h
44 
45   const eT*   h_mem =  h.memptr();
46         eT*  hh_mem = hh.memptr();
47 
48   for(uword i=0; i < h_n_elem; ++i)
49     {
50     hh_mem[h_n_elem_m1-i] = h_mem[i];
51     }
52 
53 
54   Col<eT> xx( (x_n_elem + 2*h_n_elem_m1), arma_zeros_indicator() );  // zero padded version of x
55 
56   const eT*  x_mem =  x.memptr();
57         eT* xx_mem = xx.memptr();
58 
59   arrayops::copy( &(xx_mem[h_n_elem_m1]), x_mem, x_n_elem );
60 
61 
62   (A_is_col) ? out.set_size(out_n_elem, 1) : out.set_size(1, out_n_elem);
63 
64   eT* out_mem = out.memptr();
65 
66   for(uword i=0; i < out_n_elem; ++i)
67     {
68     // out_mem[i] = dot( hh, xx.subvec(i, (i + h_n_elem_m1)) );
69 
70     out_mem[i] = op_dot::direct_dot( h_n_elem, hh_mem, &(xx_mem[i]) );
71     }
72   }
73 
74 
75 
76 // // alternative implementation of 1d convolution
77 // template<typename eT>
78 // inline
79 // void
80 // glue_conv::apply(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>& B, const bool A_is_col)
81 //   {
82 //   arma_extra_debug_sigprint();
83 //
84 //   const Mat<eT>& h = (A.n_elem <= B.n_elem) ? A : B;
85 //   const Mat<eT>& x = (A.n_elem <= B.n_elem) ? B : A;
86 //
87 //   const uword   h_n_elem    = h.n_elem;
88 //   const uword   h_n_elem_m1 = h_n_elem - 1;
89 //   const uword   x_n_elem    = x.n_elem;
90 //   const uword out_n_elem    = ((h_n_elem + x_n_elem) > 0) ? (h_n_elem + x_n_elem - 1) : uword(0);
91 //
92 //   if( (h_n_elem == 0) || (x_n_elem == 0) )  { out.zeros(); return; }
93 //
94 //
95 //   Col<eT> hh(h_n_elem, arma_nozeros_indicator());  // flipped version of h
96 //
97 //   const eT*   h_mem =  h.memptr();
98 //         eT*  hh_mem = hh.memptr();
99 //
100 //   for(uword i=0; i < h_n_elem; ++i)
101 //     {
102 //     hh_mem[h_n_elem_m1-i] = h_mem[i];
103 //     }
104 //
105 //   // construct HH matrix, with the column containing shifted versions of hh;
106 //   // upper limit for number of zeros is about 50%; may not be optimal
107 //   const uword N_copies = (std::min)(uword(10), h_n_elem);
108 //
109 //   const uword HH_n_rows = h_n_elem + (N_copies-1);
110 //
111 //   Mat<eT> HH(HH_n_rows, N_copies, arma_zeros_indicator());
112 //
113 //   for(uword i=0; i<N_copies; ++i)
114 //     {
115 //     arrayops::copy(HH.colptr(i) + i, hh.memptr(), h_n_elem);
116 //     }
117 //
118 //
119 //
120 //   Col<eT> xx( (x_n_elem + 2*h_n_elem_m1), arma_zeros_indicator() );  // zero padded version of x
121 //
122 //   const eT*  x_mem =  x.memptr();
123 //         eT* xx_mem = xx.memptr();
124 //
125 //   arrayops::copy( &(xx_mem[h_n_elem_m1]), x_mem, x_n_elem );
126 //
127 //
128 //   (A_is_col) ? out.set_size(out_n_elem, 1) : out.set_size(1, out_n_elem);
129 //
130 //   eT* out_mem = out.memptr();
131 //
132 //   uword last_i      = 0;
133 //   bool  last_i_done = false;
134 //
135 //   for(uword i=0; i < xx.n_elem; i += N_copies)
136 //     {
137 //     if( ((i + HH_n_rows) <= xx.n_elem) && ((i + N_copies) <= out_n_elem) )
138 //       {
139 //       const Row<eT> xx_sub(xx_mem + i, HH_n_rows, false, true);
140 //
141 //       Row<eT> out_sub(out_mem + i, N_copies, false, true);
142 //
143 //       out_sub = xx_sub * HH;
144 //
145 //       last_i_done = true;
146 //       }
147 //     else
148 //       {
149 //       last_i = i;
150 //       last_i_done = false;
151 //       break;
152 //       }
153 //     }
154 //
155 //   if(last_i_done == false)
156 //     {
157 //     for(uword i=last_i; i < out_n_elem; ++i)
158 //       {
159 //       // out_mem[i] = dot( hh, xx.subvec(i, (i + h_n_elem_m1)) );
160 //
161 //       out_mem[i] = op_dot::direct_dot( h_n_elem, hh_mem, &(xx_mem[i]) );
162 //       }
163 //     }
164 //   }
165 
166 
167 
168 template<typename T1, typename T2>
169 inline
170 void
apply(Mat<typename T1::elem_type> & out,const Glue<T1,T2,glue_conv> & expr)171 glue_conv::apply(Mat<typename T1::elem_type>& out, const Glue<T1,T2,glue_conv>& expr)
172   {
173   arma_extra_debug_sigprint();
174 
175   typedef typename T1::elem_type eT;
176 
177   const quasi_unwrap<T1> UA(expr.A);
178   const quasi_unwrap<T2> UB(expr.B);
179 
180   const Mat<eT>& A = UA.M;
181   const Mat<eT>& B = UB.M;
182 
183   arma_debug_check
184     (
185     ( ((A.is_vec() == false) && (A.is_empty() == false)) || ((B.is_vec() == false) && (B.is_empty() == false)) ),
186     "conv(): given object must be a vector"
187     );
188 
189   const bool A_is_col = ((T1::is_col) || (A.n_cols == 1));
190 
191   const uword mode = expr.aux_uword;
192 
193   if(mode == 0)  // full convolution
194     {
195     glue_conv::apply(out, A, B, A_is_col);
196     }
197   else
198   if(mode == 1)  // same size as A
199     {
200     Mat<eT> tmp;
201 
202     glue_conv::apply(tmp, A, B, A_is_col);
203 
204     if( (tmp.is_empty() == false) && (A.is_empty() == false) && (B.is_empty() == false) )
205       {
206       const uword start = uword( std::floor( double(B.n_elem) / double(2) ) );
207 
208       out = (A_is_col) ? tmp(start, 0, arma::size(A)) : tmp(0, start, arma::size(A));
209       }
210     else
211       {
212       out.zeros( arma::size(A) );
213       }
214     }
215   }
216 
217 
218 
219 ///
220 
221 
222 
223 // TODO: this implementation of conv2() is rudimentary; replace with faster version
224 template<typename eT>
225 inline
226 void
apply(Mat<eT> & out,const Mat<eT> & A,const Mat<eT> & B)227 glue_conv2::apply(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>& B)
228   {
229   arma_extra_debug_sigprint();
230 
231   const Mat<eT>& G = (A.n_elem <= B.n_elem) ? A : B;   // unflipped filter coefficients
232   const Mat<eT>& W = (A.n_elem <= B.n_elem) ? B : A;   // original 2D image
233 
234   const uword out_n_rows = ((W.n_rows + G.n_rows) > 0) ? (W.n_rows + G.n_rows - 1) : uword(0);
235   const uword out_n_cols = ((W.n_cols + G.n_cols) > 0) ? (W.n_cols + G.n_cols - 1) : uword(0);
236 
237   if(G.is_empty() || W.is_empty())  { out.zeros(); return; }
238 
239 
240   Mat<eT> H(G.n_rows, G.n_cols, arma_nozeros_indicator());  // flipped filter coefficients
241 
242   const uword H_n_rows = H.n_rows;
243   const uword H_n_cols = H.n_cols;
244 
245   const uword H_n_rows_m1 = H_n_rows - 1;
246   const uword H_n_cols_m1 = H_n_cols - 1;
247 
248   for(uword col=0; col < H_n_cols; ++col)
249     {
250           eT* H_colptr = H.colptr(H_n_cols_m1 - col);
251     const eT* G_colptr = G.colptr(col);
252 
253     for(uword row=0; row < H_n_rows; ++row)
254       {
255       H_colptr[H_n_rows_m1 - row] = G_colptr[row];
256       }
257     }
258 
259 
260   Mat<eT> X( (W.n_rows + 2*H_n_rows_m1), (W.n_cols + 2*H_n_cols_m1), arma_zeros_indicator() );
261 
262   X( H_n_rows_m1, H_n_cols_m1, arma::size(W) ) = W;  // zero padded version of 2D image
263 
264 
265   out.set_size( out_n_rows, out_n_cols );
266 
267   for(uword col=0; col < out_n_cols; ++col)
268     {
269     eT* out_colptr = out.colptr(col);
270 
271     for(uword row=0; row < out_n_rows; ++row)
272       {
273       // out.at(row, col) = accu( H % X(row, col, size(H)) );
274 
275       eT acc = eT(0);
276 
277       for(uword H_col = 0; H_col < H_n_cols; ++H_col)
278         {
279         const eT* X_colptr = X.colptr(col + H_col);
280 
281         acc += op_dot::direct_dot( H_n_rows, H.colptr(H_col), &(X_colptr[row]) );
282         }
283 
284       out_colptr[row] = acc;
285       }
286     }
287   }
288 
289 
290 
291 template<typename T1, typename T2>
292 inline
293 void
apply(Mat<typename T1::elem_type> & out,const Glue<T1,T2,glue_conv2> & expr)294 glue_conv2::apply(Mat<typename T1::elem_type>& out, const Glue<T1,T2,glue_conv2>& expr)
295   {
296   arma_extra_debug_sigprint();
297 
298   typedef typename T1::elem_type eT;
299 
300   const quasi_unwrap<T1> UA(expr.A);
301   const quasi_unwrap<T2> UB(expr.B);
302 
303   const Mat<eT>& A = UA.M;
304   const Mat<eT>& B = UB.M;
305 
306   const uword mode = expr.aux_uword;
307 
308   if(mode == 0)  // full convolution
309     {
310     glue_conv2::apply(out, A, B);
311     }
312   else
313   if(mode == 1)  // same size as A
314     {
315     Mat<eT> tmp;
316 
317     glue_conv2::apply(tmp, A, B);
318 
319     if( (tmp.is_empty() == false) && (A.is_empty() == false) && (B.is_empty() == false) )
320       {
321       const uword start_row = uword( std::floor( double(B.n_rows) / double(2) ) );
322       const uword start_col = uword( std::floor( double(B.n_cols) / double(2) ) );
323 
324       out = tmp(start_row, start_col, arma::size(A));
325       }
326     else
327       {
328       out.zeros( arma::size(A) );
329       }
330     }
331   }
332 
333 
334 
335 //! @}
336