1 /*******************************************************************************
2 * Copyright 2016-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16 
17 /// @file
18 /// C API types definitions
19 
20 #ifndef ONEAPI_DNNL_DNNL_TYPES_H
21 #define ONEAPI_DNNL_DNNL_TYPES_H
22 
23 #ifdef __cplusplus
24 extern "C" {
25 #endif
26 
27 /// @cond DO_NOT_DOCUMENT_THIS
28 #include <stddef.h>
29 #include <stdint.h>
30 /// @endcond
31 
32 /// @addtogroup dnnl_api
33 /// @{
34 
35 /// @addtogroup dnnl_api_utils
36 /// @{
37 
/// Status values returned by the library functions.
/// @note #dnnl_success is zero, so a status can be tested as zero/non-zero.
typedef enum {
    dnnl_success = 0, ///< The operation was successful
    dnnl_out_of_memory = 1, ///< The operation failed due to an out-of-memory
                            ///< condition
    dnnl_invalid_arguments = 2, ///< The operation failed because of incorrect
                                ///< function arguments
    dnnl_unimplemented = 3, ///< The operation failed because requested
                            ///< functionality is not implemented
    dnnl_iterator_ends = 4, ///< Primitive iterator passed over last primitive
                            ///< descriptor
    dnnl_runtime_error = 5, ///< Primitive or engine failed on execution
    dnnl_not_required = 6, ///< Queried element is not required for given
                           ///< primitive
} dnnl_status_t;
55 
56 /// @} dnnl_api_utils
57 
58 /// @addtogroup dnnl_api_memory
59 /// @{
60 
/// Data type specification.
typedef enum {
    dnnl_data_type_undef = 0, ///< Undefined data type, used for empty memory
                              ///< descriptors
    dnnl_f16 = 1, ///< 16-bit/half-precision floating point
    dnnl_bf16 = 2, ///< non-standard 16-bit floating point (bfloat16, with a
                   ///< 7-bit mantissa)
    dnnl_f32 = 3, ///< 32-bit/single-precision floating point
    dnnl_s32 = 4, ///< 32-bit signed integer
    dnnl_s8 = 5, ///< 8-bit signed integer
    dnnl_u8 = 6, ///< 8-bit unsigned integer
} dnnl_data_type_t;
78 
/// Memory format kind.
///
/// Enumerator values are pinned explicitly: this is a public ABI enum and
/// the sibling enums in this header (#dnnl_status_t, #dnnl_data_type_t)
/// follow the same convention. The values are unchanged from the implicit
/// C enumeration order, so this is fully backward-compatible.
typedef enum {
    /// Undefined memory format kind, used for empty memory descriptors.
    dnnl_format_kind_undef = 0,
    /// Unspecified format kind.
    /// The primitive selects a format automatically.
    dnnl_format_kind_any = 1,
    /// A tensor in a generic format described by the stride and blocking
    /// values in each dimension. See @ref dnnl_blocking_desc_t for more
    /// information.
    dnnl_blocked = 2,
    /// Weights format used in 8-bit Winograd convolution.
    dnnl_format_kind_wino = 3,
    /// Packed weights format used in RNN.
    dnnl_format_kind_rnn_packed = 4,
} dnnl_format_kind_t;
95 
96 /// Memory format tag specification.
97 ///
98 /// oneDNN formats describe physical data layout. The physical layout
99 /// is described as a sequence of the dimensions as they are laid out in the
100 /// memory (from the outer-most to the inner-most). Note that this order
101 /// doesn't affect the logical order of the dimensions that is kept in the
102 /// `dims` field of the dnnl_memory_desc_t structure. The logical order of the
103 /// dimensions is specified by the primitive that uses the tensor.
104 ///
105 /// For example, CNN 5D tensor always has its logical dimensions in the order
106 /// `(batch, channels, depth, height, width)`, while the physical layout might be
107 /// `NCDHW` (corresponds to #dnnl_ncdhw format tag) or
108 /// `NDHWC` (corresponds to #dnnl_ndhwc format tag).
109 ///
110 /// ~~~cpp
111 /// int batch = 2, channels = 16, depth = 13, height = 13, width = 13;
112 ///
113 /// int ndims = 5; // 5D tensor
114 /// dnnl_dims_t dims = {batch, channels, depth, height, width};
115 /// dnnl_memory_desc_t data_in_ncdhw;
116 /// dnnl_memory_desc_init_by_tag(
117 ///      &data_in_ncdhw, 5, dims, dnnl_f32, dnnl_ncdhw);
118 ///
119 /// // note that in both cases dims passed are the same
120 /// dnnl_memory_desc_t data_in_ndhwc;
121 /// dnnl_memory_desc_init_by_tag(
122 ///      &data_in_ndhwc, 5, dims, dnnl_f32, dnnl_ndhwc);
123 /// ~~~
124 ///
125 /// Memory format tags can be further divided into two categories:
///  - Domain-agnostic names, i.e. names that do not depend on the tensor usage
127 ///    in the specific primitive. These names use letters from `a` to `l` to
128 ///    denote logical dimension from 1 to 12, and form the order in which the
129 ///    dimensions are laid in memory. For instance, #dnnl_ab is used to denote
130 ///    2D tensor where the second logical dimension (aka `b`) is the innermost,
131 ///    i.e. has stride = 1, and the first logical dimension (`a`) laid out in
132 ///    memory with stride equal to the size of second dimension. On the other
133 ///    hand, #dnnl_ba is just transposed version of the same tensor: the
134 ///    first dimension (`a`) becomes the innermost one.
135 ///  - Domain-specific names, i.e. names that make sense only in the context of
///    a certain domain, such as CNN. These names are just aliases to the
///    corresponding domain-agnostic tags and are used mostly for convenience.
138 ///    For example, #dnnl_nc is used to denote 2D CNN activations tensor
139 ///    memory format, where channels are the innermost dimension and batch is an
140 ///    outermost one. Moreover, #dnnl_nc is just an alias to #dnnl_ab,
141 ///    since for oneDNN CNN primitives the logical dimensions of
142 ///    activations tensors come in order: batch, channels, spatial.
143 ///    In other words, batch corresponds to the first logical dimension (`a`),
144 ///    channels correspond to the second one (`b`).
145 ///
146 /// The following domain-specific notation applies to memory format tags:
147 ///  - @c 'n' denotes the mini-batch dimension
148 ///  - @c 'c' denotes a channels dimension
149 ///  - When there are multiple channel dimensions (for example, in convolution
150 ///    weights tensor), @c 'i' and @c 'o' denote dimensions of input and output
151 ///    channels
152 ///  - @c 'd', @c 'h', and @c 'w' denote spatial depth, height, and width
153 ///    respectively
154 ///
155 /// Upper-case letters indicate that the data is laid out in blocks for a
156 /// particular dimension. In such cases, the format name contains both upper-
157 /// and lower-case letters for that dimension with a lower-case letter preceded
158 /// by the block size. For example: #dnnl_nChw8c describes a format where the
159 /// outermost dimension is mini-batch, followed by the channel block number,
160 /// followed by the spatial height and width, and finally followed by 8-element
161 /// channel blocks.
162 ///
163 /// @sa @ref dev_guide_understanding_memory_formats
164 typedef enum {
165     /// Undefined memory format tag
166     dnnl_format_tag_undef = 0,
167     /// Undefined memory format tag.
168     /// The primitive selects a format automatically.
169     dnnl_format_tag_any,
170 
171     // Semantic agnostic section
172     // The physical order of dimensions is defined by the permutation of the
173     // characters, assuming that ab..z defines the natural order.
174 
175     // Plain formats
176 
177     dnnl_a, ///< plain 1D tensor
178     dnnl_ab, ///< plain 2D tensor
179     dnnl_abc, ///< plain 3D tensor
180     dnnl_abcd, ///< plain 4D tensor
181     dnnl_acbd, ///< plain 4D tensor
182     dnnl_abcde, ///< plain 5D tensor
183     dnnl_abcdef, ///< plain 6D tensor
184     dnnl_abcdefg, ///< plain 7D tensor
185     dnnl_abcdefgh, ///< plain 8D tensor
186     dnnl_abcdefghi, ///< plain 9D tensor
187     dnnl_abcdefghij, ///< plain 10D tensor
188     dnnl_abcdefghijk, ///< plain 11D tensor
189     dnnl_abcdefghijkl, ///< plain 12D tensor
190 
191     // Permuted plain formats
192 
193     dnnl_abdc, ///< permuted 4D tensor
194     dnnl_abdec, ///< permuted 5D tensor
195     dnnl_acb, ///< permuted 3D tensor
196     dnnl_acbde, ///< permuted 5D tensor
197     dnnl_acbdef, ///< permuted 6D tensor
198     dnnl_acdb, ///< permuted 4D tensor
199     dnnl_acdeb, ///< permuted 5D tensor
200     dnnl_ba, ///< permuted 2D tensor
201     dnnl_bac, ///< permuted 3D tensor
202     dnnl_bacd, ///< permuted 4D tensor
203     dnnl_bacde, ///< permuted 5D tensor
204     dnnl_bca, ///< permuted 3D tensor
205     dnnl_bcda, ///< permuted 4D tensor
206     dnnl_bcdea, ///< permuted 5D tensor
207     dnnl_cba, ///< permuted 3D tensor
208     dnnl_cdba, ///< permuted 4D tensor
209     dnnl_dcab, ///< permuted 4D tensor
210     dnnl_cdeba, ///< permuted 5D tensor
211     dnnl_decab, ///< permuted 5D tensor
212     dnnl_defcab, ///< permuted 6D tensor
213     dnnl_abced, ///< permuted 5D tensor
214     dnnl_abcdfe, ///< permuted 6D tensor
215     dnnl_abcdegf, ///< permuted 7D tensor
216     dnnl_abcdefhg, ///< permuted 8D tensor
217     dnnl_abcdefgih, ///< permuted 9D tensor
218     dnnl_abcdefghji, ///< permuted 10D tensor
219     dnnl_abcdefghikj, ///< permuted 11D tensor
220     dnnl_abcdefghijlk, ///< permuted 12D tensor
221 
222     // Opaque blocked formats
223 
224     dnnl_Abc16a,
225     dnnl_ABc16a16b,
226     dnnl_ABc32a32b,
227     dnnl_ABc4a4b,
228     /// 3D tensor blocked by 2nd dimension with block size 16
229     dnnl_aBc16b,
230     dnnl_ABc16b16a,
231     dnnl_Abc4a,
232     /// 3D tensor blocked by 2nd dimension with block size 32
233     dnnl_aBc32b,
234     /// 3D tensor blocked by 2nd dimension with block size 4
235     dnnl_aBc4b,
236     dnnl_ABc4b16a4b,
237     dnnl_ABc2b8a4b,
238     dnnl_ABc16b16a4b,
239     dnnl_ABc16b16a2b,
240     dnnl_ABc4b4a,
241     dnnl_ABc8a16b2a,
242     dnnl_ABc8a8b,
243     dnnl_ABc8a4b,
244     /// 3D tensor blocked by 2nd dimension with block size 8
245     dnnl_aBc8b,
246     dnnl_ABc8b16a2b,
247     dnnl_BAc8a16b2a,
248     dnnl_ABc8b8a,
249     dnnl_Abcd16a,
250     dnnl_Abcd8a,
251     dnnl_ABcd16a16b,
252     dnnl_Abcd32a,
253     dnnl_ABcd32a32b,
254     /// 4D tensor blocked by 2nd dimension with block size 16
255     dnnl_aBcd16b,
256     dnnl_ABcd16b16a,
257     dnnl_aBCd16b16c,
258     dnnl_aBCd16c16b,
259     dnnl_Abcd4a,
260     /// 4D tensor blocked by 2nd dimension with block size 32
261     dnnl_aBcd32b,
262     /// 4D tensor blocked by 2nd dimension with block size 4
263     dnnl_aBcd4b,
264     dnnl_ABcd4b16a4b,
265     dnnl_ABcd16b16a4b,
266     dnnl_ABcd16b16a2b,
267     dnnl_ABcd4b4a,
268     dnnl_ABcd4a4b,
269     dnnl_aBCd2c4b2c,
270     dnnl_aBCd4b8c2b,
271     dnnl_aBCd4c16b4c,
272     dnnl_aBCd2c8b4c,
273     dnnl_aBCd16c16b4c,
274     dnnl_aBCd16c16b2c,
275     dnnl_aBCd4c4b,
276     dnnl_aBCd4b4c,
277     dnnl_ABcd8a16b2a,
278     dnnl_ABcd2b8a4b,
279     dnnl_ABcd8a8b,
280     dnnl_ABcd8a4b,
281     /// 4D tensor blocked by 2nd dimension with block size 8
282     dnnl_aBcd8b,
283     dnnl_aBCd4c8b2c,
284     dnnl_ABcd8b16a2b,
285     dnnl_aBCd8b16c2b,
286     dnnl_BAcd8a16b2a,
287     /// 4D tensor blocked by 1st and 2nd dimension with block size 8
288     dnnl_ABcd8b8a,
289     dnnl_aBCd8b8c,
290     dnnl_aBCd8b4c,
291     dnnl_aBCd8c16b2c,
292     dnnl_ABcde8a16b2a,
293     dnnl_aCBd8b16c2b,
294     dnnl_aBCd8c8b,
295     dnnl_Abcde16a,
296     dnnl_Abcde32a,
297     dnnl_ABcde16a16b,
298     dnnl_BAcde8a16b2a,
299     /// 4D tensor blocked by 3rd dimension with block size 4
300     dnnl_aBCd2b4c2b,
301     /// 5D tensor blocked by 1st dimension with block size 16
302     dnnl_ABcde4b16a4b,
303     /// 5D tensor blocked by 1st dimension with block size 8
304     dnnl_ABcde2b8a4b,
305     /// 5D tensor blocked by 2nd dimension with block size 16
306     dnnl_aBcde16b,
307     dnnl_ABcde16b16a,
308     dnnl_aBCde16b16c,
309     dnnl_aBCde16c16b,
310     dnnl_aBCde2c8b4c,
311     dnnl_Abcde4a,
312     /// 5D tensor blocked by 2nd dimension with block size 32
313     dnnl_aBcde32b,
314     /// 5D tensor blocked by 2nd dimension with block size 4
315     dnnl_aBcde4b,
316     dnnl_ABcde4b4a,
317     dnnl_ABcde4a4b,
318     dnnl_aBCde4b4c,
319     dnnl_aBCde2c4b2c,
320     dnnl_aBCde4b8c2b,
321     dnnl_aBCde4c16b4c,
322     dnnl_aBCde16c16b4c,
323     dnnl_aBCde16c16b2c,
324     dnnl_aBCde4c4b,
325     dnnl_Abcde8a,
326     dnnl_ABcde8a8b,
327     dnnl_ABcde8a4b,
328     dnnl_BAcde16b16a,
329     /// 5D tensor blocked by 2nd dimension with block size 8
330     dnnl_aBcde8b,
331     dnnl_ABcde8b16a2b,
332     dnnl_aBCde8b16c2b,
333     dnnl_aBCde4c8b2c,
334     dnnl_aCBde8b16c2b,
335     dnnl_ABcde8b8a,
336     dnnl_ABcde32a32b,
337     dnnl_aBCde8b8c,
338     dnnl_aBCde8b4c,
339     dnnl_ABc4a8b8a4b,
340     dnnl_ABcd4a8b8a4b,
341     dnnl_ABcde4a8b8a4b,
342     dnnl_BAc4b8a8b4a,
343     dnnl_BAcd4b8a8b4a,
344     dnnl_BAcde4b8a8b4a,
345     dnnl_ABcd2a8b8a2b,
346     dnnl_aBCd4b8c8b4c,
347     dnnl_aBCde4b8c8b4c,
348     dnnl_aBCde2b8c8b2c,
349     dnnl_aBCde8c16b2c,
350     dnnl_aBCde8c8b,
351     /// 5D tensor blocked by 3rd dimension with block size 4
352     dnnl_aBCde2b4c2b,
353     /// 6D tensor blocked by 2nd dimension with block size 16
354     dnnl_aBcdef16b,
355     dnnl_aBCdef16b16c,
356     dnnl_aBCdef16c16b,
357     dnnl_aBCdef4c16b4c,
358     /// 6D tensor blocked by 2nd dimension with block size 8
359     dnnl_aBCdef2c8b4c,
360     dnnl_aBCdef4c8b2c,
361     /// 6D tensor blocked by 3rd dimension with block size 4
362     dnnl_aBCdef2b4c2b,
363     /// 6D tensor blocked by 2nd dimension with block size 4
364     dnnl_aBcdef4b,
365     dnnl_aBCdef4c4b,
366     dnnl_aBCdef4b4c,
367     dnnl_aBCdef2c4b2c,
368     dnnl_aBCdef4b8c2b,
369     dnnl_aBCdef8b8c,
370     dnnl_aBCdef8b4c,
371     dnnl_aBCdef8c16b2c,
372     dnnl_aBCdef4b8c8b4c,
373     dnnl_aBCdef8b16c2b,
374     dnnl_aCBdef8b16c2b,
375     dnnl_aBCdef8c8b,
376     dnnl_aBdc16b,
377     dnnl_aBdC16b2c,
378     dnnl_aBdC16b4c,
379     dnnl_aBdc4b,
380     dnnl_aBdc8b,
381     dnnl_aBdec16b,
382     dnnl_aBdeC16b2c,
383     dnnl_aBdeC16b4c,
384     dnnl_aBdec32b,
385     dnnl_aBdec4b,
386     dnnl_aBdec8b,
387     dnnl_aBdefc16b,
388     dnnl_aBdefC16b2c,
389     dnnl_aCBdef16c16b,
390     dnnl_aBdefc4b,
391     dnnl_aBdefc8b,
392     dnnl_Abcdef16a,
393     dnnl_Abcdef32a,
394     dnnl_aBedc16b,
395     dnnl_Acb16a,
396     dnnl_AcB16a2b,
397     dnnl_AcB16a4b,
398     dnnl_Acb4a,
399     dnnl_Acb8a,
400     dnnl_aCBd16b16c,
401     dnnl_aCBd16c16b,
402     dnnl_aCBde16b16c,
403     dnnl_aCBde16c16b,
404     dnnl_Acdb16a,
405     dnnl_AcdB16a2b,
406     dnnl_AcdB16a4b,
407     dnnl_Acdb32a,
408     dnnl_Acdb4a,
409     dnnl_Acdb8a,
410     dnnl_Acdeb16a,
411     dnnl_AcdeB16a2b,
412     dnnl_Acdeb4a,
413     dnnl_Acdeb8a,
414     dnnl_Adcb16a,
415     dnnl_BAc16a16b,
416     dnnl_BAc16b16a,
417     dnnl_BAcd16a16b,
418     dnnl_BAcd16b16a,
419     dnnl_aCBd4c8b8c4b,
420     dnnl_aCBde4c8b8c4b,
421     dnnl_aCBdef4c8b8c4b,
422     dnnl_BAcde16a16b,
423     dnnl_aCBdef16b16c,
424     dnnl_abdfce, ///< permuted 6D tensor
425     dnnl_abdefc, ///< permuted 6D tensor
426     dnnl_ABc16b32a,
427     dnnl_ABc16b64a,
428     dnnl_ABc4b32a4b,
429     dnnl_ABc4b64a4b,
430     dnnl_ABc8b32a2b,
431     dnnl_ABc8b64a2b,
432     dnnl_AB16b16a,
433     dnnl_AB16b32a,
434     dnnl_AB16b64a,
435     dnnl_AB8b16a2b,
436     dnnl_AB8b32a2b,
437     dnnl_AB8b64a2b,
438     dnnl_AB4b16a4b,
439     dnnl_AB4b32a4b,
440     dnnl_AB4b64a4b,
441     dnnl_AB16b16a4b,
442     dnnl_ABcd16b32a,
443     dnnl_ABcd16b64a,
444     dnnl_ABcd4b32a4b,
445     dnnl_ABcd4b64a4b,
446     dnnl_ABcd8b32a2b,
447     dnnl_ABcd8b64a2b,
448     dnnl_ABcde4b32a4b,
449     dnnl_ABcde4b64a4b,
450     dnnl_ABcde16b16a4b,
451     dnnl_ABcde16b16a2b,
452     dnnl_ABcde16b32a,
453     dnnl_ABcde16b64a,
454     dnnl_ABcde8b32a2b,
455     dnnl_ABcde8b64a2b,
456     dnnl_aBCdef16c16b4c,
457     dnnl_aBCdef16c16b2c,
458     dnnl_AB32a32b8a4b,
459     dnnl_AB8a4b,
460     dnnl_AB32a32b8a2b,
461     dnnl_AB8a2b,
462     dnnl_abDc32d,
463     dnnl_abDC32d4c,
464     dnnl_abdEc32e,
465     dnnl_abdEC32e2c,
466     dnnl_abdEC32e4c,
467     dnnl_aBdefC16b4c,
468     dnnl_AcdeB16a4b,
469     dnnl_ABcd16a16b2a,
470     dnnl_ABc16a16b2a,
471     dnnl_aBCd16b16c2b,
472     dnnl_aBCde16b16c2b,
473     dnnl_Acb32a,
474     dnnl_AcB32a2b,
475     dnnl_AcB32a4b,
476     dnnl_Acb48a,
477     dnnl_AcB48a2b,
478     dnnl_AcB48a4b,
479     dnnl_Acb64a,
480     dnnl_AcB64a2b,
481     dnnl_AcB64a4b,
482     dnnl_cBa2b,
483     dnnl_cBa4b,
484     dnnl_aBdc32b,
485     dnnl_aBdC32b2c,
486     dnnl_aBdC32b4c,
487     dnnl_aBdc48b,
488     dnnl_aBdC48b2c,
489     dnnl_aBdC48b4c,
490     dnnl_aBdc64b,
491     dnnl_aBdC64b2c,
492     dnnl_aBdC64b4c,
493     dnnl_adcb,
494     dnnl_adCb2c,
495     dnnl_adCb4c,
496     dnnl_AcdB32a2b,
497     dnnl_AcdB32a4b,
498     dnnl_Acdb48a,
499     dnnl_AcdB48a2b,
500     dnnl_AcdB48a4b,
501     dnnl_Acdb64a,
502     dnnl_AcdB64a2b,
503     dnnl_AcdB64a4b,
504     dnnl_cdBa2b,
505     dnnl_cdBa4b,
506     dnnl_aBdeC32b2c,
507     dnnl_aBdeC32b4c,
508     dnnl_aBdec48b,
509     dnnl_aBdeC48b2c,
510     dnnl_aBdeC48b4c,
511     dnnl_aBdec64b,
512     dnnl_aBdeC64b2c,
513     dnnl_aBdeC64b4c,
514     dnnl_adecb,
515     dnnl_adeCb2c,
516     dnnl_adeCb4c,
517     dnnl_Acdeb32a,
518     dnnl_AcdeB32a2b,
519     dnnl_AcdeB32a4b,
520     dnnl_Acdeb48a,
521     dnnl_AcdeB48a2b,
522     dnnl_AcdeB48a4b,
523     dnnl_Acdeb64a,
524     dnnl_AcdeB64a2b,
525     dnnl_AcdeB64a4b,
526     dnnl_cdeBa2b,
527     dnnl_cdeBa4b,
528     dnnl_aBdefc32b,
529     dnnl_aBdefC32b2c,
530     dnnl_aBdefC32b4c,
531     dnnl_aBdefc48b,
532     dnnl_aBdefC48b2c,
533     dnnl_aBdefC48b4c,
534     dnnl_aBdefc64b,
535     dnnl_aBdefC64b2c,
536     dnnl_aBdefC64b4c,
537     dnnl_adefcb,
538     dnnl_adefCb2c,
539     dnnl_adefCb4c,
540     dnnl_AB16b32a4b,
541     dnnl_AB16b48a4b,
542     dnnl_AB16b64a4b,
543     dnnl_AB16b16a2b,
544     dnnl_AB16b32a2b,
545     dnnl_AB16b48a2b,
546     dnnl_AB16b64a2b,
547     dnnl_ABc16b32a4b,
548     dnnl_ABc16b48a4b,
549     dnnl_ABc16b64a4b,
550     dnnl_ABc16b32a2b,
551     dnnl_ABc16b48a2b,
552     dnnl_ABc16b64a2b,
553     dnnl_ABcd16b32a4b,
554     dnnl_ABcd16b48a4b,
555     dnnl_ABcd16b64a4b,
556     dnnl_ABcd16b32a2b,
557     dnnl_ABcd16b48a2b,
558     dnnl_ABcd16b64a2b,
559     dnnl_ABcde16b32a4b,
560     dnnl_ABcde16b48a4b,
561     dnnl_ABcde16b64a4b,
562     dnnl_ABcde16b32a2b,
563     dnnl_ABcde16b48a2b,
564     dnnl_ABcde16b64a2b,
565     dnnl_ABc32a16b,
566     dnnl_ABcd32a16b,
567     dnnl_ABcde32a16b,
568     dnnl_AB48a16b,
569     dnnl_AB48a32b,
570     dnnl_ABc40a16b,
571     dnnl_ABc40a32b,
572     dnnl_aBC48b16c,
573     dnnl_aBC48b32c,
574     dnnl_ABcd40a16b,
575     dnnl_ABcd40a32b,
576     dnnl_abCd32c,
577     dnnl_abdCe32c,
578     dnnl_abdCE32c2e,
579     dnnl_BA16a16b2a,
580     dnnl_BA16a32b2a,
581     dnnl_BA16a48b2a,
582     dnnl_BA16a64b2a,
583     dnnl_BA16a16b4a,
584     dnnl_BA16a32b4a,
585     dnnl_BA16a48b4a,
586     dnnl_BA16a64b4a,
587     dnnl_ABcd8a2b,
588     dnnl_aBdeC16c16b2c,
589     dnnl_aBdeC16c16b4c,
590     dnnl_aBdefC16c16b2c,
591     dnnl_AcB16b16a2b,
592     dnnl_AcB16b16a4b,
593     dnnl_AcdB16b16a2b,
594     dnnl_AcdB16b16a4b,
595     dnnl_AcdeB16b16a2b,
596     dnnl_aBdefC16c16b4c,
597     dnnl_AcdeB16b16a4b,
598     dnnl_AcB16b32a2b,
599     dnnl_AcB16b32a4b,
600     dnnl_AcB16b48a2b,
601     dnnl_AcB16b48a4b,
602     dnnl_AcB16b64a2b,
603     dnnl_AcB16b64a4b,
604     dnnl_aBdC16c16b2c,
605     dnnl_aBdC16c16b4c,
606     dnnl_aBdC16c32b2c,
607     dnnl_aBdC16c32b4c,
608     dnnl_aBdC16c48b2c,
609     dnnl_aBdC16c48b4c,
610     dnnl_aBdC16c64b2c,
611     dnnl_aBdC16c64b4c,
612     dnnl_AcdB16b32a2b,
613     dnnl_AcdB16b32a4b,
614     dnnl_AcdB16b48a2b,
615     dnnl_AcdB16b48a4b,
616     dnnl_AcdB16b64a2b,
617     dnnl_AcdB16b64a4b,
618     dnnl_aBdeC16c32b2c,
619     dnnl_aBdeC16c32b4c,
620     dnnl_aBdeC16c48b2c,
621     dnnl_aBdeC16c48b4c,
622     dnnl_aBdeC16c64b2c,
623     dnnl_aBdeC16c64b4c,
624     dnnl_AcdeB16b32a2b,
625     dnnl_AcdeB16b32a4b,
626     dnnl_AcdeB16b48a2b,
627     dnnl_AcdeB16b48a4b,
628     dnnl_AcdeB16b64a2b,
629     dnnl_AcdeB16b64a4b,
630     dnnl_aBdefC16c32b2c,
631     dnnl_aBdefC16c32b4c,
632     dnnl_aBdefC16c48b2c,
633     dnnl_aBdefC16c48b4c,
634     dnnl_aBdefC16c64b2c,
635     dnnl_aBdefC16c64b4c,
636     dnnl_decbA16a,
637     dnnl_ABc4a2b,
638     dnnl_ABc8a2b,
639     dnnl_aBCd8b2c,
640     dnnl_ABcde4a2b,
641     dnnl_ABcde8a2b,
642     dnnl_ABcde40a16b,
643     dnnl_ABcde40a32b,
644     dnnl_aBCde8b2c,
645     dnnl_ABcde4a8b8a2b,
646     dnnl_ABcd4a8b8a2b,
647     dnnl_ABc4a8b8a2b,
648     dnnl_aBCdef4b8c8b2c,
649     dnnl_aBCde4b8c8b2c,
650     dnnl_aBCd4b8c8b2c,
651     dnnl_BAcde4b8a8b2a,
652     dnnl_BAcd4b8a8b2a,
653     dnnl_BAc4b8a8b2a,
654     dnnl_aCBdef4c8b8c2b,
655     dnnl_aCBde4c8b8c2b,
656     dnnl_aCBd4c8b8c2b,
657     dnnl_aBCdef8b2c,
658     dnnl_AB32a16b,
659     dnnl_AB32a32b,
660     dnnl_BA4b8a8b2a,
661     dnnl_BA4b8a8b4a,
662     dnnl_aBC32b16c,
663     dnnl_aBC32b32c,
664     dnnl_aCB4c8b8c2b,
665     dnnl_aCB4c8b8c4b,
666     dnnl_ABcd4a2b,
667     dnnl_ABc2b8a16b4a,
668     dnnl_ABcd2b8a16b4a,
669     dnnl_ABcde2b8a16b4a,
670     dnnl_ABc2a8b16a4b,
671     dnnl_ABc2a8b16a2b,
672     dnnl_ABc2b32a8b,
673     dnnl_ABcd2a8b16a4b,
674     dnnl_ABcd2a8b16a2b,
675     dnnl_aCBd2c8b16c2b,
676     dnnl_ABcd2b32a8b,
677     dnnl_aBCd2c8b16c2b,
678     dnnl_ABcde2a8b16a4b,
679     dnnl_ABcde2a8b16a2b,
680     dnnl_aCBde2c8b16c2b,
681     dnnl_ABcde2b32a8b,
682     dnnl_aBC2b8c16b2c,
683     dnnl_aBCd2b8c16b2c,
684     dnnl_aBCde2b8c16b2c,
685     dnnl_aBCdef2b8c16b2c,
686     dnnl_BAcde2b8a16b4a,
687     dnnl_BAcd2b8a16b4a,
688     dnnl_BAc2b8a16b4a,
689     dnnl_BAcde2b8a16b2a,
690     dnnl_BAcd2b8a16b2a,
691     dnnl_BAc2b8a16b2a,
692     dnnl_aBCde2c8b16c2b,
693     dnnl_aBCdef2c8b16c2b,
694     dnnl_aCBdef2c8b16c2b,
695     dnnl_aBCd2b8c16b4c,
696     dnnl_aBCde2b8c16b4c,
697     dnnl_BA4b8a16b2a,
698     dnnl_BA4b8a16b4a,
699     dnnl_aCB4c8b16c2b,
700     dnnl_aCB4c8b16c4b,
701     dnnl_BA16a16b,
702     dnnl_BA16a32b,
703     dnnl_BA16a48b,
704     dnnl_BA16a64b,
705 
706     /// Just a sentinel, not real memory format tag. Must be changed after new
707     /// format tag is added.
708     dnnl_format_tag_last,
709 
710     // Aliases
711 
712     /// 1D tensor, an alias to #dnnl_a
713     dnnl_x = dnnl_a,
714     /// 2D CNN activations tensor, an alias to #dnnl_ab
715     dnnl_nc = dnnl_ab,
716     /// 2D CNN activations tensor, an alias to #dnnl_ba
717     dnnl_cn = dnnl_ba,
718     /// 2D RNN statistics tensor, an alias to #dnnl_ab
719     dnnl_tn = dnnl_ab,
720     /// 2D RNN statistics tensor, an alias to #dnnl_ba
721     dnnl_nt = dnnl_ba,
722     /// 3D CNN activations tensor, an alias to #dnnl_abc
723     dnnl_ncw = dnnl_abc,
724     /// 3D CNN activations tensor, an alias to #dnnl_acb
725     dnnl_nwc = dnnl_acb,
726     /// 4D CNN activations tensor, an alias to #dnnl_abcd
727     dnnl_nchw = dnnl_abcd,
728     /// 4D CNN activations tensor, an alias to #dnnl_acdb
729     dnnl_nhwc = dnnl_acdb,
730     /// 4D CNN activations tensor, an alias to #dnnl_bcda
731     dnnl_chwn = dnnl_bcda,
732     /// 5D CNN activations tensor, an alias to #dnnl_abcde
733     dnnl_ncdhw = dnnl_abcde,
734     /// 5D CNN activations tensor, an alias to #dnnl_acdeb
735     dnnl_ndhwc = dnnl_acdeb,
736 
737     /// 2D CNN weights tensor, an alias to #dnnl_ab
738     dnnl_oi = dnnl_ab,
739     /// 2D CNN weights tensor, an alias to #dnnl_ba
740     dnnl_io = dnnl_ba,
741     /// 3D CNN weights tensor, an alias to #dnnl_abc
742     dnnl_oiw = dnnl_abc,
743     /// 3D CNN weights tensor, an alias to #dnnl_acb
744     dnnl_owi = dnnl_acb,
745     /// 3D CNN weights tensor, an alias to #dnnl_cba
746     dnnl_wio = dnnl_cba,
747     /// 3D CNN weights tensor, an alias to #dnnl_bca
748     dnnl_iwo = dnnl_bca,
749     /// 4D CNN weights tensor, an alias to #dnnl_abcd
750     dnnl_oihw = dnnl_abcd,
751     /// 4D CNN weights tensor, an alias to #dnnl_cdba
752     dnnl_hwio = dnnl_cdba,
753     /// 4D CNN weights tensor, an alias to #dnnl_acdb
754     dnnl_ohwi = dnnl_acdb,
755     /// 4D CNN weights tensor, an alias to #dnnl_bcda
756     dnnl_ihwo = dnnl_bcda,
757     /// 4D CNN weights tensor, an alias to #dnnl_bacd
758     dnnl_iohw = dnnl_bacd,
759     /// 5D CNN weights tensor, an alias to #dnnl_abcde
760     dnnl_oidhw = dnnl_abcde,
761     /// 5D CNN weights tensor, an alias to #dnnl_bacde
762     dnnl_iodhw = dnnl_bacde,
763     /// 5D CNN weights tensor, an alias to #dnnl_cdeba
764     dnnl_dhwio = dnnl_cdeba,
765     /// 5D CNN weights tensor, an alias to #dnnl_acdeb
766     dnnl_odhwi = dnnl_acdeb,
767     /// 5D CNN weights tensor, an alias to #dnnl_bcdea
768     dnnl_idhwo = dnnl_bcdea,
769 
770     /// 4D CNN weights tensor (incl. groups), an alias to #dnnl_abcd
771     dnnl_goiw = dnnl_abcd,
772     /// 4D CNN weights tensor (incl. groups), an alias to #dnnl_abdc
773     dnnl_gowi = dnnl_abdc,
774     /// 4D CNN weights tensor (incl. groups), an alias to #dnnl_dcab
775     dnnl_wigo = dnnl_dcab,
776     /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_abcde
777     dnnl_goihw = dnnl_abcde,
778     /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_abdec
779     dnnl_gohwi = dnnl_abdec,
780     /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_decab
781     dnnl_hwigo = dnnl_decab,
782     /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_acbde
783     dnnl_giohw = dnnl_acbde,
784     /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_abcdef
785     dnnl_goidhw = dnnl_abcdef,
786     /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_abdefc
787     dnnl_godhwi = dnnl_abdefc,
788     /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_acbdef
789     dnnl_giodhw = dnnl_acbdef,
790     /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_defcab
791     dnnl_dhwigo = dnnl_defcab,
792 
793     /// 3D RNN data tensor in the format (seq_length, batch, input channels),
794     /// an alias to #dnnl_abc.
795     dnnl_tnc = dnnl_abc,
796     /// 3D RNN data tensor in the format (batch, seq_length, input channels),
797     /// an alias to #dnnl_bac.
798     dnnl_ntc = dnnl_bac,
799     /// 4D RNN states tensor in the format (num_layers, num_directions,
800     /// batch, state channels), an alias to #dnnl_abcd.
801     dnnl_ldnc = dnnl_abcd,
802     /// 5D RNN weights tensor in the format (num_layers, num_directions,
803     /// input_channels, num_gates, output_channels), an alias to #dnnl_abcde.
804     ///
805     ///  - For LSTM cells, the gates order is input, forget, candidate
806     ///    and output gate.
807     ///  - For GRU cells, the gates order is update, reset and output gate.
808     dnnl_ldigo = dnnl_abcde,
809     /// 5D RNN weights tensor in the format (num_layers, num_directions,
810     /// num_gates, output_channels, input_channels), an alias to #dnnl_abdec.
811     ///
812     ///  - For LSTM cells, the gates order is input, forget, candidate
813     ///    and output gate.
814     ///  - For GRU cells, the gates order is update, reset and output gate.
815     dnnl_ldgoi = dnnl_abdec,
816     /// 4D LSTM projection tensor in the format (num_layers, num_directions,
817     /// num_channels_in_hidden_state, num_channels_in_recurrent_projection),
818     /// an alias to #dnnl_abcd.
819     dnnl_ldio = dnnl_abcd,
820     /// 4D LSTM projection tensor in the format (num_layers, num_directions,
821     /// num_channels_in_recurrent_projection, num_channels_in_hidden_state),
822     /// an alias to #dnnl_abdc.
823     dnnl_ldoi = dnnl_abdc,
824     /// 4D RNN bias tensor in the format (num_layers, num_directions,
825     /// num_gates, output_channels), an alias to #dnnl_abcd.
826     ///
827     ///  - For LSTM cells, the gates order is input, forget, candidate
828     ///    and output gate.
829     ///  - For GRU cells, the gates order is update, reset and output gate.
830     dnnl_ldgo = dnnl_abcd,
831     /// 5D LSTM projection tensor
832     dnnl_ldOi32o = dnnl_abDc32d,
833     dnnl_ldOI32o4i = dnnl_abDC32d4c,
834     dnnl_ldIo32i = dnnl_abCd32c,
835     /// 6D RNN weights tensor
836     dnnl_ldgOi32o = dnnl_abdEc32e,
837     dnnl_ldgOI32o2i = dnnl_abdEC32e2c,
838     dnnl_ldgOI32o4i = dnnl_abdEC32e4c,
839     dnnl_ldgIo32i = dnnl_abdCe32c,
840     dnnl_ldgIO32i2o = dnnl_abdCE32c2e,
841 
842     // Opaque data types, are not to be used explicitly
843 
844     // data
845     /// 5D CNN activations tensor blocked by channels with block size 32,
846     /// an alias to #dnnl_aBcde32b
847     dnnl_nCdhw32c = dnnl_aBcde32b,
848     /// 5D CNN activations tensor blocked by channels with block size 16,
849     /// an alias to #dnnl_aBcde16b
850     dnnl_nCdhw16c = dnnl_aBcde16b,
851     /// 5D CNN activations tensor blocked by channels with block size 4,
852     /// an alias to #dnnl_aBcde4b
853     dnnl_nCdhw4c = dnnl_aBcde4b,
854     /// 5D CNN activations tensor blocked by channels with block size 8,
855     /// an alias to #dnnl_aBcde8b
856     dnnl_nCdhw8c = dnnl_aBcde8b,
857     /// 4D CNN activations tensor blocked by channels with block size 32,
858     /// an alias to #dnnl_aBcd32b
859     dnnl_nChw32c = dnnl_aBcd32b,
860     /// 4D CNN activations tensor blocked by channels with block size 16,
861     /// an alias to #dnnl_aBcd16b
862     dnnl_nChw16c = dnnl_aBcd16b,
863     /// 4D CNN activations tensor blocked by channels with block size 4,
864     /// an alias to #dnnl_aBcd4b
865     dnnl_nChw4c = dnnl_aBcd4b,
866     /// 4D CNN activations tensor blocked by channels with block size 8,
867     /// an alias to #dnnl_aBcd8b
868     dnnl_nChw8c = dnnl_aBcd8b,
869     /// 3D CNN activations tensor blocked by channels with block size 32,
870     /// an alias to #dnnl_aBc32b
871     dnnl_nCw32c = dnnl_aBc32b,
872     /// 3D CNN activations tensor blocked by channels with block size 16,
873     /// an alias to #dnnl_aBc16b
874     dnnl_nCw16c = dnnl_aBc16b,
875     /// 3D CNN activations tensor blocked by channels with block size 4,
876     /// an alias to #dnnl_aBc4b
877     dnnl_nCw4c = dnnl_aBc4b,
878     /// 3D CNN activations tensor blocked by channels with block size 8,
879     /// an alias to #dnnl_aBc8b
880     dnnl_nCw8c = dnnl_aBc8b,
881     dnnl_NCw16n16c = dnnl_ABc16a16b,
882     dnnl_NCdhw16n16c = dnnl_ABcde16a16b,
883     dnnl_NChw16n16c = dnnl_ABcd16a16b,
884     dnnl_NCw32n16c = dnnl_ABc32a16b,
885     dnnl_NChw32n16c = dnnl_ABcd32a16b,
886     dnnl_NCdhw32n16c = dnnl_ABcde32a16b,
887     dnnl_NCw32n32c = dnnl_ABc32a32b,
888     dnnl_NChw32n32c = dnnl_ABcd32a32b,
889     dnnl_NCdhw32n32c = dnnl_ABcde32a32b,
890 
891     // weights, 2D
892     dnnl_OI16i16o = dnnl_AB16b16a,
893     dnnl_OI16i32o = dnnl_AB16b32a,
894     dnnl_OI16i64o = dnnl_AB16b64a,
895     dnnl_OI8i16o2i = dnnl_AB8b16a2b,
896     dnnl_OI8i32o2i = dnnl_AB8b32a2b,
897     dnnl_OI8i64o2i = dnnl_AB8b64a2b,
898     dnnl_OI4i16o4i = dnnl_AB4b16a4b,
899     dnnl_OI4i32o4i = dnnl_AB4b32a4b,
900     dnnl_OI4i64o4i = dnnl_AB4b64a4b,
901     dnnl_OI16i16o4i = dnnl_AB16b16a4b,
902     // weights, 3D
903     dnnl_IOw16o16i = dnnl_BAc16a16b,
904     dnnl_IOw16i16o = dnnl_BAc16b16a,
905     dnnl_OIw16i16o = dnnl_ABc16b16a,
906     dnnl_OIw16i32o = dnnl_ABc16b32a,
907     dnnl_OIw16i64o = dnnl_ABc16b64a,
908     dnnl_OIw16o16i = dnnl_ABc16a16b,
909     dnnl_Oiw16o = dnnl_Abc16a,
910     dnnl_OIw4i16o4i = dnnl_ABc4b16a4b,
911     dnnl_OIw4i32o4i = dnnl_ABc4b32a4b,
912     dnnl_OIw4i64o4i = dnnl_ABc4b64a4b,
913     dnnl_OIw2i8o4i = dnnl_ABc2b8a4b,
914     dnnl_OIw16i16o4i = dnnl_ABc16b16a4b,
915     dnnl_OIw16i16o2i = dnnl_ABc16b16a2b,
916     dnnl_OIw16o16i2o = dnnl_ABc16a16b2a,
917     dnnl_OIw4i4o = dnnl_ABc4b4a,
918     dnnl_OIw4o4i = dnnl_ABc4a4b,
919     dnnl_Oiw4o = dnnl_Abc4a,
920     dnnl_OIw8i16o2i = dnnl_ABc8b16a2b,
921     dnnl_OIw8i32o2i = dnnl_ABc8b32a2b,
922     dnnl_OIw8i64o2i = dnnl_ABc8b64a2b,
923     dnnl_OIw8i8o = dnnl_ABc8b8a,
924     dnnl_OIw8o16i2o = dnnl_ABc8a16b2a,
925     dnnl_IOw8o16i2o = dnnl_BAc8a16b2a,
926     dnnl_OIw8o8i = dnnl_ABc8a8b,
927     dnnl_OIw8o4i = dnnl_ABc8a4b,
928     dnnl_Owi16o = dnnl_Acb16a,
929     dnnl_OwI16o2i = dnnl_AcB16a2b,
930     dnnl_OwI16o4i = dnnl_AcB16a4b,
931     dnnl_Owi4o = dnnl_Acb4a,
932     dnnl_Owi8o = dnnl_Acb8a,
933 
934     // weights, 4D
935     dnnl_IOhw16i16o = dnnl_BAcd16b16a,
936     dnnl_IOhw16o16i = dnnl_BAcd16a16b,
937     dnnl_Ohwi16o = dnnl_Acdb16a,
938     dnnl_OhwI16o2i = dnnl_AcdB16a2b,
939     dnnl_OhwI16o4i = dnnl_AcdB16a4b,
940     dnnl_Ohwi32o = dnnl_Acdb32a,
941     dnnl_Ohwi4o = dnnl_Acdb4a,
942     dnnl_Ohwi8o = dnnl_Acdb8a,
943     dnnl_OIhw16i16o = dnnl_ABcd16b16a,
944     dnnl_OIhw16i32o = dnnl_ABcd16b32a,
945     dnnl_OIhw16i64o = dnnl_ABcd16b64a,
946     dnnl_OIhw16o16i = dnnl_ABcd16a16b,
947     dnnl_Oihw16o = dnnl_Abcd16a,
948     dnnl_OIhw4i16o4i = dnnl_ABcd4b16a4b,
949     dnnl_OIhw4i32o4i = dnnl_ABcd4b32a4b,
950     dnnl_OIhw4i64o4i = dnnl_ABcd4b64a4b,
951     dnnl_OIhw16i16o4i = dnnl_ABcd16b16a4b,
952     dnnl_OIhw16i16o2i = dnnl_ABcd16b16a2b,
953     dnnl_OIhw16o16i2o = dnnl_ABcd16a16b2a,
954     dnnl_OIhw4i4o = dnnl_ABcd4b4a,
955     dnnl_OIhw4o4i = dnnl_ABcd4a4b,
956     dnnl_Oihw4o = dnnl_Abcd4a,
957     dnnl_OIhw8i16o2i = dnnl_ABcd8b16a2b,
958     dnnl_OIhw8i32o2i = dnnl_ABcd8b32a2b,
959     dnnl_OIhw8i64o2i = dnnl_ABcd8b64a2b,
960     dnnl_OIhw8i8o = dnnl_ABcd8b8a,
961     dnnl_OIhw8o16i2o = dnnl_ABcd8a16b2a,
962     dnnl_OIhw2i8o4i = dnnl_ABcd2b8a4b,
963     dnnl_IOhw8o16i2o = dnnl_BAcd8a16b2a,
964     dnnl_OIhw8o8i = dnnl_ABcd8a8b,
965     dnnl_OIhw8o4i = dnnl_ABcd8a4b,
966     dnnl_Owhi16o = dnnl_Adcb16a,
967 
968     // weights, 5D
969     dnnl_Odhwi16o = dnnl_Acdeb16a,
970     dnnl_OdhwI16o2i = dnnl_AcdeB16a2b,
971     dnnl_OdhwI16o4i = dnnl_AcdeB16a4b,
972     dnnl_Odhwi4o = dnnl_Acdeb4a,
973     dnnl_Odhwi8o = dnnl_Acdeb8a,
974     dnnl_OIdhw16i16o = dnnl_ABcde16b16a,
975     dnnl_OIdhw16i32o = dnnl_ABcde16b32a,
976     dnnl_OIdhw16i64o = dnnl_ABcde16b64a,
977     dnnl_OIdhw16o16i = dnnl_ABcde16a16b,
978     dnnl_Oidhw16o = dnnl_Abcde16a,
979     dnnl_OIdhw4i4o = dnnl_ABcde4b4a,
980     dnnl_OIdhw4o4i = dnnl_ABcde4a4b,
981     dnnl_Oidhw4o = dnnl_Abcde4a,
982     dnnl_OIdhw8i16o2i = dnnl_ABcde8b16a2b,
983     dnnl_OIdhw8i32o2i = dnnl_ABcde8b32a2b,
984     dnnl_OIdhw8i64o2i = dnnl_ABcde8b64a2b,
985     dnnl_OIdhw8i8o = dnnl_ABcde8b8a,
986     dnnl_OIdhw8o16i2o = dnnl_ABcde8a16b2a,
987     dnnl_IOdhw8o16i2o = dnnl_BAcde8a16b2a,
988     dnnl_OIdhw4i16o4i = dnnl_ABcde4b16a4b,
989     dnnl_OIdhw4i32o4i = dnnl_ABcde4b32a4b,
990     dnnl_OIdhw4i64o4i = dnnl_ABcde4b64a4b,
991     dnnl_OIdhw16i16o4i = dnnl_ABcde16b16a4b,
992     dnnl_OIdhw16i16o2i = dnnl_ABcde16b16a2b,
993     dnnl_OIdhw2i8o4i = dnnl_ABcde2b8a4b,
994     dnnl_OIdhw8o8i = dnnl_ABcde8a8b,
995     dnnl_OIdhw8o4i = dnnl_ABcde8a4b,
996     dnnl_IOdhw16i16o = dnnl_BAcde16b16a,
997     dnnl_OIdhw4o8i8o4i = dnnl_ABcde4a8b8a4b,
998     dnnl_IOdhw16o16i = dnnl_BAcde16a16b,
999 
1000     // weights w/ groups, 3D
1001     dnnl_Goiw16g = dnnl_Abcd16a,
1002     dnnl_Goiw8g = dnnl_Abcd8a,
1003     dnnl_Goiw4g = dnnl_Abcd4a,
1004     dnnl_gIOw16o16i = dnnl_aCBd16b16c,
1005     dnnl_gIOw16i16o = dnnl_aCBd16c16b,
1006     dnnl_gOIw16i16o = dnnl_aBCd16c16b,
1007     dnnl_gOIw16o16i = dnnl_aBCd16b16c,
1008     dnnl_gOiw16o = dnnl_aBcd16b,
1009     dnnl_gOIw4i16o4i = dnnl_aBCd4c16b4c,
1010     dnnl_gOIw2i8o4i = dnnl_aBCd2c8b4c,
1011     dnnl_gOIw16i16o4i = dnnl_aBCd16c16b4c,
1012     dnnl_gOIw16i16o2i = dnnl_aBCd16c16b2c,
1013     dnnl_gOIw16o16i2o = dnnl_aBCd16b16c2b,
1014     dnnl_gOIw4i4o = dnnl_aBCd4c4b,
1015     dnnl_gOIw4o4i = dnnl_aBCd4b4c,
1016     dnnl_gOiw4o = dnnl_aBcd4b,
1017     dnnl_gOIw8i16o2i = dnnl_aBCd8c16b2c,
1018     dnnl_gOIw8i8o = dnnl_aBCd8c8b,
1019     dnnl_gOIw8o16i2o = dnnl_aBCd8b16c2b,
1020     dnnl_gIOw8o16i2o = dnnl_aCBd8b16c2b,
1021     dnnl_gOIw8o8i = dnnl_aBCd8b8c,
1022     dnnl_gOIw8o4i = dnnl_aBCd8b4c,
1023     dnnl_gOwi16o = dnnl_aBdc16b,
1024     dnnl_gOwI16o2i = dnnl_aBdC16b2c,
1025     dnnl_gOwI16o4i = dnnl_aBdC16b4c,
1026     dnnl_gOwi4o = dnnl_aBdc4b,
1027     dnnl_gOwi8o = dnnl_aBdc8b,
1028     dnnl_Goiw32g = dnnl_Abcd32a,
1029     dnnl_gOIw2i4o2i = dnnl_aBCd2c4b2c,
1030     dnnl_gOIw2o4i2o = dnnl_aBCd2b4c2b,
1031     dnnl_gOIw4i8o2i = dnnl_aBCd4c8b2c,
1032     dnnl_gOIw4o8i2o = dnnl_aBCd4b8c2b,
1033 
1034     // weights w/ groups, 4D
1035     dnnl_gIOhw16i16o = dnnl_aCBde16c16b,
1036     dnnl_gIOhw16o16i = dnnl_aCBde16b16c,
1037     dnnl_gOhwi16o = dnnl_aBdec16b,
1038     dnnl_gOhwI16o2i = dnnl_aBdeC16b2c,
1039     dnnl_gOhwI16o4i = dnnl_aBdeC16b4c,
1040     dnnl_gOhwi32o = dnnl_aBdec32b,
1041     dnnl_gOhwi4o = dnnl_aBdec4b,
1042     dnnl_gOhwi8o = dnnl_aBdec8b,
1043     dnnl_Goihw16g = dnnl_Abcde16a,
1044     dnnl_gOIhw16i16o = dnnl_aBCde16c16b,
1045     dnnl_gOIhw16o16i = dnnl_aBCde16b16c,
1046     dnnl_gOihw16o = dnnl_aBcde16b,
1047     dnnl_gOIhw2i8o4i = dnnl_aBCde2c8b4c,
1048     dnnl_gOIhw4i16o4i = dnnl_aBCde4c16b4c,
1049     dnnl_gOIhw16i16o4i = dnnl_aBCde16c16b4c,
1050     dnnl_gOIhw16i16o2i = dnnl_aBCde16c16b2c,
1051     dnnl_gOIhw16o16i2o = dnnl_aBCde16b16c2b,
1052     dnnl_gOIhw4i4o = dnnl_aBCde4c4b,
1053     dnnl_gOIhw4o4i = dnnl_aBCde4b4c,
1054     dnnl_gOihw4o = dnnl_aBcde4b,
1055     dnnl_Goihw8g = dnnl_Abcde8a,
1056     dnnl_Goihw4g = dnnl_Abcde4a,
1057     dnnl_gOIhw8i16o2i = dnnl_aBCde8c16b2c,
1058     dnnl_gOIhw8i8o = dnnl_aBCde8c8b,
1059     dnnl_gOIhw8o16i2o = dnnl_aBCde8b16c2b,
1060     dnnl_gIOhw8o16i2o = dnnl_aCBde8b16c2b,
1061     dnnl_gOIhw8o8i = dnnl_aBCde8b8c,
1062     dnnl_gOIhw8o4i = dnnl_aBCde8b4c,
1063     dnnl_Goihw32g = dnnl_Abcde32a,
1064     dnnl_gOwhi16o = dnnl_aBedc16b,
1065 
1066     dnnl_OIw4o8i8o4i = dnnl_ABc4a8b8a4b,
1067     dnnl_OIhw4o8i8o4i = dnnl_ABcd4a8b8a4b,
1068     dnnl_IOw4i8o8i4o = dnnl_BAc4b8a8b4a,
1069     dnnl_IOhw4i8o8i4o = dnnl_BAcd4b8a8b4a,
1070     dnnl_IOdhw4i8o8i4o = dnnl_BAcde4b8a8b4a,
1071 
1072     dnnl_OIhw2o8i8o2i = dnnl_ABcd2a8b8a2b,
1073     dnnl_gOIw4o8i8o4i = dnnl_aBCd4b8c8b4c,
1074     dnnl_gOIhw4o8i8o4i = dnnl_aBCde4b8c8b4c,
1075     dnnl_gOIdhw4o8i8o4i = dnnl_aBCdef4b8c8b4c,
1076     dnnl_gIOw4i8o8i4o = dnnl_aCBd4c8b8c4b,
1077     dnnl_gIOhw4i8o8i4o = dnnl_aCBde4c8b8c4b,
1078     dnnl_gIOdhw4i8o8i4o = dnnl_aCBdef4c8b8c4b,
1079     dnnl_gOIhw2o8i8o2i = dnnl_aBCde2b8c8b2c,
1080     dnnl_gOIhw2i4o2i = dnnl_aBCde2c4b2c,
1081     dnnl_gOIhw2o4i2o = dnnl_aBCde2b4c2b,
1082     dnnl_gOIhw4i8o2i = dnnl_aBCde4c8b2c,
1083     dnnl_gOIhw4o8i2o = dnnl_aBCde4b8c2b,
1084 
1085     // weights w/ groups, 6D
1086     dnnl_gIOdhw16i16o = dnnl_aCBdef16c16b,
1087     dnnl_gIOdhw16o16i = dnnl_aCBdef16b16c,
1088     dnnl_gOdhwi16o = dnnl_aBdefc16b,
1089     dnnl_gOdhwI16o2i = dnnl_aBdefC16b2c,
1090     dnnl_gOdhwI16o4i = dnnl_aBdefC16b4c,
1091     dnnl_gOdhwi4o = dnnl_aBdefc4b,
1092     dnnl_gOdhwi8o = dnnl_aBdefc8b,
1093     dnnl_gOIdhw16i16o = dnnl_aBCdef16c16b,
1094     dnnl_gOIdhw4i16o4i = dnnl_aBCdef4c16b4c,
1095     dnnl_gOIdhw16i16o4i = dnnl_aBCdef16c16b4c,
1096     dnnl_gOIdhw2i8o4i = dnnl_aBCdef2c8b4c,
1097     dnnl_gOIdhw16i16o2i = dnnl_aBCdef16c16b2c,
1098     dnnl_gOIdhw16o16i = dnnl_aBCdef16b16c,
1099     dnnl_gOidhw16o = dnnl_aBcdef16b,
1100     dnnl_gOIdhw4i4o = dnnl_aBCdef4c4b,
1101     dnnl_gOIdhw4o4i = dnnl_aBCdef4b4c,
1102     dnnl_gOidhw4o = dnnl_aBcdef4b,
1103     dnnl_gOIdhw8i16o2i = dnnl_aBCdef8c16b2c,
1104     dnnl_gOIdhw8i8o = dnnl_aBCdef8c8b,
1105     dnnl_gOIdhw8o16i2o = dnnl_aBCdef8b16c2b,
1106     dnnl_gIOdhw8o16i2o = dnnl_aCBdef8b16c2b,
1107     dnnl_gOIdhw8o8i = dnnl_aBCdef8b8c,
1108     dnnl_gOIdhw8o4i = dnnl_aBCdef8b4c,
1109     dnnl_Goidhw16g = dnnl_Abcdef16a,
1110     dnnl_Goidhw32g = dnnl_Abcdef32a,
1111     dnnl_gOIdhw2i4o2i = dnnl_aBCdef2c4b2c,
1112     dnnl_gOIdhw4i8o2i = dnnl_aBCdef4c8b2c,
1113     dnnl_gOIdhw2o4i2o = dnnl_aBCdef2b4c2b,
1114     dnnl_gOIdhw4o8i2o = dnnl_aBCdef4b8c2b,
1115     // weights, 3D
1116     dnnl_Owi32o = dnnl_Acb32a,
1117     dnnl_OwI32o2i = dnnl_AcB32a2b,
1118     dnnl_OwI32o4i = dnnl_AcB32a4b,
1119     dnnl_Owi48o = dnnl_Acb48a,
1120     dnnl_OwI48o2i = dnnl_AcB48a2b,
1121     dnnl_OwI48o4i = dnnl_AcB48a4b,
1122     dnnl_Owi64o = dnnl_Acb64a,
1123     dnnl_OwI64o2i = dnnl_AcB64a2b,
1124     dnnl_OwI64o4i = dnnl_AcB64a4b,
1125     dnnl_wIo2i = dnnl_cBa2b,
1126     dnnl_wIo4i = dnnl_cBa4b,
1127     dnnl_gOwi32o = dnnl_aBdc32b,
1128     dnnl_gOwI32o2i = dnnl_aBdC32b2c,
1129     dnnl_gOwI32o4i = dnnl_aBdC32b4c,
1130     dnnl_gOwi48o = dnnl_aBdc48b,
1131     dnnl_gOwI48o2i = dnnl_aBdC48b2c,
1132     dnnl_gOwI48o4i = dnnl_aBdC48b4c,
1133     dnnl_gOwi64o = dnnl_aBdc64b,
1134     dnnl_gOwI64o2i = dnnl_aBdC64b2c,
1135     dnnl_gOwI64o4i = dnnl_aBdC64b4c,
1136     dnnl_gwio = dnnl_adcb,
1137     dnnl_gwIo2i = dnnl_adCb2c,
1138     dnnl_gwIo4i = dnnl_adCb4c,
1139     // weights, 4D
1140     dnnl_OhwI32o = dnnl_Acdb32a,
1141     dnnl_OhwI32o2i = dnnl_AcdB32a2b,
1142     dnnl_OhwI32o4i = dnnl_AcdB32a4b,
1143     dnnl_Ohwi48o = dnnl_Acdb48a,
1144     dnnl_OhwI48o2i = dnnl_AcdB48a2b,
1145     dnnl_OhwI48o4i = dnnl_AcdB48a4b,
1146     dnnl_Ohwi64o = dnnl_Acdb64a,
1147     dnnl_OhwI64o2i = dnnl_AcdB64a2b,
1148     dnnl_OhwI64o4i = dnnl_AcdB64a4b,
1149     dnnl_hwIo2i = dnnl_cdBa2b,
1150     dnnl_hwIo4i = dnnl_cdBa4b,
1151     dnnl_gOhwI32o = dnnl_aBdec32b,
1152     dnnl_gOhwI32o2i = dnnl_aBdeC32b2c,
1153     dnnl_gOhwI32o4i = dnnl_aBdeC32b4c,
1154     dnnl_gOhwi48o = dnnl_aBdec48b,
1155     dnnl_gOhwI48o2i = dnnl_aBdeC48b2c,
1156     dnnl_gOhwI48o4i = dnnl_aBdeC48b4c,
1157     dnnl_gOhwi64o = dnnl_aBdec64b,
1158     dnnl_gOhwI64o2i = dnnl_aBdeC64b2c,
1159     dnnl_gOhwI64o4i = dnnl_aBdeC64b4c,
1160     dnnl_ghwio = dnnl_adecb,
1161     dnnl_ghwIo2i = dnnl_adeCb2c,
1162     dnnl_ghwIo4i = dnnl_adeCb4c,
1163     // weights, 5D
1164     dnnl_Odhwi32o = dnnl_Acdeb32a,
1165     dnnl_OdhwI32o2i = dnnl_AcdeB32a2b,
1166     dnnl_OdhwI32o4i = dnnl_AcdeB32a4b,
1167     dnnl_Odhwi48o = dnnl_Acdeb48a,
1168     dnnl_OdhwI48o2i = dnnl_AcdeB48a2b,
1169     dnnl_OdhwI48o4i = dnnl_AcdeB48a4b,
1170     dnnl_Odhwi64o = dnnl_Acdeb64a,
1171     dnnl_OdhwI64o2i = dnnl_AcdeB64a2b,
1172     dnnl_OdhwI64o4i = dnnl_AcdeB64a4b,
1173     dnnl_dhwIo2i = dnnl_cdeBa2b,
1174     dnnl_dhwIo4i = dnnl_cdeBa4b,
1175     dnnl_gOdhwi32o = dnnl_aBdefc32b,
1176     dnnl_gOdhwI32o2i = dnnl_aBdefC32b2c,
1177     dnnl_gOdhwI32o4i = dnnl_aBdefC32b4c,
1178     dnnl_gOdhwi48o = dnnl_aBdefc48b,
1179     dnnl_gOdhwI48o2i = dnnl_aBdefC48b2c,
1180     dnnl_gOdhwI48o4i = dnnl_aBdefC48b4c,
1181     dnnl_gOdhwi64o = dnnl_aBdefc64b,
1182     dnnl_gOdhwI64o2i = dnnl_aBdefC64b2c,
1183     dnnl_gOdhwI64o4i = dnnl_aBdefC64b4c,
1184     dnnl_gdhwio = dnnl_adefcb,
1185     dnnl_gdhwIo2i = dnnl_adefCb2c,
1186     dnnl_gdhwIo4i = dnnl_adefCb4c,
1187     dnnl_OI16i32o4i = dnnl_AB16b32a4b,
1188     dnnl_OI16i48o4i = dnnl_AB16b48a4b,
1189     dnnl_OI16i64o4i = dnnl_AB16b64a4b,
1190     dnnl_OI16i16o2i = dnnl_AB16b16a2b,
1191     dnnl_OI16i32o2i = dnnl_AB16b32a2b,
1192     dnnl_OI16i48o2i = dnnl_AB16b48a2b,
1193     dnnl_OI16i64o2i = dnnl_AB16b64a2b,
1194     dnnl_OIw16i32o4i = dnnl_ABc16b32a4b,
1195     dnnl_OIw16i48o4i = dnnl_ABc16b48a4b,
1196     dnnl_OIw16i64o4i = dnnl_ABc16b64a4b,
1197     dnnl_OIw16i32o2i = dnnl_ABc16b32a2b,
1198     dnnl_OIw16i48o2i = dnnl_ABc16b48a2b,
1199     dnnl_OIw16i64o2i = dnnl_ABc16b64a2b,
1200     dnnl_OIhw16i32o4i = dnnl_ABcd16b32a4b,
1201     dnnl_OIhw16i48o4i = dnnl_ABcd16b48a4b,
1202     dnnl_OIhw16i64o4i = dnnl_ABcd16b64a4b,
1203     dnnl_OIhw16i32o2i = dnnl_ABcd16b32a2b,
1204     dnnl_OIhw16i48o2i = dnnl_ABcd16b48a2b,
1205     dnnl_OIhw16i64o2i = dnnl_ABcd16b64a2b,
1206     dnnl_OIdhw16i32o4i = dnnl_ABcde16b32a4b,
1207     dnnl_OIdhw16i48o4i = dnnl_ABcde16b48a4b,
1208     dnnl_OIdhw16i64o4i = dnnl_ABcde16b64a4b,
1209     dnnl_OIdhw16i32o2i = dnnl_ABcde16b32a2b,
1210     dnnl_OIdhw16i48o2i = dnnl_ABcde16b48a2b,
1211     dnnl_OIdhw16i64o2i = dnnl_ABcde16b64a2b,
1212     dnnl_OwI16i16o2i = dnnl_AcB16b16a2b,
1213     dnnl_OwI16i16o4i = dnnl_AcB16b16a4b,
1214     dnnl_OhwI16i16o2i = dnnl_AcdB16b16a2b,
1215     dnnl_OhwI16i16o4i = dnnl_AcdB16b16a4b,
1216     dnnl_OdhwI16i16o2i = dnnl_AcdeB16b16a2b,
1217     dnnl_OdhwI16i16o4i = dnnl_AcdeB16b16a4b,
1218     dnnl_gOwI16i16o2i = dnnl_aBdC16c16b2c,
1219     dnnl_gOwI16i16o4i = dnnl_aBdC16c16b4c,
1220     dnnl_gOhwI16i16o2i = dnnl_aBdeC16c16b2c,
1221     dnnl_gOhwI16i16o4i = dnnl_aBdeC16c16b4c,
1222     dnnl_gOdhwI16i16o2i = dnnl_aBdefC16c16b2c,
1223     dnnl_gOdhwI16i16o4i = dnnl_aBdefC16c16b4c,
1224     dnnl_OwI16i32o2i = dnnl_AcB16b32a2b,
1225     dnnl_OwI16i32o4i = dnnl_AcB16b32a4b,
1226     dnnl_OwI16i48o2i = dnnl_AcB16b48a2b,
1227     dnnl_OwI16i48o4i = dnnl_AcB16b48a4b,
1228     dnnl_OwI16i64o2i = dnnl_AcB16b64a2b,
1229     dnnl_OwI16i64o4i = dnnl_AcB16b64a4b,
1230     dnnl_gOwI16i32o2i = dnnl_aBdC16c32b2c,
1231     dnnl_gOwI16i32o4i = dnnl_aBdC16c32b4c,
1232     dnnl_gOwI16i48o2i = dnnl_aBdC16c48b2c,
1233     dnnl_gOwI16i48o4i = dnnl_aBdC16c48b4c,
1234     dnnl_gOwI16i64o2i = dnnl_aBdC16c64b2c,
1235     dnnl_gOwI16i64o4i = dnnl_aBdC16c64b4c,
1236     dnnl_OhwI16i32o2i = dnnl_AcdB16b32a2b,
1237     dnnl_OhwI16i32o4i = dnnl_AcdB16b32a4b,
1238     dnnl_OhwI16i48o2i = dnnl_AcdB16b48a2b,
1239     dnnl_OhwI16i48o4i = dnnl_AcdB16b48a4b,
1240     dnnl_OhwI16i64o2i = dnnl_AcdB16b64a2b,
1241     dnnl_OhwI16i64o4i = dnnl_AcdB16b64a4b,
1242     dnnl_gOhwI16i32o2i = dnnl_aBdeC16c32b2c,
1243     dnnl_gOhwI16i32o4i = dnnl_aBdeC16c32b4c,
1244     dnnl_gOhwI16i48o2i = dnnl_aBdeC16c48b2c,
1245     dnnl_gOhwI16i48o4i = dnnl_aBdeC16c48b4c,
1246     dnnl_gOhwI16i64o2i = dnnl_aBdeC16c64b2c,
1247     dnnl_gOhwI16i64o4i = dnnl_aBdeC16c64b4c,
1248     dnnl_OdhwI16i32o2i = dnnl_AcdeB16b32a2b,
1249     dnnl_OdhwI16i32o4i = dnnl_AcdeB16b32a4b,
1250     dnnl_OdhwI16i48o2i = dnnl_AcdeB16b48a2b,
1251     dnnl_OdhwI16i48o4i = dnnl_AcdeB16b48a4b,
1252     dnnl_OdhwI16i64o2i = dnnl_AcdeB16b64a2b,
1253     dnnl_OdhwI16i64o4i = dnnl_AcdeB16b64a4b,
1254     dnnl_gOdhwI16i32o2i = dnnl_aBdefC16c32b2c,
1255     dnnl_gOdhwI16i32o4i = dnnl_aBdefC16c32b4c,
1256     dnnl_gOdhwI16i48o2i = dnnl_aBdefC16c48b2c,
1257     dnnl_gOdhwI16i48o4i = dnnl_aBdefC16c48b4c,
1258     dnnl_gOdhwI16i64o2i = dnnl_aBdefC16c64b2c,
1259     dnnl_gOdhwI16i64o4i = dnnl_aBdefC16c64b4c,
1260     dnnl_hwioG16g = dnnl_decbA16a,
1261     dnnl_NCdhw40n16c = dnnl_ABcde40a16b,
1262     dnnl_NCw40n16c = dnnl_ABc40a16b,
1263     dnnl_NChw40n16c = dnnl_ABcd40a16b,
1264     dnnl_NCw40n32c = dnnl_ABc40a32b,
1265     dnnl_NChw40n32c = dnnl_ABcd40a32b,
1266     dnnl_NCdhw40n32c = dnnl_ABcde40a32b,
1267     dnnl_OIdhw4o8i8o2i = dnnl_ABcde4a8b8a2b,
1268     dnnl_OIhw4o8i8o2i = dnnl_ABcd4a8b8a2b,
1269     dnnl_OIw4o8i8o2i = dnnl_ABc4a8b8a2b,
1270     dnnl_gOIdhw4o8i8o2i = dnnl_aBCdef4b8c8b2c,
1271     dnnl_gOIhw4o8i8o2i = dnnl_aBCde4b8c8b2c,
1272     dnnl_gOIw4o8i8o2i = dnnl_aBCd4b8c8b2c,
1273     dnnl_IOdhw4i8o8i2o = dnnl_BAcde4b8a8b2a,
1274     dnnl_IOhw4i8o8i2o = dnnl_BAcd4b8a8b2a,
1275     dnnl_IOw4i8o8i2o = dnnl_BAc4b8a8b2a,
1276     dnnl_gIOdhw4i8o8i2o = dnnl_aCBdef4c8b8c2b,
1277     dnnl_gIOhw4i8o8i2o = dnnl_aCBde4c8b8c2b,
1278     dnnl_gIOw4i8o8i2o = dnnl_aCBd4c8b8c2b,
1279     dnnl_NCw2c32n8c = dnnl_ABc2b32a8b,
1280     dnnl_NChw2c32n8c = dnnl_ABcd2b32a8b,
1281     dnnl_NCdhw2c32n8c = dnnl_ABcde2b32a8b,
1282     dnnl_OIw2i8o16i4o = dnnl_ABc2b8a16b4a,
1283     dnnl_OIhw2i8o16i4o = dnnl_ABcd2b8a16b4a,
1284     dnnl_OIdhw2i8o16i4o = dnnl_ABcde2b8a16b4a,
1285     dnnl_OIw2o8i16o4i = dnnl_ABc2a8b16a4b,
1286     dnnl_OIw2o8i16o2i = dnnl_ABc2a8b16a2b,
1287     dnnl_IOw2i8o16i4o = dnnl_BAc2b8a16b4a,
1288     dnnl_IOw2i8o16i2o = dnnl_BAc2b8a16b2a,
1289     dnnl_OIhw2o8i16o4i = dnnl_ABcd2a8b16a4b,
1290     dnnl_OIhw2o8i16o2i = dnnl_ABcd2a8b16a2b,
1291     dnnl_IOhw2i8o16i4o = dnnl_BAcd2b8a16b4a,
1292     dnnl_IOhw2i8o16i2o = dnnl_BAcd2b8a16b2a,
1293     dnnl_OIdhw2o8i16o4i = dnnl_ABcde2a8b16a4b,
1294     dnnl_OIdhw2o8i16o2i = dnnl_ABcde2a8b16a2b,
1295     dnnl_IOdhw2i8o16i4o = dnnl_BAcde2b8a16b4a,
1296     dnnl_IOdhw2i8o16i2o = dnnl_BAcde2b8a16b2a,
1297     dnnl_gOIw2o8i16o2i = dnnl_aBCd2b8c16b2c,
1298     dnnl_gIOw2i8o16i2o = dnnl_aCBd2c8b16c2b,
1299     dnnl_gIOhw2i8o16i2o = dnnl_aBCde2c8b16c2b,
1300     dnnl_gIOdhw2i8o16i2o = dnnl_aBCdef2c8b16c2b,
1301     dnnl_gOIhw2o8i16o2i = dnnl_aBCde2b8c16b2c,
1302     dnnl_gOIdhw2o8i16o2i = dnnl_aBCdef2b8c16b2c,
1303     dnnl_gOIw2o8i16o4i = dnnl_aBCd2b8c16b4c,
1304     dnnl_gOIhw2o8i16o4i = dnnl_aBCde2b8c16b4c,
1305 } dnnl_format_tag_t;
1306 
1307 /// @} dnnl_api_memory
1308 
1309 /// @addtogroup dnnl_api_primitives
1310 /// @{
1311 /// @addtogroup dnnl_api_primitives_common
1312 /// @{
1313 
/// Kinds of propagation.
///
/// NOTE(review): the numeric values below are part of the library ABI;
/// do not renumber existing entries.
typedef enum {
    // TODO: suggest renames
    /// Undefined propagation type.
    dnnl_prop_kind_undef = 0,
    /// Forward data propagation (training mode). In this mode primitives
    /// perform computations necessary for subsequent backward propagation.
    dnnl_prop_kind_undef = 0 == 0 ? dnnl_prop_kind_undef : dnnl_prop_kind_undef,
    dnnl_forward_training = 64,
    /// Forward data propagation (inference mode). In this mode primitives
    /// perform only computations that are necessary for inference and omit
    /// computations that are necessary only for backward propagation.
    dnnl_forward_inference = 96,
    /// Forward data propagation (alias for @c dnnl_forward_inference).
    dnnl_forward_scoring = dnnl_forward_inference,
    /// Forward data propagation (alias for @c dnnl_forward_training).
    dnnl_forward = dnnl_forward_training,
    /// Backward propagation (with respect to all parameters).
    dnnl_backward = 128,
    /// Backward data propagation.
    dnnl_backward_data = 160,
    /// Backward weights propagation.
    dnnl_backward_weights = 192,
    /// Backward bias propagation.
    dnnl_backward_bias = 193,
} dnnl_prop_kind_t;
1339 
/// Kinds of primitives. Used to implement a way to extend the library with new
/// primitives without changing the ABI.
///
/// NOTE(review): values are implicitly sequential starting from 0, so new
/// kinds must only be appended immediately before #dnnl_primitive_kind_max;
/// inserting or reordering entries would silently change the ABI values of
/// every subsequent kind.
typedef enum {
    /// Undefined primitive
    dnnl_undefined_primitive,
    /// A reorder primitive.
    dnnl_reorder,
    /// A shuffle primitive.
    dnnl_shuffle,
    /// A (out-of-place) concat primitive.
    dnnl_concat,
    /// A sum primitive.
    dnnl_sum,
    /// A convolution primitive.
    dnnl_convolution,
    /// A deconvolution primitive.
    dnnl_deconvolution,
    /// An element-wise primitive.
    dnnl_eltwise,
    /// A softmax primitive.
    dnnl_softmax,
    /// A pooling primitive.
    dnnl_pooling,
    /// An LRN primitive.
    dnnl_lrn,
    /// A batch normalization primitive.
    dnnl_batch_normalization,
    /// A layer normalization primitive.
    dnnl_layer_normalization,
    /// An inner product primitive.
    dnnl_inner_product,
    /// A rnn primitive.
    dnnl_rnn,
    /// A matrix multiplication primitive (internal).
    dnnl_gemm,
    /// A binary primitive.
    dnnl_binary,
    /// A logsoftmax primitive.
    dnnl_logsoftmax,
    /// A matrix multiplication primitive.
    dnnl_matmul,
    /// A resampling primitive.
    dnnl_resampling,
    /// A pooling version 2 primitive (pooling with dilation support).
    dnnl_pooling_v2,
    /// A reduction primitive.
    dnnl_reduction,
    /// A PReLU primitive.
    dnnl_prelu,

    /// Parameter to allow internal only primitives without undefined behavior.
    /// This parameter is chosen to be valid for so long as sizeof(int) >= 2.
    dnnl_primitive_kind_max = 0x7fff,
} dnnl_primitive_kind_t;
1394 
/// Kinds of algorithms.
///
/// NOTE(review): explicit values are part of the library ABI. The eltwise
/// kinds span several disjoint numeric ranges (0x?f, 0x?0, and 0x10?);
/// check for collisions before adding new entries.
typedef enum {
    /// Undefined algorithm kind.
    dnnl_alg_kind_undef,
    /// Direct convolution
    dnnl_convolution_direct = 0x1,
    /// Winograd convolution
    dnnl_convolution_winograd = 0x2,
    /// Convolution algorithm (either direct or Winograd) is chosen just in time
    dnnl_convolution_auto = 0x3,
    /// Direct deconvolution
    dnnl_deconvolution_direct = 0xa,
    /// Winograd deconvolution
    dnnl_deconvolution_winograd = 0xb,
    /// Eltwise: ReLU
    dnnl_eltwise_relu = 0x1f,
    /// Eltwise: hyperbolic tangent non-linearity (tanh)
    dnnl_eltwise_tanh = 0x2f,
    /// Eltwise: exponential linear unit (elu)
    dnnl_eltwise_elu = 0x3f,
    /// Eltwise: square
    dnnl_eltwise_square = 0x4f,
    /// Eltwise: abs
    dnnl_eltwise_abs = 0x5f,
    /// Eltwise: square root
    dnnl_eltwise_sqrt = 0x6f,
    /// Eltwise: linear
    dnnl_eltwise_linear = 0x7f,
    /// Eltwise: bounded_relu
    dnnl_eltwise_bounded_relu = 0x8f,
    /// Eltwise: soft_relu
    dnnl_eltwise_soft_relu = 0x9f,
    /// Eltwise: logistic
    dnnl_eltwise_logistic = 0xaf,
    /// Eltwise: exponent
    dnnl_eltwise_exp = 0xbf,
    /// Eltwise: gelu
    ///
    /// @note Tanh approximation formula is used to approximate
    /// the cumulative distribution function of a Gaussian here
    dnnl_eltwise_gelu_tanh = 0xcf,
    /// Eltwise: tanh-based gelu (alias for dnnl_eltwise_gelu_tanh)
    dnnl_eltwise_gelu = dnnl_eltwise_gelu_tanh,
    /// Eltwise: swish
    dnnl_eltwise_swish = 0xdf,
    /// Eltwise: natural logarithm
    dnnl_eltwise_log = 0xef,
    /// Eltwise: clip
    dnnl_eltwise_clip = 0xff,
    // The entries below use a lower numeric range (0x10..0x70) that does not
    // collide with the 0x?f values above.
    /// Eltwise: clip version 2
    dnnl_eltwise_clip_v2 = 0x10,
    /// Eltwise: pow
    dnnl_eltwise_pow = 0x20,
    /// Eltwise: erf-based gelu
    dnnl_eltwise_gelu_erf = 0x30,
    /// Eltwise: round
    dnnl_eltwise_round = 0x40,
    /// Eltwise: logsigmoid
    dnnl_eltwise_logsigmoid = 0x50,
    /// Eltwise: mish
    dnnl_eltwise_mish = 0x60,
    /// Eltwise: hardswish
    dnnl_eltwise_hardswish = 0x70,
    // "use_dst_for_bwd" variants compute the backward pass from the forward
    // destination tensor instead of the source tensor.
    /// Eltwise: ReLU (dst for backward)
    dnnl_eltwise_relu_use_dst_for_bwd = 0x100,
    /// Eltwise: hyperbolic tangent non-linearity (tanh) (dst for backward)
    dnnl_eltwise_tanh_use_dst_for_bwd = 0x101,
    /// Eltwise: exponential linear unit (elu) (dst for backward)
    dnnl_eltwise_elu_use_dst_for_bwd = 0x102,
    /// Eltwise: square root (dst for backward)
    dnnl_eltwise_sqrt_use_dst_for_bwd = 0x103,
    /// Eltwise: logistic (dst for backward)
    dnnl_eltwise_logistic_use_dst_for_bwd = 0x104,
    /// Eltwise: exp (dst for backward)
    dnnl_eltwise_exp_use_dst_for_bwd = 0x105,
    /// Eltwise: clip version 2 (dst for backward)
    dnnl_eltwise_clip_v2_use_dst_for_bwd = 0x106,
    /// Max pooling
    dnnl_pooling_max = 0x1ff,
    /// Average pooling include padding
    dnnl_pooling_avg_include_padding = 0x2ff,
    /// Average pooling exclude padding
    dnnl_pooling_avg_exclude_padding = 0x3ff,
    /// Average pooling (alias for #dnnl_pooling_avg_exclude_padding)
    dnnl_pooling_avg = dnnl_pooling_avg_exclude_padding,
    /// Local response normalization (LRN) across multiple channels
    dnnl_lrn_across_channels = 0xaff,
    /// LRN within a single channel
    dnnl_lrn_within_channel = 0xbff,
    /// RNN cell
    dnnl_vanilla_rnn = 0x1fff,
    /// LSTM cell
    dnnl_vanilla_lstm = 0x2fff,
    /// GRU cell
    dnnl_vanilla_gru = 0x3fff,
    /// GRU cell with linear before reset
    ///
    /// Modification of original GRU cell. Differs from #dnnl_vanilla_gru
    /// in how the new memory gate is calculated:
    /// \f[ c_t = tanh(W_c*x_t + b_{c_x} + r_t*(U_c*h_{t-1}+b_{c_h})) \f]
    /// Primitive expects 4 biases on input:
    /// \f$[b_{u}, b_{r}, b_{c_x}, b_{c_h}]\f$
    dnnl_lbr_gru = 0x4fff,
    /// Binary add
    dnnl_binary_add = 0x1fff0,
    /// Binary mul
    dnnl_binary_mul = 0x1fff1,
    /// Binary max
    dnnl_binary_max = 0x1fff2,
    /// Binary min
    dnnl_binary_min = 0x1fff3,
    /// Binary div
    dnnl_binary_div = 0x1fff4,
    /// Binary sub
    dnnl_binary_sub = 0x1fff5,
    /// Binary greater or equal
    dnnl_binary_ge = 0x1fff6,
    /// Binary greater than
    dnnl_binary_gt = 0x1fff7,
    /// Binary less or equal
    dnnl_binary_le = 0x1fff8,
    /// Binary less than
    dnnl_binary_lt = 0x1fff9,
    /// Binary equal
    dnnl_binary_eq = 0x1fffa,
    /// Binary not equal
    dnnl_binary_ne = 0x1fffb,
    /// Nearest Neighbor Resampling Method
    dnnl_resampling_nearest = 0x2fff0,
    /// Linear Resampling Method
    dnnl_resampling_linear = 0x2fff1,
    // The reduction kinds below take implicit sequential values continuing
    // from dnnl_resampling_linear (i.e. 0x2fff2 onward).
    /// Reduction using max
    dnnl_reduction_max,
    /// Reduction using min
    dnnl_reduction_min,
    /// Reduction using sum
    dnnl_reduction_sum,
    /// Reduction using mul
    dnnl_reduction_mul,
    /// Reduction using mean
    dnnl_reduction_mean,
    /// Reduction using lp norm
    dnnl_reduction_norm_lp_max,
    /// Reduction using lp norm
    dnnl_reduction_norm_lp_sum,
    /// Reduction using lp norm without final pth-root
    dnnl_reduction_norm_lp_power_p_max,
    /// Reduction using lp norm without final pth-root
    dnnl_reduction_norm_lp_power_p_sum,
} dnnl_alg_kind_t;
1544 
/// Flags for normalization primitives.
///
/// The values are single-bit masks and may be combined with bitwise OR
/// (e.g. `dnnl_use_scale | dnnl_use_shift`).
typedef enum {
    /// Use no normalization flags
    ///
    /// If specified
    ///  - on forward training propagation mean and variance are computed and
    ///    stored as output
    ///  - on backward propagation compute full derivative wrt data
    ///  - on backward propagation prop_kind == #dnnl_backward_data has the same
    ///    behavior as prop_kind == #dnnl_backward
    dnnl_normalization_flags_none = 0x0U,

    /// Use global statistics
    ///
    /// If specified
    ///  - on forward propagation use mean and variance provided by user (input)
    ///  - on backward propagation reduces the amount of computations, since
    ///    mean and variance are considered as constants
    ///
    ///  If not specified:
    ///   - on forward propagation mean and variance are computed and stored as
    ///     output
    ///   - on backward propagation compute full derivative wrt data
    dnnl_use_global_stats = 0x1U,

    /// Use scale and shift parameters
    ///
    /// If specified:
    ///  - on forward propagation use scale and shift (aka scale and bias) for
    ///    the normalization results
    ///  - on backward propagation (for prop_kind == #dnnl_backward) compute
    ///    diff wrt scale and shift (hence one extra output used)
    ///
    /// If not specified:
    ///  - on backward propagation prop_kind == #dnnl_backward_data has the
    ///    same behavior as prop_kind == #dnnl_backward
    dnnl_use_scaleshift = 0x2U,

    /// Fuse with ReLU
    ///
    /// The flag implies negative slope being 0. On training this is the only
    /// configuration supported. For inference, to use non-zero negative slope
    /// consider using @ref dev_guide_attributes_post_ops.
    ///
    /// If specified:
    ///  - on inference this option behaves the same as if the primitive were
    ///    fused with ReLU using post ops API with zero negative slope.
    ///  - on training primitive requires workspace (required to be able to
    ///    perform backward pass)
    dnnl_fuse_norm_relu = 0x4U,

    /// Use scale parameter
    ///
    /// If specified:
    ///  - on forward propagation use scale for the normalization results
    ///  - on backward propagation (for prop_kind == #dnnl_backward) compute
    ///    diff wrt scale (hence one extra output used)
    dnnl_use_scale = 0x8U,

    /// Use shift parameter
    ///
    /// If specified:
    ///  - on forward propagation use shift (aka bias) for the normalization
    ///    results
    ///  - on backward propagation (for prop_kind == #dnnl_backward) compute
    ///    diff wrt shift (hence one extra output used)
    dnnl_use_shift = 0x10U,
} dnnl_normalization_flags_t;
1613 
1614 /// @} dnnl_api_primitives_common
1615 /// @} dnnl_api_primitives
1616 
1617 /// @addtogroup dnnl_api_memory
1618 /// @{
1619 
/// Maximum number of dimensions a tensor can have. Only restricts the amount
/// of space used for the tensor description. Individual computational
/// primitives may support only tensors of certain dimensions.
#define DNNL_MAX_NDIMS 12

/// A wildcard value for dimensions that are unknown at a primitive creation
/// time.
#define DNNL_RUNTIME_DIM_VAL INT64_MIN

/// A `size_t` counterpart of the DNNL_RUNTIME_DIM_VAL.
/// For instance, this value is returned by dnnl_memory_desc_get_size() if
/// either of the dimensions or strides equal to #DNNL_RUNTIME_DIM_VAL.
#define DNNL_RUNTIME_SIZE_VAL ((size_t)DNNL_RUNTIME_DIM_VAL)

/// @cond DO_NOT_DOCUMENT_THIS
/// Hex representation for a **special** quiet NAN (!= NAN from math.h)
// A union is used so the bit pattern 0x7fc000d0 can be read back as a float
// without the undefined behavior of a pointer-cast type pun.
static const union {
    unsigned u;
    float f;
} DNNL_RUNTIME_F32_VAL_REP = {0x7fc000d0};
/// @endcond

/// A wildcard value for floating point values that are unknown at a primitive
/// creation time.
#define DNNL_RUNTIME_F32_VAL (DNNL_RUNTIME_F32_VAL_REP.f)

/// @cond DO_NOT_DOCUMENT_THIS
static const int DNNL_RUNTIME_S32_VAL_REP = INT32_MIN;
/// @endcond

/// A wildcard value for int32_t values that are unknown at a primitive creation
/// time.
#define DNNL_RUNTIME_S32_VAL DNNL_RUNTIME_S32_VAL_REP

/// A type to describe tensor dimension.
typedef int64_t dnnl_dim_t;

/// A type to describe tensor dimensions.
/// A fixed-size array; the actual number of meaningful entries is given by
/// the ndims of the containing memory descriptor.
typedef dnnl_dim_t dnnl_dims_t[DNNL_MAX_NDIMS];
1659 
/// Generic description of blocked data layout for most memory formats.
///
/// @sa @ref dev_guide_understanding_memory_formats
typedef struct {
    /// The strides between the outermost blocks.
    /// In case of plain (non-blocked) formats the strides between dimensions.
    dnnl_dims_t strides;
    // Innermost section
    // ASSUMPTION: the innermost blocks are always dense
    /// The number of innermost blocks, e.g. 3 in case of `OIhw_4i16o4i_`
    int inner_nblks;
    /// The size of the blocks, e.g. `{4, 16, 4}` in case of `OIhw_4i16o4i`
    dnnl_dims_t inner_blks;
    /// The logical indices of the blocks, e.g. `{1, 0, 1}` in case of
    /// `4i16o4i`, because `i` is the 1st dim and `o` is the 0th dim
    dnnl_dims_t inner_idxs;
} dnnl_blocking_desc_t;
1677 
/// Winograd-specific formats
///
/// All values except #dnnl_wino_undef denote library-internal weights
/// layouts for Winograd convolutions.
typedef enum {
    /// Undefined memory format, used for empty memory descriptors.
    dnnl_wino_undef = 0,
    // Tensors of weights for 2x3 winograd convolutions.
    dnnl_wino_wei_aaOIoi, ///< Internal weights format for 2x3 Winograd
    dnnl_wino_wei_aaOio, ///< Internal weights format for 2x3 Winograd
    dnnl_wino_wei_aaOBiOo, ///< Internal weights format for 2x3 Winograd
    // Tensor of weights for 4x3 convolution.
    dnnl_wino_wei_OBaaIBOIio ///< Internal weights format for 4x3 Winograd
} dnnl_wino_memory_format_t;
1689 
/// Description of tensor of weights for winograd 2x3 convolution.
typedef struct {
    /// Winograd-specific weights memory format.
    dnnl_wino_memory_format_t wino_format;
    /// Filter spatial size; presumably the Winograd `r` parameter
    /// (3 for the 2x3 transform) -- confirm.
    int r;
    /// Transformed tile size; presumably `alpha = m + r - 1` in Winograd
    /// terms -- confirm.
    int alpha;
    /// Number of input channels.
    int ic;
    /// Number of output channels.
    int oc;
    /// Input channel block size.
    int ic_block;
    /// Output channel block size.
    int oc_block;
    /// Second-level input channel block size.
    int ic2_block;
    /// Second-level output channel block size.
    int oc2_block;
    /// Adjustment scale applied to the weights.
    float adj_scale;
    /// Size of the weights buffer; presumably in bytes -- confirm.
    size_t size;
} dnnl_wino_desc_t;
1704 
/// Memory formats for RNN weights packed by a GEMM-like packing routine.
typedef enum {
    /// Undefined packed format, used for empty memory descriptors.
    dnnl_packed_format_undef = 0,
    /// Packed `ldigo` weights layout -- presumably (layers, directions,
    /// input, gates, output); confirm.
    dnnl_ldigo_p,
    /// Packed `ldgoi` weights layout.
    dnnl_ldgoi_p,
    /// Packed `ldio` weights layout.
    dnnl_ldio_p
} dnnl_rnn_packed_memory_format_t;
1711 
/// Maximum number of parts of RNN weights tensor that require separate
/// computation.
#define DNNL_RNN_MAX_N_PARTS 4

/// Description of tensor of packed weights for rnn.
typedef struct {
    /// Packed weights memory format.
    dnnl_rnn_packed_memory_format_t format;
    /// Number of parts the weights tensor is split into; only the first
    /// @p n_parts entries of the arrays below are valid.
    int n_parts;
    /// Matrix dimension `n`; presumably the GEMM `N` dimension -- confirm.
    int n;
    /// Leading dimension; presumably the GEMM `ldb` -- confirm.
    int ldb;
    /// Sizes of the individual parts.
    int parts[DNNL_RNN_MAX_N_PARTS];
    /// Packed buffer sizes of the individual parts; presumably in
    /// bytes -- confirm.
    size_t part_pack_size[DNNL_RNN_MAX_N_PARTS];
    /// Per-part flags; presumably non-zero when the part is stored in
    /// packed form -- confirm.
    unsigned pack_part[DNNL_RNN_MAX_N_PARTS];
    /// Offset to the compensation data within the buffer.
    size_t offset_compensation;
    /// Total size of the packed weights buffer; presumably in
    /// bytes -- confirm.
    size_t size;
    /// For future backwards compatibility.
    char reserved[200];
} dnnl_rnn_packed_desc_t;
1729 
/// Flags for memory special features
typedef enum {
    /// No special features.
    dnnl_memory_extra_flag_none = 0x0U,
    /// Indicates the weights have an additional buffer, that depends on the
    /// @p compensation_mask.
    ///
    /// For instance, in 4D case with the compensation mask equals (1 << 0)
    /// the additional buffer would consist of OC values:
    /// O[oc : 0,OC] =
    ///  -128 * SUM(ic : 0,IC; kh : 0,KH; kw : 0,KW){ weights(oc, ic, kh, kw) }
    dnnl_memory_extra_flag_compensation_conv_s8s8 = 0x1U,
    /// Indicates a scale adjustment is applied to the data
    /// (see dnnl_memory_extra_desc_t::scale_adjust).
    dnnl_memory_extra_flag_scale_adjust = 0x2U,
    /// Indicates the weights have a u8s8 RNN compensation buffer.
    dnnl_memory_extra_flag_rnn_u8s8_compensation = 0x4U,
    /// Alias of #dnnl_memory_extra_flag_rnn_u8s8_compensation; presumably
    /// kept for GPU-specific code paths -- confirm.
    dnnl_memory_extra_flag_gpu_rnn_u8s8_compensation
    = dnnl_memory_extra_flag_rnn_u8s8_compensation,
    /// Indicates the weights have an additional buffer compensating for
    /// an asymmetrically quantized source
    /// (see dnnl_memory_extra_desc_t::asymm_compensation_mask).
    dnnl_memory_extra_flag_compensation_conv_asymmetric_src = 0x8U,
    /// Indicates the weights have an s8s8 RNN compensation buffer.
    /// NOTE(review): 0x16U is not a single bit (decimal 22; it overlaps the
    /// 0x2U and 0x4U flags). 0x10U looks intended, but this value is part of
    /// the public ABI -- confirm upstream intent before changing.
    dnnl_memory_extra_flag_rnn_s8s8_compensation = 0x16U,
} dnnl_memory_extra_flags_t;
1748 
/// Description of extra information stored in memory
typedef struct {
    /// The flags contain arbitrary extra information, such as compensation.
    /// A bitmask of #dnnl_memory_extra_flags_t values (stored as uint64_t
    /// for forward compatibility).
    /// @sa dnnl_memory_extra_flags_t
    uint64_t flags;
    /// Compensation mask
    /// (used with #dnnl_memory_extra_flag_compensation_conv_s8s8).
    int compensation_mask;
    /// Scale applied to the data
    /// (used with #dnnl_memory_extra_flag_scale_adjust).
    float scale_adjust;
    /// Compensation mask for asymmetric quantization
    /// (used with #dnnl_memory_extra_flag_compensation_conv_asymmetric_src).
    int asymm_compensation_mask;
    /// For future backwards compatibility
    char reserved[60];
} dnnl_memory_extra_desc_t;
1763 
/// Memory descriptor. The description is based on a number of dimensions,
/// dimensions themselves, plus information about elements type and memory
/// format. Additionally, contains format-specific descriptions of the data
/// layout.
typedef struct {
    /// Number of dimensions
    int ndims;
    /// Dimensions in the following order:
    /// - CNN data tensors: mini-batch, channel, spatial
    ///   (<code>{N, C, [[D,] H,] W}</code>)
    /// - CNN weight tensors: group (optional), output channel, input channel,
    ///   spatial (<code>{[G,] O, I, [[D,] H,] W}</code>)
    /// - RNN data tensors: time, mini-batch, channels (<code>{T, N, C}</code>)
    ///   or layers, directions, states, mini-batch, channels (<code>{L, D, S, N, C}</code>)
    /// - RNN weight tensor: layers, directions, input channel, gates, output channels
    ///   (<code>{L, D, I, G, O}</code>).
    ///
    /// @note
    ///    The order of dimensions does not depend on the memory format, so
    ///    whether the data is laid out in #dnnl_nchw or #dnnl_nhwc
    ///    the dims for 4D CN data tensor would be <code>{N, C, H, W}</code>.
    dnnl_dims_t dims;

    /// Data type of the tensor elements.
    dnnl_data_type_t data_type;

    /// Size of the data including padding in each dimension.
    dnnl_dims_t padded_dims;

    /// Per-dimension offset from the padding to actual data, the top-level
    /// tensor with offsets applied must lie within the padding area.
    dnnl_dims_t padded_offsets;

    /// Offset from memory origin to the current block, non-zero only in
    /// a description of a memory sub-block.
    dnnl_dim_t offset0;

    /// Memory format kind; selects which member of @p format_desc below
    /// is active.
    dnnl_format_kind_t format_kind;
    union {
        /// Description of the data layout for memory formats that use
        /// blocking.
        dnnl_blocking_desc_t blocking;
        /// Tensor of weights for integer 8bit winograd convolution.
        dnnl_wino_desc_t wino_desc;
        /// Tensor of packed weights for RNN.
        dnnl_rnn_packed_desc_t rnn_packed_desc;
        // ... other descriptions possible
    } format_desc;

    /// Description of extra information attached to the memory, such as
    /// compensation buffers. @sa dnnl_memory_extra_desc_t
    dnnl_memory_extra_desc_t extra;
} dnnl_memory_desc_t;
1816 
/// @struct dnnl_memory
/// An opaque structure to describe a memory object; its definition is
/// internal to the library.
struct dnnl_memory;

/// A memory handle (mutable).
typedef struct dnnl_memory *dnnl_memory_t;

/// A constant memory handle.
typedef const struct dnnl_memory *const_dnnl_memory_t;
1826 
/// Special pointer value that indicates that a memory object should not have
/// an underlying buffer.
#define DNNL_MEMORY_NONE (NULL)

/// Special pointer value that indicates that the library needs to allocate an
/// underlying buffer for a memory object. The value `(void *)(size_t)-1` is
/// an address that can never be a valid user buffer, so it is safe to use as
/// a sentinel.
#define DNNL_MEMORY_ALLOCATE ((void *)(size_t)-1)
1834 
1835 /// @} dnnl_api_memory
1836 
1837 /// @addtogroup dnnl_api_primitives
1838 /// @{
1839 /// @addtogroup dnnl_api_primitives_common
1840 /// @{
1841 
/// A pointer to any of the operation descriptors. Every concrete operation
/// descriptor in this file begins with a #dnnl_primitive_kind_t field, which
/// allows the pointed-to descriptor to self-identify its actual type.
typedef void *dnnl_op_desc_t;
/// A pointer to any of the operation descriptors (constant variant).
typedef const void *const_dnnl_op_desc_t;
1846 
1847 /// @} dnnl_api_primitives_common
1848 /// @} dnnl_api_primitives
1849 
1850 /// @addtogroup dnnl_api_primitives
1851 /// @{
1852 
1853 /// @addtogroup dnnl_api_convolution
1854 /// @{
1855 
/// A descriptor of a convolution operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_convolution.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward_data,
    /// #dnnl_backward_weights, and #dnnl_backward_bias.
    dnnl_prop_kind_t prop_kind;
    /// The kind of the convolution algorithm. Possible values:
    /// #dnnl_convolution_direct.
    /// NOTE(review): other convolution algorithms (e.g. Winograd, for which
    /// weights formats exist in this file) may also be valid here -- confirm
    /// against the dnnl_alg_kind_t definition.
    dnnl_alg_kind_t alg_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Source gradient memory descriptor.
    dnnl_memory_desc_t diff_src_desc;
    /// Weights memory descriptor.
    dnnl_memory_desc_t weights_desc;
    /// Weights gradient memory descriptor.
    dnnl_memory_desc_t diff_weights_desc;
    /// Bias memory descriptor.
    dnnl_memory_desc_t bias_desc;
    /// Bias gradient memory descriptor.
    dnnl_memory_desc_t diff_bias_desc;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// Destination gradient memory descriptor.
    dnnl_memory_desc_t diff_dst_desc;
    /// Convolution strides in each spatial dimension.
    dnnl_dims_t strides;
    /// Convolution dilates in each spatial dimension.
    dnnl_dims_t dilates;
    /// Padding in each spatial dimension. padding[0] is the padding at the
    /// beginning (@p padding_l), padding[1] is the padding at the end (@p
    /// padding_r).
    dnnl_dims_t padding[2];
    /// The accumulator data type. Initialized automatically.
    dnnl_data_type_t accum_data_type;
} dnnl_convolution_desc_t;
1895 
1896 /// @} dnnl_api_convolution
1897 
1898 /// @addtogroup dnnl_api_deconvolution
1899 /// @{
1900 
/// A descriptor of a deconvolution operation. An alias of the convolution
/// structure; presumably @p primitive_kind must be #dnnl_deconvolution --
/// confirm.
typedef dnnl_convolution_desc_t dnnl_deconvolution_desc_t;
1903 
1904 /// @} dnnl_api_deconvolution
1905 
1906 /// @addtogroup dnnl_api_shuffle
1907 /// @{
1908 
/// A descriptor of a shuffle operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_shuffle.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// Source and destination memory descriptor,
    /// and source and destination gradient memory descriptor.
    dnnl_memory_desc_t data_desc;
    /// Axis for shuffling.
    int axis;
    /// Number of groups.
    /// NOTE(review): the field name says "group_size" while the comment says
    /// "number of groups" -- these differ unless the axis divides evenly;
    /// confirm which one is meant.
    dnnl_dim_t group_size;
} dnnl_shuffle_desc_t;
1925 
1926 /// @} dnnl_api_shuffle
1927 
1928 /// @addtogroup dnnl_api_eltwise
1929 /// @{
1930 
/// A descriptor of an element-wise operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_eltwise.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// The kind of eltwise algorithm. Possible values: #dnnl_eltwise_relu,
    /// #dnnl_eltwise_tanh, #dnnl_eltwise_elu, #dnnl_eltwise_square,
    /// #dnnl_eltwise_abs, #dnnl_eltwise_sqrt, #dnnl_eltwise_linear,
    /// #dnnl_eltwise_bounded_relu, #dnnl_eltwise_soft_relu,
    /// #dnnl_eltwise_logistic, #dnnl_eltwise_exp, #dnnl_eltwise_gelu_tanh,
    /// #dnnl_eltwise_swish, #dnnl_eltwise_log, #dnnl_eltwise_clip,
    /// #dnnl_eltwise_clip_v2, #dnnl_eltwise_pow, #dnnl_eltwise_gelu_erf,
    /// #dnnl_eltwise_round, #dnnl_eltwise_logsigmoid, #dnnl_eltwise_mish,
    /// #dnnl_eltwise_hardswish.
    /// Possible values for passing destination memory on backward:
    /// #dnnl_eltwise_relu_use_dst_for_bwd, #dnnl_eltwise_tanh_use_dst_for_bwd,
    /// #dnnl_eltwise_elu_use_dst_for_bwd, #dnnl_eltwise_sqrt_use_dst_for_bwd,
    /// #dnnl_eltwise_logistic_use_dst_for_bwd,
    /// #dnnl_eltwise_exp_use_dst_for_bwd,
    /// #dnnl_eltwise_clip_v2_use_dst_for_bwd.
    dnnl_alg_kind_t alg_kind;
    /// Source and destination memory descriptor.
    dnnl_memory_desc_t data_desc;
    /// Source and destination gradient memory descriptor.
    dnnl_memory_desc_t diff_data_desc;
    /// Algorithm specific parameters.
    /// Accordance table:
    ///  - #dnnl_eltwise_relu: @p alpha -- negative slope, @p beta ignored
    ///  - #dnnl_eltwise_tanh: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_elu: @p alpha -- negative slope, @p beta ignored
    ///  - #dnnl_eltwise_square: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_abs: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_sqrt: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_linear: @p alpha -- scale, @p beta -- shift
    ///  - #dnnl_eltwise_bounded_relu: @p alpha -- upper bound, @p beta ignored
    ///  - #dnnl_eltwise_soft_relu: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_logistic: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_exp: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_gelu_tanh: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_swish: @p alpha -- sigmoid arg scaling, @p beta ignored
    ///  - #dnnl_eltwise_log: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_clip: @p alpha -- lower bound, @p beta -- upper bound
    ///  - #dnnl_eltwise_clip_v2: @p alpha -- lower bound, @p beta -- upper bound
    ///  - #dnnl_eltwise_pow: @p alpha -- scale, @p beta -- exponent
    ///  - #dnnl_eltwise_gelu_erf: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_round: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_logsigmoid: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_mish: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_hardswish: @p alpha and @p beta ignored
    float alpha, beta;
} dnnl_eltwise_desc_t;
1985 
1986 /// @} dnnl_api_eltwise
1987 
1988 /// @addtogroup dnnl_api_softmax
1989 /// @{
1990 
/// A descriptor of a Softmax operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_softmax.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training and
    /// #dnnl_forward_inference.
    /// NOTE(review): a gradient descriptor (@p diff_desc) is present below,
    /// so a backward propagation kind is presumably also valid -- confirm.
    dnnl_prop_kind_t prop_kind;
    /// Source and destination memory descriptor.
    dnnl_memory_desc_t data_desc;
    /// Source and destination gradient memory descriptor.
    dnnl_memory_desc_t diff_desc;
    /// The axis along which to perform the softmax.
    int softmax_axis;
} dnnl_softmax_desc_t;
2006 
2007 /// @} dnnl_api_softmax
2008 
2009 /// @addtogroup dnnl_api_logsoftmax
2010 /// @{
2011 
/// A descriptor of a LogSoftmax operation. An alias of the Softmax structure,
/// but @p primitive_kind must be #dnnl_logsoftmax.
typedef dnnl_softmax_desc_t dnnl_logsoftmax_desc_t;
2015 
2016 /// @} dnnl_api_logsoftmax
2017 
2018 /// @addtogroup dnnl_api_pooling
2019 /// @{
2020 
/// A descriptor of a pooling operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_pooling.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// The kind of pooling algorithm.
    /// Possible values: #dnnl_pooling_max,
    /// #dnnl_pooling_avg_include_padding, and
    /// #dnnl_pooling_avg_exclude_padding.
    dnnl_alg_kind_t alg_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Source gradient memory descriptor.
    dnnl_memory_desc_t diff_src_desc;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// Destination gradient memory descriptor.
    dnnl_memory_desc_t diff_dst_desc;
    /// Pooling kernel strides for spatial dimensions.
    dnnl_dims_t strides;
    /// Pooling kernel spatial dimensions.
    dnnl_dims_t kernel;
    /// Padding in each spatial dimension. padding[0] is the padding at the
    /// beginning (@p padding_l), padding[1] is the padding at the end (@p
    /// padding_r).
    dnnl_dims_t padding[2];
    /// The accumulator data type. Initialized automatically.
    dnnl_data_type_t accum_data_type;
} dnnl_pooling_desc_t;
2053 
2054 /// @} dnnl_api_pooling
2055 
2056 /// @addtogroup dnnl_api_pooling_v2
2057 /// @{
2058 
/// A descriptor of a pooling operation with dilation support (v2).
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_pooling_v2.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// The kind of pooling algorithm.
    /// Possible values: #dnnl_pooling_max,
    /// #dnnl_pooling_avg_include_padding, and
    /// #dnnl_pooling_avg_exclude_padding.
    dnnl_alg_kind_t alg_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Source gradient memory descriptor.
    dnnl_memory_desc_t diff_src_desc;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// Destination gradient memory descriptor.
    dnnl_memory_desc_t diff_dst_desc;
    /// Pooling kernel strides for spatial dimensions.
    dnnl_dims_t strides;
    /// Pooling kernel spatial dimensions.
    dnnl_dims_t kernel;
    /// Padding in each spatial dimension. padding[0] is the padding at the
    /// beginning (@p padding_l), padding[1] is the padding at the end (@p
    /// padding_r).
    dnnl_dims_t padding[2];
    /// The accumulator data type. Initialized automatically.
    dnnl_data_type_t accum_data_type;
    /// Pooling dilations for spatial dimensions.
    /// NOTE(review): appended after @p accum_data_type, presumably to keep
    /// the layout of the fields shared with dnnl_pooling_desc_t binary
    /// compatible -- confirm.
    dnnl_dims_t dilation;
} dnnl_pooling_v2_desc_t;
2093 
2094 /// @} dnnl_api_pooling_v2
2095 
2096 /// @addtogroup dnnl_api_prelu
2097 /// @{
/// A descriptor of a PReLU operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_prelu.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward
    dnnl_prop_kind_t prop_kind;
    /// Source and destination memory descriptor.
    dnnl_memory_desc_t data_desc;
    /// Learnable parameter alpha memory descriptor.
    /// Alpha describes negative slope.
    dnnl_memory_desc_t weights_desc;
    /// Source and destination gradient memory descriptor.
    dnnl_memory_desc_t diff_data_desc;
    /// Learnable parameter alpha gradient memory descriptor.
    dnnl_memory_desc_t diff_weights_desc;
} dnnl_prelu_desc_t;
2115 
2116 /// @} dnnl_api_prelu
2117 
2118 /// @addtogroup dnnl_api_lrn
2119 /// @{
2120 
/// A descriptor of a Local Response Normalization (LRN) operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_lrn.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// LRN algorithm. Possible values: #dnnl_lrn_within_channel and
    /// #dnnl_lrn_across_channels.
    dnnl_alg_kind_t alg_kind;
    /// Source and destination memory descriptor.
    dnnl_memory_desc_t data_desc;
    /// Source and destination gradient memory descriptor.
    dnnl_memory_desc_t diff_data_desc;
    /// The number of channels to sum over (for cross-channel LRN) or the side
    /// length of the square region to sum over (for within-channel LRN).
    dnnl_dim_t local_size;
    /// LRN alpha (scaling) parameter.
    float lrn_alpha;
    /// LRN beta (exponent) parameter.
    float lrn_beta;
    /// LRN k (additive offset) parameter.
    float lrn_k;
} dnnl_lrn_desc_t;
2146 
2147 /// @} dnnl_api_lrn
2148 
2149 /// @addtogroup dnnl_api_batch_normalization
2150 /// @{
2151 
/// A descriptor of a Batch Normalization operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_batch_normalization.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// Source and destination memory descriptor.
    dnnl_memory_desc_t data_desc;
    /// Source and destination gradient memory descriptor.
    dnnl_memory_desc_t diff_data_desc;
    /// Scale and shift data and gradient memory descriptors.
    ///
    /// Scaleshift memory descriptor uses 2D #dnnl_nc format[2,Channels]. 1-st
    /// dimension contains gamma parameter, 2-nd dimension contains beta
    /// parameter.
    dnnl_memory_desc_t data_scaleshift_desc;
    dnnl_memory_desc_t diff_data_scaleshift_desc;
    /// Statistics memory descriptor.
    ///
    /// Statistics (mean or variance) descriptor use 1D #dnnl_x format[Channels].
    dnnl_memory_desc_t stat_desc;
    /// Batch normalization epsilon parameter.
    float batch_norm_epsilon;
    /// Normalization flags; presumably a bitmask of
    /// dnnl_normalization_flags_t values -- confirm.
    unsigned flags;
} dnnl_batch_normalization_desc_t;
2179 
2180 /// @} dnnl_api_batch_normalization
2181 
2182 /// @addtogroup dnnl_api_layer_normalization
2183 /// @{
2184 
/// A descriptor of a Layer Normalization operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_layer_normalization.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// Source and destination memory descriptor.
    dnnl_memory_desc_t data_desc;
    /// Source and destination gradient memory descriptor.
    dnnl_memory_desc_t diff_data_desc;
    /// Scale and shift data and gradient memory descriptors.
    ///
    /// Scaleshift memory descriptor uses 2D #dnnl_ab
    /// format[2, normalized_dim] where 1-st dimension contains gamma parameter,
    /// 2-nd dimension contains beta parameter. Normalized_dim is equal to the
    /// last logical dimension of the data tensor across which normalization is
    /// performed.
    dnnl_memory_desc_t data_scaleshift_desc;
    dnnl_memory_desc_t diff_data_scaleshift_desc;
    /// Mean and variance data memory descriptors.
    ///
    /// Statistics (mean and variance) memory descriptor is the k-dimensional tensor
    /// where k is equal to data_tensor_ndims - 1 and may have any plain
    /// (stride[last_dim] == 1) user-provided format.
    dnnl_memory_desc_t stat_desc;
    /// Layer normalization epsilon parameter.
    float layer_norm_epsilon;
    /// Normalization flags; presumably a bitmask of
    /// dnnl_normalization_flags_t values -- confirm.
    unsigned flags;
} dnnl_layer_normalization_desc_t;
2216 
2217 /// @} dnnl_api_layer_normalization
2218 
2219 /// @addtogroup dnnl_api_inner_product
2220 /// @{
2221 
/// A descriptor of an inner product operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_inner_product.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward_data,
    /// #dnnl_backward_weights, and #dnnl_backward_bias.
    dnnl_prop_kind_t prop_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Source gradient memory descriptor.
    dnnl_memory_desc_t diff_src_desc;
    /// Weights memory descriptor.
    dnnl_memory_desc_t weights_desc;
    /// Weights gradient memory descriptor.
    dnnl_memory_desc_t diff_weights_desc;
    /// Bias memory descriptor. Presumably a zero memory descriptor when the
    /// bias is absent -- confirm.
    dnnl_memory_desc_t bias_desc;
    /// Bias gradient memory descriptor.
    dnnl_memory_desc_t diff_bias_desc;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// Destination gradient memory descriptor.
    dnnl_memory_desc_t diff_dst_desc;
    /// The accumulator data type. Initialized automatically.
    dnnl_data_type_t accum_data_type;
} dnnl_inner_product_desc_t;
2250 
2251 /// @} dnnl_api_inner_product
2252 
2253 /// @addtogroup dnnl_api_rnn
2254 /// @{
2255 
/// Flags for RNN cell. Currently only the "no flags" value is defined; the
/// enum is reserved for future extensions.
typedef enum {
    /// Undefined RNN flags
    dnnl_rnn_flags_undef = 0x0
} dnnl_rnn_flags_t;
2261 
/// A direction of RNN primitive execution
/// (see dnnl_rnn_desc_t::direction).
typedef enum {
    /// Unidirectional execution of RNN primitive from left to right.
    dnnl_unidirectional_left2right,
    /// Unidirectional execution of RNN primitive from right to left.
    dnnl_unidirectional_right2left,
    /// Bidirectional execution of RNN primitive with concatenation of the
    /// results.
    dnnl_bidirectional_concat,
    /// Bidirectional execution of RNN primitive with summation of the
    /// results.
    dnnl_bidirectional_sum,
    /// Alias for #dnnl_unidirectional_left2right.
    dnnl_unidirectional = dnnl_unidirectional_left2right,
} dnnl_rnn_direction_t;
2277 
/// A descriptor for an RNN operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_rnn.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, and #dnnl_backward.
    dnnl_prop_kind_t prop_kind;
    /// RNN cell kind. Must be one of #dnnl_vanilla_rnn,
    /// #dnnl_vanilla_lstm, #dnnl_vanilla_gru, or #dnnl_lbr_gru.
    dnnl_alg_kind_t cell_kind;
    /// The direction of RNN primitive execution.
    dnnl_rnn_direction_t direction;
    /// Source layer memory descriptor.
    dnnl_memory_desc_t src_layer_desc;
    /// Source iteration memory descriptor for hidden state.
    dnnl_memory_desc_t src_iter_desc;
    /// Source iteration memory descriptor for cell state.
    dnnl_memory_desc_t src_iter_c_desc;
    /// Weights layer memory descriptor.
    dnnl_memory_desc_t weights_layer_desc;
    /// Weights iteration memory descriptor.
    dnnl_memory_desc_t weights_iter_desc;
    /// Bias memory descriptor.
    dnnl_memory_desc_t bias_desc;
    /// Destination layer memory descriptor.
    dnnl_memory_desc_t dst_layer_desc;
    /// Destination iter memory descriptor for hidden state.
    dnnl_memory_desc_t dst_iter_desc;
    /// Destination iter memory descriptor for cell state.
    dnnl_memory_desc_t dst_iter_c_desc;
    /// Weights peephole memory descriptor.
    /// This memory descriptor is equal to zero memory descriptor in case of
    /// non-peephole LSTMs and other non-LSTM RNNs.
    dnnl_memory_desc_t weights_peephole_desc;
    /// Weights projection memory descriptor.
    /// This memory descriptor is equal to zero memory descriptor in case of
    /// non-projection LSTMs and other non-LSTM RNNs.
    dnnl_memory_desc_t weights_projection_desc;

    /// Source gradient layer memory descriptor.
    dnnl_memory_desc_t diff_src_layer_desc;
    /// Source gradient iter memory descriptor for hidden state.
    dnnl_memory_desc_t diff_src_iter_desc;
    /// Source gradient iter memory descriptor for cell state.
    dnnl_memory_desc_t diff_src_iter_c_desc;
    /// Weights gradient layer memory descriptor.
    dnnl_memory_desc_t diff_weights_layer_desc;
    /// Weights gradient iter memory descriptor.
    dnnl_memory_desc_t diff_weights_iter_desc;
    /// Bias gradient memory descriptor.
    dnnl_memory_desc_t diff_bias_desc;
    /// Destination gradient layer memory descriptor.
    dnnl_memory_desc_t diff_dst_layer_desc;
    /// Destination gradient iteration memory descriptor for hidden state.
    dnnl_memory_desc_t diff_dst_iter_desc;
    /// Destination gradient iteration memory descriptor for cell state.
    dnnl_memory_desc_t diff_dst_iter_c_desc;
    /// Weights gradient peephole memory descriptor.
    /// This memory descriptor is equal to zero memory descriptor in case of
    /// non-peephole LSTMs and other non-LSTM RNNs.
    dnnl_memory_desc_t diff_weights_peephole_desc;
    /// Weights gradient projection memory descriptor.
    /// This memory descriptor is equal to zero memory descriptor in case of
    /// non-projection LSTMs and other non-LSTM RNNs.
    dnnl_memory_desc_t diff_weights_projection_desc;

    /// RNN cell flags (see #dnnl_rnn_flags_t).
    unsigned int flags;
    /// Activation function used for vanilla_rnn cell kind.
    /// Must be either #dnnl_eltwise_relu or #dnnl_eltwise_tanh.
    dnnl_alg_kind_t activation_kind;
    /// Alpha parameter of the activation function; presumably the negative
    /// slope for #dnnl_eltwise_relu -- confirm.
    float alpha;
    /// Beta parameter of the activation function; presumably unused for the
    /// activations listed above -- confirm.
    float beta;

} dnnl_rnn_desc_t;
2354 
2355 /// @} dnnl_api_rnn
2356 
2357 /// @addtogroup dnnl_api_binary
2358 /// @{
2359 
/// A descriptor of a binary operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_binary.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of the binary algorithm. Possible values:
    /// #dnnl_binary_add, #dnnl_binary_mul, #dnnl_binary_max, #dnnl_binary_min,
    /// #dnnl_binary_div and #dnnl_binary_sub.
    dnnl_alg_kind_t alg_kind;
    /// Source memory descriptors; indices 0 and 1 correspond to the two
    /// inputs (src0 and src1) of the binary operation.
    dnnl_memory_desc_t src_desc[2];
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
} dnnl_binary_desc_t;
2374 
2375 /// @} dnnl_api_binary
2376 
2377 /// @addtogroup dnnl_api_matmul
2378 /// @{
2379 
/// A descriptor of a matrix multiplication operation.
///
/// 2D case:
///     dst[m, n] = src[m, k] * weights[k, n] + bias[m, n]
///
/// 3D case:
///     dst[mb, m, n] = src[mb, m, k] * weights[mb, k, n] + bias[mb, m, n]
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_matmul.
    dnnl_primitive_kind_t primitive_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Weights memory descriptor.
    dnnl_memory_desc_t weights_desc;
    /// Bias memory descriptor. Presumably a zero memory descriptor when the
    /// bias is absent -- confirm.
    dnnl_memory_desc_t bias_desc;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// The accumulator data type. Initialized automatically.
    dnnl_data_type_t accum_data_type;
} dnnl_matmul_desc_t;
2402 
2403 /// @} dnnl_api_matmul
2404 
2405 /// @addtogroup dnnl_api_resampling
2406 /// @{
2407 
/// A descriptor of a resampling operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_resampling.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// The kind of the resampling algorithm. Possible values:
    /// #dnnl_resampling_nearest, #dnnl_resampling_linear.
    dnnl_alg_kind_t alg_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Source gradient memory descriptor.
    dnnl_memory_desc_t diff_src_desc;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// Destination gradient memory descriptor.
    dnnl_memory_desc_t diff_dst_desc;
    /// Resampling factor in each spatial dimension.
    float factors[DNNL_MAX_NDIMS];
} dnnl_resampling_desc_t;
2430 
2431 /// @} dnnl_api_resampling
2432 
2433 /// @addtogroup dnnl_api_reduction
2434 /// @{
2435 
/// A descriptor of reduction operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_reduction.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of reduction algorithm. Possible values:
    /// #dnnl_reduction_max, #dnnl_reduction_min, #dnnl_reduction_sum,
    /// #dnnl_reduction_mul, #dnnl_reduction_mean, #dnnl_reduction_norm_lp_max,
    /// #dnnl_reduction_norm_lp_sum, #dnnl_reduction_norm_lp_power_p_max,
    /// #dnnl_reduction_norm_lp_power_p_sum.
    dnnl_alg_kind_t alg_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// Algorithm specific parameters.
    ///
    /// For the Lp-norm algorithms (#dnnl_reduction_norm_lp_max,
    /// #dnnl_reduction_norm_lp_sum, #dnnl_reduction_norm_lp_power_p_max,
    /// #dnnl_reduction_norm_lp_power_p_sum): @p p is the power and
    /// @p eps is the epsilon.
    ///
    /// For all other algorithms (#dnnl_reduction_max, #dnnl_reduction_min,
    /// #dnnl_reduction_sum, #dnnl_reduction_mul, #dnnl_reduction_mean):
    /// @p p and @p eps are ignored.
    float p, eps;
} dnnl_reduction_desc_t;
2464 
2465 /// @} dnnl_api_reduction
2466 
2467 /// @} dnnl_api_primitives
2468 
2469 /// @addtogroup dnnl_api_engine
2470 /// @{
2471 
/// @brief Kinds of engines.
typedef enum {
    /// An unspecified engine (value 0).
    dnnl_any_engine,
    /// CPU engine (value 1).
    dnnl_cpu,
    /// GPU engine (value 2).
    dnnl_gpu,
} dnnl_engine_kind_t;
2481 
/// @struct dnnl_engine
/// @brief An opaque structure to describe an engine.
struct dnnl_engine;
/// @brief An engine handle.
typedef struct dnnl_engine *dnnl_engine_t;
#if 0
// FIXME: looks like this never happens -- kept compiled out; presumably no
// API currently takes a constant engine handle (confirm before enabling).
/// @brief A constant engine handle.
typedef const struct dnnl_engine *const_dnnl_engine_t;
#endif
2492 
2493 /// @} dnnl_api_engine
2494 
2495 /// @addtogroup dnnl_api_primitives
2496 /// @{
2497 /// @addtogroup dnnl_api_primitives_common
2498 /// @{
2499 
/// @struct dnnl_primitive_desc_iterator
/// @brief An opaque structure to describe a primitive descriptor iterator.
struct dnnl_primitive_desc_iterator;

/// @brief A primitive descriptor iterator handle.
typedef struct dnnl_primitive_desc_iterator *dnnl_primitive_desc_iterator_t;

/// @brief A constant primitive descriptor iterator handle (points to an
/// immutable iterator).
typedef const struct dnnl_primitive_desc_iterator
        *const_dnnl_primitive_desc_iterator_t;

/// @struct dnnl_primitive_desc
/// @brief An opaque structure to describe a primitive descriptor.
struct dnnl_primitive_desc;

/// @brief A primitive descriptor handle.
typedef struct dnnl_primitive_desc *dnnl_primitive_desc_t;

/// @brief A constant primitive descriptor handle (points to an immutable
/// primitive descriptor).
typedef const struct dnnl_primitive_desc *const_dnnl_primitive_desc_t;
2520 
2521 /// @} dnnl_api_primitives_common
2522 
2523 /// @addtogroup dnnl_api_attributes
2524 /// @{
2525 
/// Floating-point math mode: controls which implicit down-conversions of
/// f32 computations the library is allowed to perform.
typedef enum {
    /// Default behavior, no downconversions allowed
    dnnl_fpmath_mode_strict,
    /// Implicit f32->bf16 conversions allowed
    dnnl_fpmath_mode_bf16,
    /// Implicit f32->f16 conversions allowed
    dnnl_fpmath_mode_f16,
    /// Implicit f32->f16 or f32->bf16 conversions allowed
    dnnl_fpmath_mode_any,
} dnnl_fpmath_mode_t;
2537 
/// Scratchpad mode
typedef enum {
    /// The library manages the scratchpad allocation according to the policy
    /// specified by the `DNNL_ENABLE_CONCURRENT_EXEC`
    /// [build option](@ref dev_guide_build_options) (default).
    ///
    /// When `DNNL_ENABLE_CONCURRENT_EXEC=OFF` (default), the library
    /// scratchpad is common to all primitives to reduce the memory footprint.
    /// This configuration comes with limited thread-safety properties, namely
    /// primitives can be created and executed in parallel but cannot migrate
    /// between threads (in other words, each primitive should be executed in
    /// the same thread it was created in).
    ///
    /// When `DNNL_ENABLE_CONCURRENT_EXEC=ON`, the library scratchpad is
    /// private to each primitive. The memory footprint is larger than when
    /// using `DNNL_ENABLE_CONCURRENT_EXEC=OFF` but different primitives can be
    /// created and run concurrently (the same primitive cannot be run
    /// concurrently from two different threads though).
    dnnl_scratchpad_mode_library,
    /// The user manages the scratchpad allocation by querying and providing
    /// the scratchpad memory to primitives (see #DNNL_ARG_SCRATCHPAD). This
    /// mode is thread-safe as long as the scratchpad buffers are not used
    /// concurrently by two primitive executions.
    dnnl_scratchpad_mode_user,
} dnnl_scratchpad_mode_t;
2563 
/// @struct dnnl_primitive_attr
/// @brief An opaque structure for primitive descriptor attributes.
///
/// Attributes may contain:
///  - output scales (to scale the result prior to storing it to the memory)
///  - other controls declared in this group, such as the floating-point math
///    mode (#dnnl_fpmath_mode_t) and the scratchpad mode
///    (#dnnl_scratchpad_mode_t)
struct dnnl_primitive_attr;

/// @brief A primitive descriptor attributes handle that controls primitive
/// behavior.
typedef struct dnnl_primitive_attr *dnnl_primitive_attr_t;

/// @brief A constant primitive descriptor attributes handle.
typedef const struct dnnl_primitive_attr *const_dnnl_primitive_attr_t;

/// @struct dnnl_post_ops
/// @brief An opaque structure for a chain of post operations.
///
/// dnnl_post_ops can be used to perform some (trivial) operations like
/// accumulation or eltwise after certain primitives like convolution.
///
/// Post operations might be combined together, making a chain of post
/// operations. For instance one can configure convolution followed by
/// accumulation followed by eltwise. This might be especially beneficial
/// for residual learning blocks.
///
/// @warning
///      Of course not all combinations are supported, so the user should handle
///      errors accordingly.
///
/// Supported post operations:
///  - accumulation (base primitive: convolution)
///  - eltwise (base primitive: convolution)
struct dnnl_post_ops;

/// @brief A post operation chain handle.
typedef struct dnnl_post_ops *dnnl_post_ops_t;

/// @brief A constant post operation chain handle.
typedef const struct dnnl_post_ops *const_dnnl_post_ops_t;
2603 
2604 /// @} dnnl_api_attributes
2605 
2606 /// @addtogroup dnnl_api_primitives_common
2607 /// @{
2608 
/// @struct dnnl_primitive
/// @brief An opaque structure to describe a primitive.
struct dnnl_primitive;
/// @brief A primitive handle.
typedef struct dnnl_primitive *dnnl_primitive_t;
/// @brief A constant primitive handle.
typedef const struct dnnl_primitive *const_dnnl_primitive_t;
2616 
2617 /// Source argument #0.
2618 #define DNNL_ARG_SRC_0 1
2619 /// A special mnemonic for source argument for primitives that have a
2620 /// single source. An alias for #DNNL_ARG_SRC_0.
2621 #define DNNL_ARG_SRC DNNL_ARG_SRC_0
2622 /// A special mnemonic for RNN input vector. An alias for
2623 /// #DNNL_ARG_SRC_0.
2624 #define DNNL_ARG_SRC_LAYER DNNL_ARG_SRC_0
2625 /// A special mnemonic for reorder source argument. An alias for
2626 /// #DNNL_ARG_SRC_0.
2627 #define DNNL_ARG_FROM DNNL_ARG_SRC_0
2628 
2629 /// Source argument #1.
2630 #define DNNL_ARG_SRC_1 2
2631 /// A special mnemonic for RNN input recurrent hidden state vector. An alias
2632 /// for #DNNL_ARG_SRC_1.
2633 #define DNNL_ARG_SRC_ITER DNNL_ARG_SRC_1
2634 
2635 /// Source argument #2.
2636 #define DNNL_ARG_SRC_2 3
2637 /// A special mnemonic for RNN input recurrent cell state vector. An alias for
2638 /// #DNNL_ARG_SRC_2.
2639 #define DNNL_ARG_SRC_ITER_C DNNL_ARG_SRC_2
2640 
2641 /// Destination argument #0.
2642 #define DNNL_ARG_DST_0 17
2643 /// A special mnemonic for destination argument for primitives that have a
2644 /// single destination. An alias for #DNNL_ARG_DST_0.
2645 #define DNNL_ARG_DST DNNL_ARG_DST_0
2646 /// A special mnemonic for reorder destination argument. An alias for
2647 /// #DNNL_ARG_DST_0.
2648 #define DNNL_ARG_TO DNNL_ARG_DST_0
2649 /// A special mnemonic for RNN output vector. An alias for #DNNL_ARG_DST_0.
2650 #define DNNL_ARG_DST_LAYER DNNL_ARG_DST_0
2651 
2652 /// Destination argument #1.
2653 #define DNNL_ARG_DST_1 18
/// A special mnemonic for RNN output recurrent hidden state vector. An
/// alias for #DNNL_ARG_DST_1.
2656 #define DNNL_ARG_DST_ITER DNNL_ARG_DST_1
2657 
2658 /// Destination argument #2.
2659 #define DNNL_ARG_DST_2 19
2660 /// A special mnemonic for LSTM output recurrent cell state vector. An
2661 /// alias for #DNNL_ARG_DST_2.
2662 #define DNNL_ARG_DST_ITER_C DNNL_ARG_DST_2
2663 
2664 /// Weights argument #0.
2665 #define DNNL_ARG_WEIGHTS_0 33
2666 /// A special mnemonic for primitives that have a single weights
2667 /// argument. Alias for #DNNL_ARG_WEIGHTS_0.
2668 #define DNNL_ARG_WEIGHTS DNNL_ARG_WEIGHTS_0
2669 /// A special mnemonic for scale and shift argument of normalization
2670 /// primitives. Alias for #DNNL_ARG_WEIGHTS_0.
2671 #define DNNL_ARG_SCALE_SHIFT DNNL_ARG_WEIGHTS_0
2672 /// A special mnemonic for RNN weights applied to the layer input. An
2673 /// alias for #DNNL_ARG_WEIGHTS_0.
2674 #define DNNL_ARG_WEIGHTS_LAYER DNNL_ARG_WEIGHTS_0
2675 
2676 /// Weights argument #1.
2677 #define DNNL_ARG_WEIGHTS_1 34
2678 /// A special mnemonic for RNN weights applied to the recurrent input.
2679 /// An alias for #DNNL_ARG_WEIGHTS_1.
2680 #define DNNL_ARG_WEIGHTS_ITER DNNL_ARG_WEIGHTS_1
2681 
2682 /// Weights argument #2.
2683 #define DNNL_ARG_WEIGHTS_2 35
2684 /// A special mnemonic for RNN weights applied to the peephole weights.
2685 /// An alias for #DNNL_ARG_WEIGHTS_2.
2686 #define DNNL_ARG_WEIGHTS_PEEPHOLE DNNL_ARG_WEIGHTS_2
2687 
2688 /// Weights argument #3.
2689 #define DNNL_ARG_WEIGHTS_3 36
2690 /// A special mnemonic for RNN weights applied to the projection weights.
2691 /// An alias for #DNNL_ARG_WEIGHTS_3.
2692 #define DNNL_ARG_WEIGHTS_PROJECTION DNNL_ARG_WEIGHTS_3
2693 
2694 /// Bias tensor argument.
2695 #define DNNL_ARG_BIAS 41
2696 
2697 /// Mean values tensor argument.
2698 #define DNNL_ARG_MEAN 49
2699 /// Variance values tensor argument.
2700 #define DNNL_ARG_VARIANCE 50
2701 
2702 /// A special mnemonic for scale argument of normalization primitives.
2703 #define DNNL_ARG_SCALE 51
2704 /// A special mnemonic for shift argument of normalization primitives.
2705 #define DNNL_ARG_SHIFT 52
2706 
2707 /// Workspace tensor argument. Workspace is used to pass information
2708 /// from forward propagation to backward propagation computations.
2709 #define DNNL_ARG_WORKSPACE 64
2710 /// Scratchpad (temporary storage) tensor argument.
2711 #define DNNL_ARG_SCRATCHPAD 80
2712 
2713 /// Gradient (diff) of the source argument #0.
2714 #define DNNL_ARG_DIFF_SRC_0 129
2715 /// A special mnemonic for primitives that have a single diff source argument.
2716 /// An alias for #DNNL_ARG_DIFF_SRC_0.
2717 #define DNNL_ARG_DIFF_SRC DNNL_ARG_DIFF_SRC_0
2718 /// A special mnemonic for gradient (diff) of RNN input vector. An alias for
2719 /// #DNNL_ARG_DIFF_SRC_0.
2720 #define DNNL_ARG_DIFF_SRC_LAYER DNNL_ARG_DIFF_SRC_0
2721 
2722 /// Gradient (diff) of the source argument #1.
2723 #define DNNL_ARG_DIFF_SRC_1 130
2724 /// A special mnemonic for gradient (diff) of RNN input recurrent hidden state
2725 /// vector. An alias for #DNNL_ARG_DIFF_SRC_1.
2726 #define DNNL_ARG_DIFF_SRC_ITER DNNL_ARG_DIFF_SRC_1
2727 
2728 /// Gradient (diff) of the source argument #2.
2729 #define DNNL_ARG_DIFF_SRC_2 131
/// A special mnemonic for gradient (diff) of RNN input recurrent cell state
/// vector. An alias for #DNNL_ARG_DIFF_SRC_2.
2732 #define DNNL_ARG_DIFF_SRC_ITER_C DNNL_ARG_DIFF_SRC_2
2733 
2734 /// Gradient (diff) of the destination argument #0.
2735 #define DNNL_ARG_DIFF_DST_0 145
2736 /// A special mnemonic for primitives that have a single diff destination
2737 /// argument. An alias for #DNNL_ARG_DIFF_DST_0.
2738 #define DNNL_ARG_DIFF_DST DNNL_ARG_DIFF_DST_0
2739 /// A special mnemonic for gradient (diff) of RNN output vector. An alias for
2740 /// #DNNL_ARG_DIFF_DST_0.
2741 #define DNNL_ARG_DIFF_DST_LAYER DNNL_ARG_DIFF_DST_0
2742 
2743 /// Gradient (diff) of the destination argument #1.
2744 #define DNNL_ARG_DIFF_DST_1 146
2745 /// A special mnemonic for gradient (diff) of RNN input recurrent hidden state
2746 /// vector. An alias for #DNNL_ARG_DIFF_DST_1.
2747 #define DNNL_ARG_DIFF_DST_ITER DNNL_ARG_DIFF_DST_1
2748 
2749 /// Gradient (diff) of the destination argument #2.
2750 #define DNNL_ARG_DIFF_DST_2 147
2751 /// A special mnemonic for gradient (diff) of RNN input recurrent cell state
2752 /// vector. An alias for #DNNL_ARG_DIFF_DST_2.
2753 #define DNNL_ARG_DIFF_DST_ITER_C DNNL_ARG_DIFF_DST_2
2754 
2755 /// Gradient (diff) of the weights argument #0.
2756 #define DNNL_ARG_DIFF_WEIGHTS_0 161
2757 /// A special mnemonic for primitives that have a single diff weights
2758 /// argument. Alias for #DNNL_ARG_DIFF_WEIGHTS_0.
2759 #define DNNL_ARG_DIFF_WEIGHTS DNNL_ARG_DIFF_WEIGHTS_0
2760 /// A special mnemonic for diff of scale and shift argument of normalization
2761 /// primitives. Alias for #DNNL_ARG_DIFF_WEIGHTS_0.
2762 #define DNNL_ARG_DIFF_SCALE_SHIFT DNNL_ARG_DIFF_WEIGHTS_0
2763 /// A special mnemonic for diff of RNN weights applied to the layer input. An
2764 /// alias for #DNNL_ARG_DIFF_WEIGHTS_0.
2765 #define DNNL_ARG_DIFF_WEIGHTS_LAYER DNNL_ARG_DIFF_WEIGHTS_0
2766 
2767 /// Gradient (diff) of the weights argument #1.
2768 #define DNNL_ARG_DIFF_WEIGHTS_1 162
2769 /// A special mnemonic for diff of RNN weights applied to the recurrent input.
2770 /// An alias for #DNNL_ARG_DIFF_WEIGHTS_1.
2771 #define DNNL_ARG_DIFF_WEIGHTS_ITER DNNL_ARG_DIFF_WEIGHTS_1
2772 
2773 /// Gradient (diff) of the weights argument #2.
2774 #define DNNL_ARG_DIFF_WEIGHTS_2 163
2775 /// A special mnemonic for diff of RNN weights applied to the peephole weights.
2776 /// An alias for #DNNL_ARG_DIFF_WEIGHTS_2.
2777 #define DNNL_ARG_DIFF_WEIGHTS_PEEPHOLE DNNL_ARG_DIFF_WEIGHTS_2
2778 
2779 /// Gradient (diff) of the weights argument #3.
2780 #define DNNL_ARG_DIFF_WEIGHTS_3 164
2781 /// A special mnemonic for diff of RNN weights applied to the projection
2782 /// weights. An alias for #DNNL_ARG_DIFF_WEIGHTS_3.
2783 #define DNNL_ARG_DIFF_WEIGHTS_PROJECTION DNNL_ARG_DIFF_WEIGHTS_3
2784 
2785 /// Gradient (diff) of the bias tensor argument.
2786 #define DNNL_ARG_DIFF_BIAS 169
2787 
/// A special mnemonic for diff of scale argument of normalization primitives.
2789 #define DNNL_ARG_DIFF_SCALE 255
/// A special mnemonic for diff of shift argument of normalization primitives.
2791 #define DNNL_ARG_DIFF_SHIFT 256
2792 
2793 /// Output scaling factors provided at execution time.
2794 #define DNNL_ARG_ATTR_OUTPUT_SCALES 513
2795 
2796 /// Starting index for source arguments for primitives that take a variable
2797 /// number of source arguments.
2798 #define DNNL_ARG_MULTIPLE_SRC 1024
2799 /// Starting index for destination arguments for primitives that produce a
2800 /// variable number of destination arguments.
2801 #define DNNL_ARG_MULTIPLE_DST 2048
2802 
2803 /// Zero points provided at execution time.
2804 #define DNNL_ARG_ATTR_ZERO_POINTS 4096
2805 
2806 /// Arguments for fused depthwise convolution.
2807 /// See @ref dev_guide_attributes_post_ops_depthwise_fusion
2808 #define DNNL_ARG_ATTR_POST_OP_DW 8192
2809 
2810 /// Starting point for a binary post operation.
2811 #define DNNL_ARG_ATTR_MULTIPLE_POST_OP_BASE 16384
2812 
2813 /// Arguments for a binary post operation. Up to 32 arguments are supported.
2814 /// See @ref dev_guide_attributes_post_ops_binary_fusion
2815 #define DNNL_ARG_ATTR_MULTIPLE_POST_OP(idx) \
2816     (DNNL_ARG_ATTR_MULTIPLE_POST_OP_BASE * ((idx) + 1))
2817 
2818 /// Input scaling factors provided at execution time.
2819 #define DNNL_ARG_ATTR_INPUT_SCALES 1048576
2820 
/// A structure that contains an index and a memory object, and is used to pass
/// arguments to dnnl_primitive_execute().
typedef struct {
    int arg; ///< An argument index, e.g. #DNNL_ARG_SRC
    dnnl_memory_t memory; ///< Input/output memory
} dnnl_exec_arg_t;
2827 
2828 /// @} dnnl_api_primitives_common
2829 
2830 /// @addtogroup dnnl_api_primitives_common
2831 /// @{
2832 
/// Primitive descriptor query specification
///
/// For generic function dnnl_primitive_desc_query(), the type of result must
/// agree with the queried argument. The correspondence table:
///
/// Query kind                      | Type of query result
/// --------------------------------|-----------------------------
/// #dnnl_query_engine              | #dnnl_engine_t *
/// #dnnl_query_scratchpad_engine   | #dnnl_engine_t *
/// #dnnl_query_primitive_kind      | #dnnl_primitive_kind_t *
/// dnnl_query_*_s32                | int *
/// dnnl_query_*_s64                | #dnnl_dim_t * (same as int64_t *)
/// dnnl_query_*_f64                | double *
/// dnnl_query_*_str                | const char **
/// #dnnl_query_op_d                | #const_dnnl_op_desc_t *
/// dnnl_query_*_md                 | const #dnnl_memory_desc_t **
/// dnnl_query_*_\<op\>_d           | const dnnl_\<op\>_desc_t **
/// dnnl_query_*_pd                 | #const_dnnl_primitive_desc_t *
///
/// @note
///     Rule of thumb: all opaque types and structures are returned by
///     reference. All numbers are returned by value.
///
/// @warning
///     All returned references point to constant objects and are valid only
///     during the lifetime of the queried primitive descriptor. Returned objects
///     must not be destroyed by the user. If you need to keep the object longer
///     than the lifetime of the queried primitive descriptor, use
///     dnnl_primitive_desc_clone() to make a copy.
typedef enum {
    dnnl_query_undef = 0, ///< no query

    dnnl_query_engine, ///< execution engine
    dnnl_query_primitive_kind, ///< primitive kind

    dnnl_query_num_of_inputs_s32, ///< number of inputs expected
    dnnl_query_num_of_outputs_s32, ///< number of outputs expected

    dnnl_query_time_estimate_f64, ///< runtime estimation (seconds)
    dnnl_query_memory_consumption_s64, ///< memory consumption -- extra
    ///  (scratch) memory, additional to
    ///  all inputs and outputs memory
    ///  (bytes)

    dnnl_query_scratchpad_engine, ///< scratchpad engine -- engine to be used
    ///  for creating scratchpad memory

    dnnl_query_impl_info_str, ///< implementation name

    dnnl_query_reorder_src_engine, ///< source engine
    dnnl_query_reorder_dst_engine, ///< destination engine

    dnnl_query_prop_kind, ///< propagation kind

    // memory and op descriptor section (values start at 64)
    dnnl_query_some_d = 64, ///< stub
    dnnl_query_op_d, ///< op descriptor
    dnnl_query_convolution_d, ///< convolution descriptor
    dnnl_query_deconvolution_d, ///< deconvolution descriptor
    dnnl_query_shuffle_d, ///< shuffle descriptor
    dnnl_query_eltwise_d, ///< eltwise descriptor
    dnnl_query_softmax_d, ///< softmax descriptor
    dnnl_query_pooling_d, ///< pooling descriptor
    dnnl_query_lrn_d, ///< lrn descriptor
    dnnl_query_batch_normalization_d, ///< batch normalization descriptor
    dnnl_query_layer_normalization_d, ///< layer normalization descriptor
    dnnl_query_inner_product_d, ///< inner product descriptor
    dnnl_query_rnn_d, ///< rnn descriptor
    dnnl_query_gemm_d, ///< GEMM descriptor (internal)
    dnnl_query_binary_d, ///< binary descriptor
    dnnl_query_logsoftmax_d, ///< logsoftmax descriptor
    dnnl_query_matmul_d, ///< matrix multiplication (matmul) descriptor
    dnnl_query_resampling_d, ///< resampling descriptor
    dnnl_query_pooling_v2_d, ///< pooling version 2 descriptor
    dnnl_query_reduction_d, ///< reduction descriptor
    dnnl_query_prelu_d, ///< prelu descriptor

    // memory descriptor section (values start at 128)
    dnnl_query_some_md = 128, ///< stub
    dnnl_query_src_md, ///< source memory desc
    dnnl_query_diff_src_md, ///< source gradient memory desc
    dnnl_query_weights_md, ///< weights memory desc
    dnnl_query_diff_weights_md, ///< weights grad. memory desc
    dnnl_query_dst_md, ///< destination memory desc
    dnnl_query_diff_dst_md, ///< destination grad. memory desc
    dnnl_query_workspace_md, ///< workspace memory desc
    dnnl_query_scratchpad_md, ///< scratchpad memory desc
    dnnl_query_exec_arg_md = 255, ///< memory desc of an execute argument

    // Max value to prevent UB for internal use only dnnl_query_t
    dnnl_query_max = 0x7fff,
} dnnl_query_t;
2925 
2926 /// @} dnnl_api_primitives_common
2927 
2928 /// @} dnnl_api_primitives
2929 
2930 /// @addtogroup dnnl_api_stream
2931 /// @{
2932 
/// @brief Stream flags.
typedef enum {
    /// In-order execution.
    dnnl_stream_in_order = 0x1U,
    /// Out-of-order execution.
    dnnl_stream_out_of_order = 0x2U,
    /// Default stream configuration.
    dnnl_stream_default_flags = dnnl_stream_in_order,
} dnnl_stream_flags_t;
2942 
/// @struct dnnl_stream
/// @brief An opaque structure to describe an execution stream.
struct dnnl_stream;
/// @brief An execution stream handle.
typedef struct dnnl_stream *dnnl_stream_t;
/// @brief A constant execution stream handle.
typedef const struct dnnl_stream *const_dnnl_stream_t;
2950 
2951 /// @} dnnl_api_stream
2952 
2953 /// @addtogroup dnnl_api_service
2954 /// @{
2955 
2956 /// No runtime (disabled)
2957 #define DNNL_RUNTIME_NONE 0u
2958 
2959 /// Sequential runtime (CPU only)
2960 #define DNNL_RUNTIME_SEQ 1u
2961 
2962 /// OpenMP runtime (CPU only)
2963 #define DNNL_RUNTIME_OMP 2u
2964 
2965 /// TBB runtime (CPU only)
2966 #define DNNL_RUNTIME_TBB 4u
2967 
2968 /// Threadpool runtime (CPU only)
2969 #define DNNL_RUNTIME_THREADPOOL 8u
2970 
2971 /// OpenCL runtime
2972 #define DNNL_RUNTIME_OCL 256u
2973 
2974 /// SYCL runtime
2975 #define DNNL_RUNTIME_SYCL 512u
2976 
2977 /// DPC++ runtime
2978 #define DNNL_RUNTIME_DPCPP DNNL_RUNTIME_SYCL
2979 
/// Structure containing version information as per [Semantic
/// Versioning](https://semver.org)
typedef struct {
    int major; ///< Major version
    int minor; ///< Minor version
    int patch; ///< Patch version
    const char *hash; ///< Git hash of the sources (may be absent)
    unsigned cpu_runtime; ///< CPU runtime (see the DNNL_RUNTIME_* constants)
    unsigned gpu_runtime; ///< GPU runtime (see the DNNL_RUNTIME_* constants)
} dnnl_version_t;
2990 
2991 /// Disable profiling completely
2992 #define DNNL_JIT_PROFILE_NONE 0u
2993 
2994 /// Enable VTune Amplifier integration
2995 #define DNNL_JIT_PROFILE_VTUNE 1u
2996 
2997 /// Enable Linux perf integration via perfmap files
2998 #define DNNL_JIT_PROFILE_LINUX_PERFMAP 2u
2999 
3000 /// Enable Linux perf integration via jitdump files
3001 #define DNNL_JIT_PROFILE_LINUX_JITDUMP 4u
3002 
3003 /// Instruct Linux perf integration via jitdump files to use TSC. @ref
3004 /// DNNL_JIT_PROFILE_LINUX_JITDUMP must be set too for this to take effect.
3005 #define DNNL_JIT_PROFILE_LINUX_JITDUMP_USE_TSC 8u
3006 
3007 /// Enable Linux perf integration (both jitdump and perfmap)
3008 #define DNNL_JIT_PROFILE_LINUX_PERF \
3009     (DNNL_JIT_PROFILE_LINUX_JITDUMP | DNNL_JIT_PROFILE_LINUX_PERFMAP)
3010 
/// CPU instruction set flags
///
/// Values are bit masks: each ISA value contains the bits of the ISAs it
/// extends (e.g. #dnnl_cpu_isa_avx2 (0x7) contains #dnnl_cpu_isa_avx (0x3),
/// which in turn contains #dnnl_cpu_isa_sse41 (0x1)).
typedef enum {
    /// Any ISA (excepting those listed as initial support)
    dnnl_cpu_isa_all = 0x0,

    /// Intel Streaming SIMD Extensions 4.1 (Intel SSE4.1)
    dnnl_cpu_isa_sse41 = 0x1,

    /// Intel Advanced Vector Extensions (Intel AVX)
    dnnl_cpu_isa_avx = 0x3,

    /// Intel Advanced Vector Extensions 2 (Intel AVX2)
    dnnl_cpu_isa_avx2 = 0x7,

    /// Intel Advanced Vector Extensions 512 (Intel AVX-512) subset
    /// for Intel Xeon Phi processors x200 Series.
    dnnl_cpu_isa_avx512_mic = 0xf,

    /// Intel AVX-512 subset
    /// for Intel Xeon Phi processors 7235, 7285, 7295 Series.
    dnnl_cpu_isa_avx512_mic_4ops = 0x1f,

    /// Intel AVX-512 subset for Intel Xeon Scalable processor family
    /// and Intel Core processor family.
    dnnl_cpu_isa_avx512_core = 0x27,

    /// Intel AVX-512 and Intel Deep Learning Boost (Intel DL Boost) support
    /// for Intel Xeon Scalable processor family
    /// and Intel Core processor family.
    dnnl_cpu_isa_avx512_core_vnni = 0x67,

    /// Intel AVX-512, Intel DL Boost and bfloat16 support
    /// for Intel Xeon Scalable processor family
    /// and Intel Core processor family.
    dnnl_cpu_isa_avx512_core_bf16 = 0xe7,

    /// Intel AVX-512, Intel DL Boost and bfloat16 support and
    /// Intel AMX with 8-bit integer and bfloat16 support
    dnnl_cpu_isa_avx512_core_amx = 0x3e7,

    /// Intel AVX2 and Intel Deep Learning Boost (Intel DL Boost) support
    dnnl_cpu_isa_avx2_vnni = 0x407,

} dnnl_cpu_isa_t;
3055 
/// CPU ISA hints flags
typedef enum {
    /// No hints (use default features)
    dnnl_cpu_isa_no_hints = 0x0,

    /// Prefer to exclusively use Ymm (256-bit) registers for computations
    dnnl_cpu_isa_prefer_ymm = 0x1,
} dnnl_cpu_isa_hints_t;
3064 
3065 /// @} dnnl_api_service
3066 
3067 /// @} dnnl_api
3068 
3069 #ifdef __cplusplus
3070 }
3071 #endif
3072 
#endif /* ONEAPI_DNNL_DNNL_TYPES_H */
3074